from IPython.display import HTML
# Embed a YouTube video player (iframe) in the notebook output; the URL's
# `start=10437` query parameter sets the playback offset.
# NOTE(review): as a bare expression this presumably only renders when it is
# the last statement of a notebook cell -- confirm if this is run as a script.
HTML('<iframe width="936" height="527" src="https://www.youtube.com/embed/s-XV4GCSjtc?start=10437" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')
from parselmouth.praat import call
import os
import pympi
from tabulate import tabulate
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
    
    
def get_multispeaker_transitions(utterances_1, utterances_2, utterances_3):
    """Collect the turn transitions of a three-party conversation in one table.

    For every speaker, the transitions that follow that speaker's utterances
    are computed with ``get_gaps_overlaps`` (closest onset of another speaker,
    limited to 2 s following Heldner & Edlund 2010), summary statistics are
    printed with ``get_turn_trans_stats``, and all transitions are stacked
    into a single DataFrame.

    Parameters
    ----------
    utterances_1, utterances_2, utterances_3 : list
        Annotations of participant 1, 2 and 3. Only the first two fields of
        each annotation (onset, offset) are used; times are in ms.

    Returns
    -------
    pandas.DataFrame
        Columns ``participant`` ("1", "2" or "3": the speaker whose utterance
        ended), ``timestamp`` (onset of the next speaker's utterance),
        ``transition_duration`` and ``utterance_duration``, all in ms.
        Because the rows are assembled with ``np.column_stack`` next to the
        string participant labels, the numeric columns come back as strings;
        convert them with ``pd.to_numeric``.
        When fewer than two speakers have any utterances there can be no
        transitions, and an empty DataFrame with the same columns is returned.
    """
    columns = ['participant', 'timestamp', 'transition_duration', 'utterance_duration']

    speaker_count = sum(
        1 for utterances in (utterances_1, utterances_2, utterances_3) if utterances
    )
    if speaker_count < 2:
        # Previously this path fell off the end of the function and returned
        # None, which broke every caller that expected a DataFrame.
        return pd.DataFrame(columns=columns)

    # One result per speaker; the order of the two "other" speakers only
    # affects the order in which transitions are listed.
    per_speaker = [
        get_gaps_overlaps(utterances_1, utterances_2, utterances_3),
        get_gaps_overlaps(utterances_2, utterances_1, utterances_3),
        get_gaps_overlaps(utterances_3, utterances_2, utterances_1),
    ]

    participant = []
    timestamp = []
    trans_dur = []
    utterance_time_trans = []
    for label, result in zip(("1", "2", "3"), per_speaker):
        (speaker_trans, speaker_gaps, speaker_utt_trans, _utt_gap,
         speaker_overlaps, _utt_overlap, speaker_timestamps) = result

        print(f"Turn Transition Statistics for Participant {label}")
        get_turn_trans_stats(speaker_trans, speaker_gaps, speaker_overlaps)

        # the label marks which speaker each stacked row belongs to
        participant.extend([label] * len(speaker_trans))
        timestamp.extend(speaker_timestamps)
        trans_dur.extend(speaker_trans)
        utterance_time_trans.extend(speaker_utt_trans)

    return pd.DataFrame(
        np.column_stack([participant, timestamp, trans_dur, utterance_time_trans]),
        columns=columns,
    )
        
        
def get_gaps_overlaps(utterances_1, utterances_2, utterances_3, max_transition_ms=2000):
    """Find the turn transitions that follow one speaker's utterances.

    For each utterance of the focal speaker (``utterances_1``) the closest
    onset of each other speaker is located. That onset counts as a turn
    transition when it lies within ``max_transition_ms`` of the utterance's
    offset and is closer than the focal speaker's own next utterance
    (otherwise the transition would not belong to this utterance).

    Parameters
    ----------
    utterances_1 : list
        Annotations of the focal speaker; item ``[0]`` is the onset and item
        ``[1]`` the offset.
    utterances_2, utterances_3 : list
        Annotations of the other two speakers. Either may be empty, in which
        case that speaker simply contributes no transitions. Their order only
        affects the order of the output lists.
    max_transition_ms : int, optional
        Largest absolute distance between offset and next onset that is still
        treated as a transition (default 2000).

    Returns
    -------
    tuple of lists
        ``(trans_dur, gap_dur, utterance_time_trans, utterance_time_gap,
        overlap_dur, utterance_time_overlap, timestamp)`` where ``trans_dur``
        holds onset minus offset for every transition (positive = gap,
        zero or negative = overlap), ``utterance_time_*`` the duration of the
        preceding utterance, and ``timestamp`` the onset of the next speaker.
    """
    trans_dur = []
    utterance_time_trans = []
    timestamp = []
    for u_idx, u in enumerate(utterances_1):
        onset, offset = u[0], u[1]

        # closest onset of each other speaker, skipping speakers without data
        candidates = [
            nearest
            for nearest in (
                _nearest_onset(offset, utterances_2),
                _nearest_onset(offset, utterances_3),
            )
            if nearest is not None
        ]
        if not candidates:
            continue

        # Within-speaker pause: an inter-speaker transition must be shorter
        # than this, otherwise it does not belong to this utterance.
        if u_idx < len(utterances_1) - 1:
            min_pause = utterances_1[u_idx + 1][0] - offset
        else:
            # last utterance of this speaker: there is no following pause, so
            # choose a value that lets the closest candidate through
            min_pause = min(distance for distance, _ in candidates) + 1

        for distance, next_onset in candidates:
            if distance < max_transition_ms and distance < min_pause:
                trans_dur.append(int(next_onset - offset))
                utterance_time_trans.append(int(offset - onset))
                timestamp.append(int(next_onset))

    # finally, separate the transitions into gaps (> 0) and overlaps (<= 0)
    gap_dur = []
    overlap_dur = []
    utterance_time_gap = []
    utterance_time_overlap = []
    for duration, utterance_time in zip(trans_dur, utterance_time_trans):
        if duration > 0:
            gap_dur.append(duration)
            utterance_time_gap.append(utterance_time)
        else:
            overlap_dur.append(duration)
            utterance_time_overlap.append(utterance_time)
    return (trans_dur, gap_dur, utterance_time_trans, utterance_time_gap,
            overlap_dur, utterance_time_overlap, timestamp)


def _nearest_onset(offset, utterances):
    """Return ``(distance, onset)`` of the onset closest to ``offset``, or None if there are no utterances."""
    if not utterances:
        return None
    # min() keeps the first of several equally close onsets
    onset = min((utt[0] for utt in utterances), key=lambda start: abs(offset - start))
    return abs(offset - onset), onset
        
This function prints summary statistics for the turn-taking behavior.
def get_turn_trans_stats(transitions, gaps, overlaps):
    """Print a table of summary statistics for one speaker's turn transitions.

    Parameters
    ----------
    transitions : list
        All transition durations (gaps and overlaps together).
    gaps : list
        The positive transition durations.
    overlaps : list
        The zero or negative transition durations.

    The table lists mean, SD and median for each of the three lists, plus the
    share of transitions that are gaps and overlaps. The shares are printed
    as percentages (0-100) so they match their "(%)" labels. Statistics of an
    empty list are shown as nan. Nothing is returned.
    """
    if transitions and not np.all(np.isnan(transitions)):
        gap_freq = 100 * len(gaps) / len(transitions)
        overlap_freq = 100 * len(overlaps) / len(transitions)
    else:
        gap_freq = 0
        overlap_freq = 0

    trans_mean, trans_sd, trans_median = _mean_sd_median(transitions)
    gap_mean, gap_sd, gap_median = _mean_sd_median(gaps)
    overlap_mean, overlap_sd, overlap_median = _mean_sd_median(overlaps)

    turn_data = [['turn transition mean', trans_mean],
                 ['turn transition SD', trans_sd],
                 ['turn transition median', trans_median],
                 ['Gap mean', gap_mean],
                 ['Gap SD', gap_sd],
                 ['Gap median', gap_median],
                 ['Gap freq. (%)', gap_freq],
                 ['Overlap mean', overlap_mean],
                 ['Overlap SD', overlap_sd],
                 ['Overlap median', overlap_median],
                 ['Overlap freq. (%)', overlap_freq],]

    print(tabulate(turn_data, headers=['Stat','Value']))


def _mean_sd_median(values):
    """Return (mean, SD, median) of ``values``; all nan for an empty input."""
    if len(values) == 0:
        # NumPy would also give nan here, but only after emitting
        # "mean of empty slice" RuntimeWarnings.
        return np.nan, np.nan, np.nan
    return np.mean(values), np.std(values), np.median(values)
# Load the first annotation file found in the annotation directory
annot_dir = "./annotations/"
annotation_files = os.listdir(annot_dir)
eafob = pympi.Elan.Eaf(annot_dir + annotation_files[0])

# One list of annotations per participant tier
utterances_1, utterances_2, utterances_3 = (
    list(eafob.get_annotation_data_for_tier(tier))
    for tier in ("Participant1", "Participant2", "Participant3")
)

# Turn transitions of all three speakers; make the duration column numeric
df = get_multispeaker_transitions(utterances_1, utterances_2, utterances_3)
df["transition_duration"] = pd.to_numeric(df["transition_duration"], errors='coerce')

# Histogram of the transition durations, one series per speaker
tt_1, tt_2, tt_3 = (
    df.loc[df["participant"] == speaker, "transition_duration"]
    for speaker in ("1", "2", "3")
)
plt.hist(
    [tt_1, tt_2, tt_3],
    bins=25,
    alpha=0.5,
    label=["speaker 1", "speaker 2", "speaker 3"],
)
plt.legend(loc='upper right')
plt.show()
The above output shows the turn transition information for each person in our interaction. Remember that each of the values is based on the transitions from this speaker's utterances. So, for example, speaker 1 has a gap frequency of 56% and an overlap frequency of 44%. This means that when speaker 1 finishes an utterance, s/he is "interrupted" 44% of the time (on average by 558ms), but typically there is a gap of 439ms before the next speaker begins their utterance.
This information can already be useful for looking into how individual speakers behave with one another. One way to proceed with this data would be to simply take these summary values, per individual, per group, and statistically compare groups with one another. Do certain groups have smoother turn transition timings? We can also correlate the turn timings with other information, such as KAI scores.
Entropy of Turn Timing
Another way to capture the dynamics of turn-taking behavior is to look at Approximate Entropy. In order to have an idea of what this provides us, let's consider the statistics output above. In a perfectly 'stable' back-and-forth interaction, each turn transition could be approximately 4ms, across the entire interaction. We would then get a mean turn transition time of 4ms, with a SD of 667ms (as seen above for speaker 1 -- reflecting the tendency around 0). However, it may be that these values change over the course of the interaction, and knowing the mean won't necessarily let you accurately predict the next transition time. The latter case is an example of higher entropy.
import antropy as ant
# Build one time-ordered series of transitions across all speakers.
# Both columns are converted to numbers first so that sorting and the
# entropy calculation operate on numeric values.
df["timestamp"] = pd.to_numeric(df["timestamp"], errors='coerce')
df["transition_duration"] = pd.to_numeric(df["transition_duration"], errors='coerce')
sort_df = df.sort_values(by="timestamp")
sort_df.head()
# Sample entropy of the transition durations over the whole session
# (this calls antropy's sample_entropy, not app_entropy)
print(ant.sample_entropy(sort_df["transition_duration"]))
Aside from the general entropy across the whole session, which we could compare across groups or correlate with personal or group characteristics, we can also look at the change in entropy over time. We'll split the session into 4 minute windows to assess entropy across the session.
For the analysis, we'll use the categorical data corresponding to the "transitioning speaker" and calculate Permutation Entropy. Permutation entropy captures the ordering of values in a time-series. In this case, we want the complexity with which the group switches from one speaker to the next. Is it always speaker 1 followed by speaker 2, and vice versa? This would be low entropy. Or do we see all possible orders: speaker 3, then 1, followed by 3 again, then 2? The more unpredictable the switching, the higher the entropy.
# Permutation entropy of the speaker sequence in consecutive time windows.
tt_entropy = []
window_length = (1000*60)*4 #written out just to make the window length easily understandable
window_start = 0
window_end = window_length
# sort_values keeps the original row labels, so indexing the column with
# `len(sort_df)-1` looks up a label, not the last (latest) row. Take the
# latest timestamp by value instead.
session_end = sort_df["timestamp"].max()
# Only complete windows are analysed; a trailing partial window is skipped.
while window_end < session_end:
    # get data for this time window (half-open, so a timestamp that falls
    # exactly on a window boundary is counted once instead of being dropped)
    window = sort_df[(sort_df["timestamp"] >= window_start) & (sort_df["timestamp"] < window_end)]
    # calculate permutation entropy
    tt_entropy.append(ant.perm_entropy(window["participant"], normalize=True))
    # shift the window forward
    window_start = window_end
    window_end += window_length

# now plot the entropy over time; the minute-range labels are derived from the
# number of windows actually computed so x and y always have the same length
minutes_per_window = window_length // (1000*60)
window_labels = [
    f"{i * minutes_per_window + 1}-{(i + 1) * minutes_per_window}"
    for i in range(len(tt_entropy))
]
plt.scatter(window_labels, tt_entropy)
plt.title("Entropy per 4 Minute Time-Window")
plt.show()
These values can again be correlated with group level characteristics, or to momentary annotations regarding breakthroughs, changes in topic, etc.