location code: https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking
citation: Trujillo, J.P. & Pouw, W.T.J.L.(2021-11-18). MultiParty Tracking: Linking Frames and Pairing Hands [day you visited the site]. Retrieved from: https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking
After we have the initial data, we want to add some additional metadata that we get by going over all of these loose frames again. 
In this first block of the script, we go through each file, and try to track hands from frame to frame. While we're going through the files, a block within this set of loops will determine the orientation of each hand with respect to the sides of the screen. In other words, we want to try to find the origin of each hand. Does it seem to be 'projecting' from the left side, right side, top, or bottom of the image? This is used in the block directly below to try to find pairs of hands.
# Notebook cell: build an HTML object embedding the accompanying YouTube video
# (the URL starts playback at second 7556). The object is only rendered when
# this is the last expression of a notebook cell.
from IPython.display import HTML
HTML('<iframe width="935" height="584" src="https://www.youtube.com/embed/mw8RymohMp0?start=7556" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
import os
import cv2
# folder that holds the media (videos) we want to analyze
mypath = "./MediaToAnalyze/"
# names of all regular files in that folder (sub-directories are skipped)
onlyfiles = [entry for entry in listdir(mypath) if isfile(join(mypath, entry))]
# folder where the time series output is read from and written to
foldtime = "./Timeseries_Output/"
def get_closest_hand(hand, prev_frame, detected_this_frame, tolerance=3):
    """Find the previously tracked hand that lies closest to ``hand``.

    The idea is that, from one frame to the next, the same hand should appear
    in only a slightly different position. Note that this may not hold when
    tracking is lost and the hand moves some distance before it is found again.

    Parameters
    ----------
    hand : pandas.Series
        One detected hand; must provide every coordinate column of ``prev_frame``.
    prev_frame : pandas.DataFrame
        Last known row of every hand seen so far; needs a ``hand_ID`` column.
        Coordinate columns are those whose name starts with "X", "Y" or "Z".
    detected_this_frame : list
        IDs already linked to another detection in the current frame; these
        are not offered as candidates again.
    tolerance : float, optional
        Upper bound (exclusive) on the summed absolute coordinate difference.
        Somewhat arbitrary, but keeps us from linking hands that are far apart.

    Returns
    -------
    The ``hand_ID`` of the closest candidate, or ``np.nan`` when there is no
    candidate within ``tolerance``.
    """
    # first we drop any hands that have already been connected to one this frame
    candidates = prev_frame[~prev_frame.hand_ID.isin(detected_this_frame)]
    if candidates.empty:
        return np.nan

    coord_cols = [col for col in candidates.columns if str(col)[:1] in ("X", "Y", "Z")]
    previous = candidates[coord_cols].to_numpy(dtype=float)
    current = hand[coord_cols].to_numpy(dtype=float)
    # sum of all absolute coordinate differences, one value per candidate
    distances = np.abs(previous - current).sum(axis=1)

    # candidates with missing coordinates yield NaN and cannot be compared
    if np.isnan(distances).all():
        return np.nan
    best = int(np.nanargmin(distances))

    # make sure the best match is within the threshold
    if distances[best] < tolerance:
        return candidates["hand_ID"].iloc[best]
    return np.nan
  
    
def getslope(a,b):
    """Return the slope (rise over run) of the line through points ``a`` and ``b``.

    Both points are indexable as ``[x, y]``. No guard is applied for a
    vertical line (identical x values).
    """
    rise = b[1] - a[1]
    run = b[0] - a[0]
    return rise / run
def get_intercept(slope, x,y):
    """Return the y-intercept of the line with the given ``slope`` through ``(x, y)``."""
    return y - slope * x
def get_interceptX(slope, p1,p2):
    """Return the x-intercept (the x where y == 0) of the line through ``p1`` and ``p2``.

    ``slope`` is accepted but not used: the gradient is recomputed from the
    two points. No guard is applied for vertical or horizontal lines.
    """
    rise = p1[1] - p2[1]
    run = p1[0] - p2[0]

    gradient = rise / run
    # y-intercept of the line: y = gradient * x + offset
    offset = p1[1] - gradient * p1[0]
    # solve 0 = gradient * x + offset for x
    return (0 - offset) / gradient
def output_progress(df_idx, len_df, checkpoints):
    """Print a progress message once the next checkpoint has been reached.

    The script can take a while to run; this lets us see it hasn't crashed.
    ``checkpoints`` is a list of fractions (e.g. 0.1, 0.2, ...). When
    ``df_idx`` has reached ``len_df`` times the first fraction, that
    percentage is printed and the fraction is removed from the list
    (the list is modified in place). At most one checkpoint is handled per call.
    """
    if not checkpoints:
        return
    next_fraction = checkpoints[0]
    if df_idx >= len_df * next_fraction:
        print(str(int(next_fraction * 100)) + "% complete")
        del checkpoints[0]
        
def flip(original_val):
    """Mirror a value within the unit range, i.e. return ``1 - original_val``."""
    return 1 - original_val
def find_duplicates(hand,hand_idx, frame):
    """Return the index labels of hands in ``frame`` that nearly coincide with ``hand``.

    A hand counts as a duplicate when both its X_WRIST and its Y_WRIST lie
    within 0.05 of the current hand's wrist. This is most likely a
    double-tracking issue, and the caller drops these rows.
    The row labelled ``hand_idx`` (the hand itself) is never reported.
    """
    return [
        row_label
        for row_label in frame.index
        if row_label != hand_idx
        and abs(hand["X_WRIST"] - frame.loc[row_label, "X_WRIST"]) < 0.05
        and abs(hand["Y_WRIST"] - frame.loc[row_label, "Y_WRIST"]) < 0.05
    ]
        
This first block is going to go through our df frame by frame and try to calculate which way the hand is facing, and thus where its origin may be. It will also try to match the hands from one frame to the next, that way we can look at a time-series, rather than single tracked points.
# ---- Link hands from frame to frame and estimate where each hand enters the image ----
# Pick the raw time series file. Results of earlier runs ("*_linked.csv", "*_paired.csv")
# are written to the same folder, so they are skipped; sorting makes the choice reproducible.
raw_files = sorted(f for f in os.listdir(foldtime)
                   if not f.endswith(("_linked.csv", "_paired.csv")))
datafile = raw_files[0]
print("working on linking " + datafile + "...")
df = pd.read_csv(foldtime + datafile)
# create empty columns for the values we are about to fill in
df["hand_ID"] = np.nan
# object dtype, because this column will hold strings ("Bot", "Top", "Left", "Right")
df["origin"] = pd.Series(np.nan, index=df.index, dtype=object)
df["x_intercept"] = np.nan
df["y_intercept"] = np.nan
df_idx = 0  # number of rows handled so far (only used for first-frame detection and progress)
arbID = 0 # this will be a first pass ID
# a little progress counter so we know how far along it is
df_len = len(df)
# make a list of checkpoint values for 10% increments
checkpoints = [round(x * 0.1,1) for x in range(1, 11)]
# we'll use this to move through the dataframe, one time point (frame) at a time
time_points = sorted(set(df["time"]))
prev_hands = None  # last known row of every hand ID seen so far

for current_time in time_points:
    frame = df.loc[df["time"] == current_time]
    is_first_frame = df_idx == 0
    detected_this_frame = [] # we set this up to not let two hands be assigned to the same previous hand
    dropped_this_frame = set() # index labels removed as duplicates of another hand in this frame

    # NOTE: iterrows() keeps walking the rows of the *original* frame even after `frame`
    # is re-bound below, so rows dropped as duplicates have to be skipped explicitly
    for frame_idx,hand in frame.iterrows():
        if frame_idx in dropped_this_frame:
            continue

        # check for duplicates (double-tracked hands) and remove them everywhere
        duplicate = find_duplicates(hand, frame_idx, frame)
        if duplicate:
            dropped_this_frame.update(duplicate)
            frame = frame.drop(duplicate)
            df = df.drop(duplicate)

        if is_first_frame:
            # in the first frame, each hand is new, and receives a new ID
            closest_ID = arbID
            arbID +=1
        else:
            # after the first frame, we need to try to match each hand with a previous ID
            closest_ID = get_closest_hand(hand,prev_hands, detected_this_frame)
            detected_this_frame.append(closest_ID)
            # if there are no good matches, we assume it's a new (previously untracked) hand
            # and we assign it a new ID
            if np.isnan(closest_ID):
                closest_ID = arbID
                arbID +=1
        df.loc[frame_idx,"hand_ID"] = closest_ID

        ##################################
        #### This block calculates hand orientation (for every hand, including the first frame) ####
        # start from "unknown" so that a hand matching none of the cases below
        # never inherits the origin of the previously processed hand
        origin = intercept_X = intercept_Y = np.nan
        try:
            # so first get middle point of base
            midbase = [hand["X_PINKY_MCP"] + ((hand["X_INDEX_MCP"] - hand["X_PINKY_MCP"])/2),
                       flip(hand["Y_PINKY_MCP"]) + ((flip(hand["Y_INDEX_MCP"]) - flip(hand["Y_PINKY_MCP"]))/2)]
            wrist_coords = [hand["X_WRIST"],flip(hand["Y_WRIST"])]
            if midbase[0] > hand["X_WRIST"]:
                hand_direction = "R"
            else:
                hand_direction = "L"
                # if left facing, we need to shift the coordinates to the other side of the y-axis
                wrist_coords[0] = hand["X_WRIST"]*-1
                midbase[0] = midbase[0]*-1
            # then the slope from wrist to mid base
            # NOTE(review): for left-facing hands this mixes the un-mirrored wrist x with the
            # mirrored mid-base x; kept as in the original analysis - confirm this is intended
            slope = getslope([hand["X_WRIST"],flip(hand["Y_WRIST"])],midbase)
            # now get the initial X and Y intercept
            # this is used to figure out where in our image the hand seems
            # to be projecting from (Top, Bottom, Left or Right)
            intercept_X = get_interceptX(slope, wrist_coords, midbase)
            intercept_Y = get_intercept(slope,wrist_coords[0],wrist_coords[1])
            if hand_direction == "R" and intercept_Y < 0 and 0 < intercept_X < 1:
                origin = "Bot"
            elif hand_direction == "R" and 0 < intercept_Y < 1:
                origin = "Left"
            elif hand_direction == "R" and intercept_Y > 1 and intercept_X > 0:
                origin = "Top"
                intercept_X = get_interceptX(slope,
                                             [wrist_coords[0],wrist_coords[1]*-1],
                                             midbase)
            elif hand_direction =="L" and intercept_Y < 0 and intercept_X > -1:
                origin = "Bot"
                intercept_X = intercept_X*-1
            elif hand_direction =="L" and 0 < intercept_Y < 1:
                origin = "Right"
            elif hand_direction =="L" and intercept_Y > 1 and intercept_X < 0:
                origin = "Top"
                intercept_X = get_interceptX(slope,
                                             [wrist_coords[0],wrist_coords[1]*-1],
                                             midbase)
        except ZeroDivisionError:
            # perfectly vertical/horizontal hand axis: no usable intercepts
            origin = intercept_X = intercept_Y = np.nan
        # update this info
        df.loc[frame_idx,"origin"] = origin
        df.loc[frame_idx,"x_intercept"] = intercept_X
        df.loc[frame_idx,"y_intercept"] = intercept_Y
        ##################################

    # re-select the frame so that it reflects the new hand_IDs (and dropped duplicates)
    frame = df.loc[df["time"] == current_time]

    if is_first_frame:
        # the first frame simply provides the initial set of known hands
        prev_hands = frame.copy()
    else:
        # hands seen again get their row replaced, new hands are added,
        # and hands that are currently not tracked keep their last known row
        not_seen_now = prev_hands[~prev_hands["hand_ID"].isin(frame["hand_ID"])]
        if not_seen_now.empty:
            prev_hands = frame.copy()
        else:
            prev_hands = pd.concat([not_seen_now, frame])

    # move the row counter forward and check progress
    df_idx += max(len(frame), 1)
    output_progress(df_idx,df_len, checkpoints)

df.to_csv(foldtime + datafile.split(".")[0] + "_linked.csv")
# ---- Linking pass (this cell starts again from the raw file; the result stays in `df`) ----
# Pick the raw time series file. Results of earlier runs ("*_linked.csv", "*_paired.csv")
# are written to the same folder, so they are skipped; sorting makes the choice reproducible.
raw_files = sorted(f for f in os.listdir(foldtime)
                   if not f.endswith(("_linked.csv", "_paired.csv")))
datafile = raw_files[0]
print("working on linking " + datafile + "...")
df = pd.read_csv(foldtime + datafile)
# create empty columns for the values we are about to fill in
df["hand_ID"] = np.nan
# object dtype, because this column will hold strings ("Bot", "Top", "Left", "Right")
df["origin"] = pd.Series(np.nan, index=df.index, dtype=object)
df["x_intercept"] = np.nan
df["y_intercept"] = np.nan
df_idx = 0  # number of rows handled so far (only used for first-frame detection and progress)
arbID = 0 # this will be a first pass ID
# a little progress counter so we know how far along it is
df_len = len(df)
# make a list of checkpoint values for 10% increments
checkpoints = [round(x * 0.1,1) for x in range(1, 11)]
# we'll use this to move through the dataframe, one time point (frame) at a time
time_points = sorted(set(df["time"]))
prev_hands = None  # last known row of every hand ID seen so far

for current_time in time_points:
    frame = df.loc[df["time"] == current_time]
    is_first_frame = df_idx == 0
    detected_this_frame = [] # we set this up to not let two hands be assigned to the same previous hand
    dropped_this_frame = set() # index labels removed as duplicates of another hand in this frame

    # NOTE: iterrows() keeps walking the rows of the *original* frame even after `frame`
    # is re-bound below, so rows dropped as duplicates have to be skipped explicitly
    for frame_idx,hand in frame.iterrows():
        if frame_idx in dropped_this_frame:
            continue

        # check for duplicates (double-tracked hands) and remove them everywhere
        duplicate = find_duplicates(hand, frame_idx, frame)
        if duplicate:
            dropped_this_frame.update(duplicate)
            frame = frame.drop(duplicate)
            df = df.drop(duplicate)

        if is_first_frame:
            # in the first frame, each hand is new, and receives a new ID
            closest_ID = arbID
            arbID +=1
        else:
            # after the first frame, we need to try to match each hand with a previous ID
            closest_ID = get_closest_hand(hand,prev_hands, detected_this_frame)
            detected_this_frame.append(closest_ID)
            # if there are no good matches, we assume it's a new (previously untracked) hand
            # and we assign it a new ID
            if np.isnan(closest_ID):
                closest_ID = arbID
                arbID +=1
        df.loc[frame_idx,"hand_ID"] = closest_ID

        ##################################
        #### This block calculates hand orientation (for every hand, including the first frame) ####
        # start from "unknown" so that a hand matching none of the cases below
        # never inherits the origin of the previously processed hand
        origin = intercept_X = intercept_Y = np.nan
        try:
            # so first get middle point of base
            midbase = [hand["X_PINKY_MCP"] + ((hand["X_INDEX_MCP"] - hand["X_PINKY_MCP"])/2),
                       flip(hand["Y_PINKY_MCP"]) + ((flip(hand["Y_INDEX_MCP"]) - flip(hand["Y_PINKY_MCP"]))/2)]
            wrist_coords = [hand["X_WRIST"],flip(hand["Y_WRIST"])]
            if midbase[0] > hand["X_WRIST"]:
                hand_direction = "R"
            else:
                hand_direction = "L"
                # if left facing, we need to shift the coordinates to the other side of the y-axis
                wrist_coords[0] = hand["X_WRIST"]*-1
                midbase[0] = midbase[0]*-1
            # then the slope from wrist to mid base
            # NOTE(review): for left-facing hands this mixes the un-mirrored wrist x with the
            # mirrored mid-base x; kept as in the original analysis - confirm this is intended
            slope = getslope([hand["X_WRIST"],flip(hand["Y_WRIST"])],midbase)
            # now get the initial X and Y intercept
            # this is used to figure out where in our image the hand seems
            # to be projecting from (Top, Bottom, Left or Right)
            intercept_X = get_interceptX(slope, wrist_coords, midbase)
            intercept_Y = get_intercept(slope,wrist_coords[0],wrist_coords[1])
            if hand_direction == "R" and intercept_Y < 0 and 0 < intercept_X < 1:
                origin = "Bot"
            elif hand_direction == "R" and 0 < intercept_Y < 1:
                origin = "Left"
            elif hand_direction == "R" and intercept_Y > 1 and intercept_X > 0:
                origin = "Top"
                intercept_X = get_interceptX(slope,
                                             [wrist_coords[0],wrist_coords[1]*-1],
                                             midbase)
            elif hand_direction =="L" and intercept_Y < 0 and intercept_X > -1:
                origin = "Bot"
                intercept_X = intercept_X*-1
            elif hand_direction =="L" and 0 < intercept_Y < 1:
                origin = "Right"
            elif hand_direction =="L" and intercept_Y > 1 and intercept_X < 0:
                origin = "Top"
                intercept_X = get_interceptX(slope,
                                             [wrist_coords[0],wrist_coords[1]*-1],
                                             midbase)
        except ZeroDivisionError:
            # perfectly vertical/horizontal hand axis: no usable intercepts
            origin = intercept_X = intercept_Y = np.nan
        # update this info
        df.loc[frame_idx,"origin"] = origin
        df.loc[frame_idx,"x_intercept"] = intercept_X
        df.loc[frame_idx,"y_intercept"] = intercept_Y
        ##################################

    # re-select the frame so that it reflects the new hand_IDs (and dropped duplicates)
    frame = df.loc[df["time"] == current_time]

    if is_first_frame:
        # the first frame simply provides the initial set of known hands
        prev_hands = frame.copy()
    else:
        # hands seen again get their row replaced, new hands are added,
        # and hands that are currently not tracked keep their last known row
        not_seen_now = prev_hands[~prev_hands["hand_ID"].isin(frame["hand_ID"])]
        if not_seen_now.empty:
            prev_hands = frame.copy()
        else:
            prev_hands = pd.concat([not_seen_now, frame])

    # move the row counter forward and check progress
    df_idx += max(len(frame), 1)
    output_progress(df_idx,df_len, checkpoints)

# show the first rows of the linked dataframe (notebook output)
df.head()
import matplotlib.pyplot as plt
import collections
# Hands that show up in fewer than a quarter of all time points are most likely
# tracking artefacts, so we remove every row that belongs to such a hand ID.
quarter_time = int(len(set(df["time"]))*0.25)
tracked_frames = collections.Counter(df["hand_ID"])
# `hand_id == hand_id` is False only for NaN: rows without an ID are left untouched
short_lived = [hand_id for hand_id, n_rows in tracked_frames.items()
               if n_rows < quarter_time and hand_id == hand_id]
df = df[~df["hand_ID"].isin(short_lived)]
# store the cleaned, linked time series
df.to_csv(foldtime + datafile.split(".")[0] + "_linked.csv")
This next bit can be used to verify that there aren't too many hands being tracked, missing data, etc.
# ---- Pair left and right hands within each frame, based on where they enter the image ----
df = pd.read_csv(foldtime + datafile.split(".")[0] + "_linked.csv")
checkpoints = [round(x * 0.1,1) for x in range(1, 11)]
df_len = len(df)  # refreshed, because rows may have been dropped since the linking pass

# first, we need to switch the hand labels
# this is because mediapipe assumes the image is mirrored
is_right = df["hand"].astype(str).str.contains("Right", regex=False)
df["hand"] = np.where(is_right, 'label: "Left"', 'label: "Right"')

# make sure the result columns exist even when not a single pair is found
df["paired_hand"] = np.nan
df["pairing_distance"] = np.nan


def _pairing_distance(left_hand, right_hand):
    """Return the pairing distance between a left and a right hand, or None if they cannot pair.

    Both arguments are rows providing "origin", "x_intercept" and "y_intercept".
    Two cases are considered for each origin (Bot, Top, Right, Left) of the left hand:
      * the right hand enters from the same side: the distance is the (positive)
        difference between the intercepts along that side;
      * the right hand enters from the neighbouring side: the distance is measured
        around the corner and must lie between 0 and 1 (an arbitrary cutoff that
        keeps us from picking up a hand on the other side of the screen).
    The around-the-corner value is used both as the cutoff test and as the reported
    distance for all four origins, so that candidates are compared on the same scale.
    """
    left_origin = left_hand["origin"]
    right_origin = right_hand["origin"]
    same_side = None
    around_corner = None

    if left_origin == "Bot":
        # oriented bottom: we slide from left to right, then up the right side
        if right_origin == "Bot":
            same_side = right_hand["x_intercept"] - left_hand["x_intercept"]
        elif right_origin == "Right":
            around_corner = right_hand["y_intercept"] + (1 - left_hand["x_intercept"])
    elif left_origin == "Top":
        # oriented top: we go right to left, then along the left side
        if right_origin == "Top":
            same_side = left_hand["x_intercept"] - right_hand["x_intercept"]
        elif right_origin == "Left":
            around_corner = (1 - right_hand["y_intercept"]) + (1 - left_hand["x_intercept"])
    elif left_origin == "Right":
        # origin on the right: we move bottom to top, then right to left along the top
        if right_origin == "Right":
            same_side = right_hand["y_intercept"] - left_hand["y_intercept"]
        elif right_origin == "Top":
            around_corner = (1 - right_hand["x_intercept"]) + (1 - left_hand["y_intercept"])
    elif left_origin == "Left":
        # origin on the left: we move bottom to top, then left to right along the top
        if right_origin == "Left":
            same_side = right_hand["y_intercept"] - left_hand["y_intercept"]
        elif right_origin == "Top":
            around_corner = right_hand["x_intercept"] + (1 - left_hand["y_intercept"])

    # comparisons with NaN are False, so hands without intercepts never pair
    if same_side is not None and same_side > 0:
        return same_side
    if around_corner is not None and 0 < around_corner < 1:
        return around_corner
    return None


df_idx = 0
for current_time in pd.unique(df["time"]):
    frame = df.loc[df["time"] == current_time]

    # collect every admissible (left hand, right hand) combination in this frame as
    # (distance, row of left hand, row of right hand, ID of left hand, ID of right hand)
    candidates = []
    for frame_idx,hand_origin in frame.iterrows():
        if "Left" not in hand_origin["hand"]:
            continue
        # once we find a left hand, we look at every right hand as a potential match
        for match_idx,hand in frame.iterrows():
            if "Right" not in hand["hand"]:
                continue
            pairing_dist = _pairing_distance(hand_origin, hand)
            if pairing_dist is not None:
                candidates.append((pairing_dist, frame_idx, match_idx,
                                   hand_origin["hand_ID"], hand["hand_ID"]))

    # at the end of each frame, we pick the pairs: closest combination first,
    # and every hand can be part of one pair only (the sort is stable, so ties
    # are resolved in the order in which the candidates were found)
    candidates.sort(key=lambda candidate: candidate[0])
    paired_left = set()
    paired_right = set()
    for pairing_dist, origin_idx, match_idx, match_origin, match_pair in candidates:
        if match_origin in paired_left or match_pair in paired_right:
            continue
        df.loc[origin_idx,"paired_hand"] = match_pair
        df.loc[origin_idx,"pairing_distance"] = pairing_dist
        df.loc[match_idx,"paired_hand"] = match_origin
        df.loc[match_idx,"pairing_distance"] = pairing_dist
        paired_left.add(match_origin)
        paired_right.add(match_pair)

    df_idx += len(frame)
    # check progress
    output_progress(df_idx,df_len, checkpoints)
import collections
# This next block stabilizes the inconsistencies in the tracking
# first, we need to make sure there are no short switches between a hand being ID'd as left or right
for ID in df["hand_ID"].dropna().unique():
    same_hand = df["hand_ID"] == ID
    # this gets the most frequently associated label with this hand ID ...
    label_count = collections.Counter(df.loc[same_hand, "hand"])
    max_label = label_count.most_common(1)[0][0]
    # ... and fills it in for every row of this hand
    df.loc[same_hand, "hand"] = max_label

# Then, we interpolate the pairing based on the data that we have
# so even if the second hand isn't always present, we know that it's a pair
if "paired_hand" not in df.columns:
    # no pair was found at all; keep the column so the output has a stable layout
    df["paired_hand"] = np.nan
labelled = df["hand"].isin(['label: "Left"', 'label: "Right"'])
for ID in df.loc[labelled, "hand_ID"].dropna().unique():
    same_hand = df["hand_ID"] == ID
    # all hands this one has been paired with; frames without a pair (NaN) are left out,
    # because every NaN would otherwise be counted as its own "most common" candidate
    observed_pairs = df.loc[same_hand, "paired_hand"].dropna()
    if observed_pairs.empty:
        continue  # never paired: nothing to fill in
    max_pair = collections.Counter(observed_pairs).most_common(1)[0][0]
    # now fill this in
    df.loc[same_hand, "paired_hand"] = max_pair

# finally, save the new dataframe
df.to_csv("./Timeseries_Output/" + datafile.split(".")[0] + "_paired.csv")
Let's see what this modified dataframe gives us.
df.loc[35:40,]
See that we now have an estimate of which hand is paired with which (e.g., the 2nd row shows hand 1.0 paired with hand 2.0). We also have the pairing distance, which gives us some idea of how far apart the origin points of the two hands are. 
It's nice to have these data, but we should of course check the quality of these estimations to see how accurate they are.
This code can be found at: https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking/visualize_tracking_IDs.py
The full output video can be found under https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking/Videotracking_output/sampletopview_paired.mp4
import time
# ---- Draw the tracked hands, their IDs and their pairing onto the video ----
videofile = "sampletopview.mp4"
checkpoints = [round(x * 0.1,1) for x in range(1, 11)]
#Hand landmarks
# NOTE(review): 'RING_TIP' is listed twice and 'RING_PIP' is missing. Left unchanged because
# the column names of the tracking file are not visible here - confirm against the csv header.
markers = ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP',
 'INDEX_MCP', 'INDEX_PIP', 'INDEX_DIP', 'INDEX_TIP',
 'MIDDLE_MCP', 'MIDDLE_PIP', 'MIDDLE_DIP','MIDDLE_TIP',
 'RING_MCP', 'RING_TIP', 'RING_DIP', 'RING_TIP',
 'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']
tracking_name = foldtime + videofile.split(".")[0] + "_paired.csv"
tracking_file = pd.read_csv(tracking_name)
#load in the video file
cap = cv2.VideoCapture(mypath + videofile)
# this first frame is only used to get the frame size for the writer
hasFrame, frame = cap.read()
if not hasFrame:
    cap.release()
    raise IOError("could not read any frame from " + mypath + videofile)
# create an output file to see our visualized tracking
output_folder = "./Videotracking_output_withIDs/"
os.makedirs(output_folder, exist_ok=True)  # the writer fails silently if the folder is missing
output_filename = output_folder + videofile.split(".")[0] + "_paired.mp4"
vid_writer = cv2.VideoWriter(output_filename,cv2.VideoWriter_fourcc('m','p','4','v'), 30, (frame.shape[1],frame.shape[0]))
no_frames = max(tracking_file["index"])
# NOTE(review): the loop below starts by grabbing the *next* frame, so the frame read above
# is never written and frame_no 1 refers to the second frame of the video - confirm that
# this matches the numbering of the "index" column.
frame_no = 1
try:
    while hasFrame:
        hasFrame, frame = cap.read() #grabs *next* frame
        if not hasFrame:
            break
        frameCopy = np.copy(frame)
        # mediapipe scales x,y coordinates to a 0,1 range, so we need to recalculate the pixel coordinates
        frameWidth = frame.shape[1]
        frameHeight = frame.shape[0]
        # get just the tracking data for this frame
        tracking_frame = tracking_file.loc[tracking_file["index"] == frame_no]
        for _,hand in tracking_frame.iterrows():
            # then we go through each joint/marker of the hand and add a circle
            for marker in markers:
                x = int(hand["X_" + marker]*frameWidth)
                y = int(hand["Y_" + marker]*frameHeight)
                cv2.circle(frameCopy, (x, y), 5, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
            # we want our IDs to be about the center horizontally,
            hand_cent_x = np.median([(hand["X_" + marker]*frameWidth) for marker in markers])
            # and 10 pixels beyond the largest y value of all the points
            # (image y grows downwards, so the text ends up underneath the hand)
            hand_cent_y = max([(hand["Y_" + marker]*frameHeight) for marker in markers]) + 10
            if "Right" in hand["hand"]:
                hand_label = "R"
            else:
                hand_label = "L"
            hand_text = hand_label + "  ID:" + str(hand["hand_ID"])+ "   pair: " + str(hand["paired_hand"])
            cv2.putText(frameCopy, hand_text, (int(hand_cent_x), int(hand_cent_y)), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 1, lineType=cv2.LINE_AA)
        cv2.imshow('Frame',frameCopy)
        # imshow only refreshes the window when the GUI event loop gets a chance to run
        cv2.waitKey(1)
        frame_no +=1
        vid_writer.write(frameCopy)
        output_progress(frame_no,no_frames, checkpoints)
finally:
    # always finalize the output video and free the capture/window,
    # also when something goes wrong halfway through
    cap.release()
    vid_writer.release()
    cv2.destroyAllWindows()
Look at the video, and see what you think of: