location code: https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking
citation: Trujillo, J.P. & Pouw, W.T.J.L.(2021-11-18). MultiParty Tracking: Linking Frames and Pairing Hands [day you visited the site]. Retrieved from: https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking
After we have the initial data, we want to add some additional metadata that we get by going over all of these loose frames again.
In this first block of the script, we go through each file, and try to track hands from frame to frame. While we're going through the files, a block within this set of loops will determine the orientation of each hand with respect to the sides of the screen. In other words, we want to try to find the origin of each hand. Does it seem to be 'projecting' from the left side, right side, top, or bottom of the image? This is used in the block directly below to try to find pairs of hands.
# Embed the accompanying bootcamp lecture video inside the notebook.
from IPython.display import HTML
HTML('<iframe width="935" height="584" src="https://www.youtube.com/embed/mw8RymohMp0?start=7556" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>')
import pandas as pd
import numpy as np
from os import listdir
from os.path import isfile, join
import os
import cv2
# list all videos in the media folder
mypath = "./MediaToAnalyze/"
# keep only regular files (skip subdirectories) found in MediaToAnalyze
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]  # get all files that are in mediatoanalyze
# folder holding the per-video time-series CSVs produced by the tracking step
foldtime = "./Timeseries_Output/"
def get_closest_hand(hand, prev_frame, detected_this_frame):
    """Match `hand` against the hands seen in the previous frame.

    Looks at the coordinates of the current hand and finds the closest match
    in `prev_frame`, the idea being that from one frame to the next the same
    hand should appear in only a slightly different position. Note that this
    may not hold when tracking is lost and the hand moves some distance away.

    Parameters
    ----------
    hand : pd.Series
        Row for the current hand; coordinate columns start with X/Y/Z.
    prev_frame : pd.DataFrame
        Hands from the previous frame, including a "hand_ID" column.
    detected_this_frame : list
        hand_IDs already claimed by other hands in the current frame; these
        are excluded so two hands never map to the same previous hand.

    Returns
    -------
    The matched hand_ID, or np.nan if no previous hand lies within tolerance.
    """
    tolerance = 3  # somewhat arbitrary, but rejects hands that are extremely far away

    # drop any previous hands that have already been claimed this frame
    prev_frame = prev_frame[~prev_frame.hand_ID.isin(detected_this_frame)]
    if prev_frame.empty:
        return np.nan
    # reset the index, since prev_frame is composed of rows taken from
    # different points in the dataframe
    prev_frame = prev_frame.reset_index()

    distance_list = []
    for _, prev_hand in prev_frame.iterrows():
        # sum of absolute differences over every coordinate column
        distance = 0
        # .items() — Series.iteritems() was removed in pandas 2.0
        for name, val in prev_hand.items():
            if name[0] == 'X' or name[0] == 'Y' or name[0] == 'Z':
                distance += np.abs(val - hand[name])
        distance_list.append(distance)

    # closest match (first occurrence on ties, same as the original np.where lookup)
    closest_pos = int(np.argmin(distance_list))
    closest = distance_list[closest_pos]
    # only accept the match if it is within the threshold
    if closest < tolerance:
        return prev_frame["hand_ID"][closest_pos]
    return np.nan
def getslope(a, b):
    """Return the slope of the line through points a and b ([x, y] pairs)."""
    rise = b[1] - a[1]
    run = b[0] - a[0]
    return rise / run
def get_intercept(slope, x, y):
    """Return the y-intercept of the line with `slope` passing through (x, y)."""
    return y - slope * x
def get_interceptX(slope, p1, p2):
    """Return the x-intercept of the line through points p1 and p2.

    Note: the `slope` argument is kept for interface compatibility but is
    not used — the slope is recomputed from the two points themselves.
    """
    rise = p1[1] - p2[1]
    run = p1[0] - p2[0]
    m = rise / run
    c = p1[1] - m * p1[0]
    # the x-intercept is where y == 0
    return (0 - c) / m
def output_progress(df_idx, len_df, checkpoints):
    """Print a percentage progress update once a checkpoint fraction is passed.

    The script can take a while to run, so this lets us see it hasn't crashed.
    Mutates `checkpoints` in place: each reported checkpoint is removed so it
    is only printed once.
    """
    if not checkpoints:
        return
    next_cp = checkpoints[0]
    if df_idx >= len_df * next_cp:
        print(str(int(next_cp * 100)) + "% complete")
        checkpoints.pop(0)
def flip(original_val):
    """Mirror a normalized (0-1) coordinate, i.e. return 1 - value."""
    return 1 - original_val
def find_duplicates(hand, hand_idx, frame):
    """Find other hands in `frame` that match `hand` too closely.

    If another hand's wrist lies within 0.05 (normalized image units) of this
    hand's wrist on both axes, it is probably a double-tracking artifact and
    should be dropped from the dataframe by the caller.

    Parameters
    ----------
    hand : pd.Series
        Row for the hand being checked (needs X_WRIST / Y_WRIST).
    hand_idx : index label of `hand` within `frame` (excluded from matching).
    frame : pd.DataFrame
        All hands detected at the current time point.

    Returns
    -------
    list of index labels of the near-identical (duplicate) hands.
    """
    duplicates = []
    for frame_idx, other_hand in frame.iterrows():
        if hand_idx == frame_idx:
            continue
        # use the row yielded by iterrows directly instead of re-indexing
        # the frame twice per comparison (same values, fewer lookups)
        if (abs(hand["X_WRIST"] - other_hand["X_WRIST"]) < 0.05
                and abs(hand["Y_WRIST"] - other_hand["Y_WRIST"]) < 0.05):
            duplicates.append(frame_idx)
    return duplicates
This first block is going to go through our df frame by frame and try to calculate which way the hand is facing, and thus where its origin may be. It will also try to match the hands from one frame to the next, that way we can look at a time-series, rather than single tracked points.
# Link loose per-frame detections into per-hand time series: walk the
# dataframe one time point at a time, assign a persistent hand_ID to each
# detection, and estimate each hand's screen-edge origin (Top/Bot/Left/Right).
datafile = os.listdir(foldtime)[0]
print("working on linking " + datafile + "...")
df = pd.read_csv(foldtime + datafile)
# create empty columns for the hand IDs and the origin/intercept estimates
df["hand_ID"] = np.nan
df["origin"] = np.nan
df["x_intercept"] = np.nan
df["y_intercept"] = np.nan
df_idx = 0
arbID = 0  # this will be a first-pass ID, incremented for every new hand
# a little progress counter so we know how far along it is
df_len = len(df)
# make a list of checkpoint values for 10% increments
checkpoints = [round(x * 0.1, 1) for x in range(1, 11)]
# we'll use the sorted unique time stamps to move through the dataframe
time_points = list(set(df["time"]))
time_points.sort()
time_idx = 0
# starting values: all detections at the first time point
current_time = time_points[time_idx]
frame = df.loc[df["time"] == current_time]
while current_time <= max(df["time"]):
    detected_this_frame = []  # so two hands can't be assigned to the same previous hand
    duplicates_this_frame = []
    duplicate = []
    for frame_idx, hand in frame.iterrows():
        # NOTE(review): duplicates_this_frame holds the *lists* returned by
        # find_duplicates, so this membership test only matches when a prior
        # call returned exactly [frame_idx] — confirm this is intended.
        if not [frame_idx] in duplicates_this_frame:
            # check for double-tracked (near-identical) hands and drop them
            duplicate = find_duplicates(hand, frame_idx, frame)
            duplicates_this_frame.append(duplicate)
            if duplicate:
                frame = frame.drop(duplicate)
                df = df.drop(duplicate)
            # if this is the first frame, each hand is new and receives a new ID
            if df_idx == 0:
                df.loc[frame_idx, "hand_ID"] = arbID
                arbID += 1
            else:
                # after the first frame, try to match each hand with a previous ID
                closest_ID = get_closest_hand(hand, prev_hands, detected_this_frame)
                detected_this_frame.append(closest_ID)
                # if there are no good matches, we assume it's a new
                # (previously untracked) hand and assign it a new ID
                if np.isnan(closest_ID):
                    closest_ID = arbID
                    arbID += 1
                df.loc[frame_idx, "hand_ID"] = closest_ID
            ##################################
            #### This block calculates hand orientation ####
            # first get the middle point of the hand's base (between the
            # index and pinky knuckles); y is mirrored via flip() so that
            # larger y means "up" rather than image-down
            midbase = [hand["X_PINKY_MCP"] + ((hand["X_INDEX_MCP"] - hand["X_PINKY_MCP"]) / 2),
                       flip(hand["Y_PINKY_MCP"]) + ((flip(hand["Y_INDEX_MCP"]) - flip(hand["Y_PINKY_MCP"])) / 2)]
            wrist_coords = [hand["X_WRIST"], flip(hand["Y_WRIST"])]
            if midbase[0] > hand["X_WRIST"]:
                hand_direction = "R"
            else:
                hand_direction = "L"
                # if left facing, shift the coordinates to the other side of the y-axis
                wrist_coords[0] = hand["X_WRIST"] * -1
                midbase[0] = midbase[0] * -1
            # then the slope from wrist to mid-base
            # NOTE(review): this uses the *un-mirrored* wrist x even for
            # left-facing hands; get_interceptX recomputes its own slope, so
            # this value only feeds get_intercept — confirm intended.
            slope = getslope([hand["X_WRIST"], flip(hand["Y_WRIST"])], midbase)
            # now get the initial X and Y intercepts; these are used to figure
            # out where in our image the hand seems to be projecting from
            # (Top, Bottom, Left or Right)
            intercept_X = get_interceptX(slope, wrist_coords, midbase)
            intercept_Y = get_intercept(slope, wrist_coords[0], wrist_coords[1])
            # classify the origin edge from the intercepts (coords are 0-1)
            # NOTE(review): if none of these conditions match, `origin` keeps
            # its value from the previous hand (or raises NameError on the
            # very first hand) — verify all cases are covered.
            if hand_direction == "R" and intercept_Y < 0 and 0 < intercept_X < 1:
                origin = "Bot"
            elif hand_direction == "R" and 0 < intercept_Y < 1:
                origin = "Left"
            elif hand_direction == "R" and intercept_Y > 1 and intercept_X > 0:
                origin = "Top"
                # recompute the x-intercept with y mirrored so it refers to the top edge
                intercept_X = get_interceptX(slope, [wrist_coords[0], wrist_coords[1] * -1], midbase)
            elif hand_direction == "L" and intercept_Y < 0 and intercept_X > -1:
                origin = "Bot"
                # undo the earlier mirroring of left-facing x coordinates
                intercept_X = intercept_X * -1
            elif hand_direction == "L" and 0 < intercept_Y < 1:
                origin = "Right"
            elif hand_direction == "L" and intercept_Y > 1 and intercept_X < 0:
                origin = "Top"
                intercept_X = get_interceptX(slope, [wrist_coords[0], wrist_coords[1] * -1], midbase)
            # write the orientation info back into the main dataframe
            df.loc[frame_idx, "origin"] = origin
            df.loc[frame_idx, "x_intercept"] = intercept_X
            df.loc[frame_idx, "y_intercept"] = intercept_Y
    ##################################
    # re-slice the frame so it reflects the newly assigned hand_IDs
    frame = df.loc[df["time"] == current_time]
    # if this is the first frame, store these values for later
    if df_idx == 0:
        prev_hands = frame.copy()
    # after getting this first set of hand coordinates, update it each frame
    else:
        # for each hand in the current frame, check..
        for _, hand in frame.iterrows():
            # .. if it's been logged in prev_hands already, update it in place
            if hand["hand_ID"] in set(prev_hands["hand_ID"]):
                prev_hands.loc[prev_hands["hand_ID"] == hand["hand_ID"]] = [hand]
            else:
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # this line requires pandas < 2 (pd.concat is the modern form).
                prev_hands = prev_hands.append(hand)
    # move the index forward to the next time point
    if len(frame) > 0:
        df_idx += len(frame)
    else:
        df_idx += 1
    time_idx += 1
    if current_time == max(df["time"]):
        break
    current_time = time_points[time_idx]
    frame = df.loc[df["time"] == current_time]
    # check progress
    output_progress(df_idx, df_len, checkpoints)
df.to_csv(foldtime + datafile.split(".")[0] + "_linked.csv")
# NOTE(review): this cell is a near-verbatim duplicate of the previous
# linking cell (it re-reads the raw CSV and redoes ID assignment and
# orientation from scratch, ending with df.head() instead of to_csv).
# Consider consolidating the two cells.
datafile = os.listdir(foldtime)[0]
print("working on linking " + datafile + "...")
df = pd.read_csv(foldtime + datafile)
# create empty columns for the hand IDs and the origin/intercept estimates
df["hand_ID"] = np.nan
df["origin"] = np.nan
df["x_intercept"] = np.nan
df["y_intercept"] = np.nan
df_idx = 0
arbID = 0  # this will be a first-pass ID, incremented for every new hand
# a little progress counter so we know how far along it is
df_len = len(df)
# make a list of checkpoint values for 10% increments
checkpoints = [round(x * 0.1, 1) for x in range(1, 11)]
current_time = 0  # overwritten just below from time_points
time_points = list(set(df["time"]))
time_points.sort()
time_idx = 0
current_time = time_points[time_idx]
frame = df.loc[df["time"] == current_time]
while current_time <= max(df["time"]):
    detected_this_frame = []  # so two hands can't be assigned to the same previous hand
    #######
    duplicates_this_frame = []
    for frame_idx, hand in frame.iterrows():
        # NOTE(review): as in the previous cell, this only matches when
        # find_duplicates returned exactly [frame_idx]
        if not [frame_idx] in duplicates_this_frame:
            # check for double-tracked (near-identical) hands and drop them
            duplicate = find_duplicates(hand, frame_idx, frame)
            duplicates_this_frame.append(duplicate)
            if duplicate:
                frame = frame.drop(duplicate)
                df = df.drop(duplicate)
            ########
            # if this is the first frame, each hand is new and receives a new ID
            if df_idx == 0:
                df.loc[frame_idx, "hand_ID"] = arbID
                arbID += 1
            else:
                # after the first frame, try to match each hand with a previous ID
                closest_ID = get_closest_hand(hand, prev_hands, detected_this_frame)
                detected_this_frame.append(closest_ID)
                # if there are no good matches, we assume it's a new
                # (previously untracked) hand and assign it a new ID
                if np.isnan(closest_ID):
                    closest_ID = arbID
                    arbID += 1
                df.loc[frame_idx, "hand_ID"] = closest_ID
            ##################################
            #### This block calculates hand orientation ####
            # middle point of the hand's base; y is mirrored via flip()
            midbase = [hand["X_PINKY_MCP"] + ((hand["X_INDEX_MCP"] - hand["X_PINKY_MCP"]) / 2),
                       flip(hand["Y_PINKY_MCP"]) + ((flip(hand["Y_INDEX_MCP"]) - flip(hand["Y_PINKY_MCP"])) / 2)]
            wrist_coords = [hand["X_WRIST"], flip(hand["Y_WRIST"])]
            if midbase[0] > hand["X_WRIST"]:
                hand_direction = "R"
            else:
                hand_direction = "L"
                # if left facing, shift the coordinates to the other side of the y-axis
                wrist_coords[0] = hand["X_WRIST"] * -1
                midbase[0] = midbase[0] * -1
            # then the slope from wrist to mid-base
            slope = getslope([hand["X_WRIST"], flip(hand["Y_WRIST"])], midbase)
            # initial X and Y intercepts, used to figure out where in the
            # image the hand seems to be projecting from (Top, Bottom, Left, Right)
            intercept_X = get_interceptX(slope, wrist_coords, midbase)
            intercept_Y = get_intercept(slope, wrist_coords[0], wrist_coords[1])
            # classify the origin edge from the intercepts (coords are 0-1)
            if hand_direction == "R" and intercept_Y < 0 and 0 < intercept_X < 1:
                origin = "Bot"
            elif hand_direction == "R" and 0 < intercept_Y < 1:
                origin = "Left"
            elif hand_direction == "R" and intercept_Y > 1 and intercept_X > 0:
                origin = "Top"
                # recompute the x-intercept with y mirrored so it refers to the top edge
                intercept_X = get_interceptX(slope, [wrist_coords[0], wrist_coords[1] * -1], midbase)
            elif hand_direction == "L" and intercept_Y < 0 and intercept_X > -1:
                origin = "Bot"
                intercept_X = intercept_X * -1
            elif hand_direction == "L" and 0 < intercept_Y < 1:
                origin = "Right"
            elif hand_direction == "L" and intercept_Y > 1 and intercept_X < 0:
                origin = "Top"
                intercept_X = get_interceptX(slope, [wrist_coords[0], wrist_coords[1] * -1], midbase)
            # write the orientation info back into the main dataframe
            df.loc[frame_idx, "origin"] = origin
            df.loc[frame_idx, "x_intercept"] = intercept_X
            df.loc[frame_idx, "y_intercept"] = intercept_Y
    ##################################
    # re-slice the frame so it reflects the newly assigned hand_IDs
    frame = df.loc[df["time"] == current_time]
    # if this is the first frame, store these values for later
    if df_idx == 0:
        prev_hands = frame.copy()
    # after the first set of hand coordinates, update prev_hands every frame
    else:
        # for each hand in the current frame, check..
        for _, hand in frame.iterrows():
            # .. if it's been logged in prev_hands already, update it in place
            if hand["hand_ID"] in set(prev_hands["hand_ID"]):
                prev_hands.loc[prev_hands["hand_ID"] == hand["hand_ID"]] = [hand]
            else:
                # NOTE(review): DataFrame.append requires pandas < 2
                prev_hands = prev_hands.append(hand)
    # move the index forward to the next time point
    if len(frame) > 0:
        df_idx += len(frame)
    else:
        df_idx += 1
    time_idx += 1
    # check progress
    if current_time == max(df["time"]):
        break
    current_time = time_points[time_idx]
    frame = df.loc[df["time"] == current_time]
    output_progress(df_idx, df_len, checkpoints)
df.head()
import matplotlib.pyplot as plt
import collections
# drop hands that were only tracked for a fraction (under a quarter) of the
# time, as these are likely to be artefacts
quarter_time = int(len(set(df["time"])) * 0.25)
# frames-per-hand_ID counts
# NOTE(review): hand_ID may contain NaN; distinct NaN objects hash equally
# but compare unequal, so Counter/set behavior for NaN rows is fragile —
# and `df.hand_ID != ID` is True for ALL NaN rows when ID is NaN, dropping
# them all at once. Verify this is the intended handling of unlabeled rows.
tracked_frames = collections.Counter(df["hand_ID"])
for ID in set(df["hand_ID"]):
    if tracked_frames[ID] < quarter_time:
        df = df[df.hand_ID != ID]
df.to_csv(foldtime + datafile.split(".")[0] + "_linked.csv")
This next block pairs each left hand with a right hand based on the origin estimates; inspecting its output can also be used to verify that there aren't too many hands being tracked, missing data, etc.
# Pair left hands with right hands based on where each hand's origin sits on
# the screen edge (Left/Right/Top/Bot) and the distance between origin points.
df = pd.read_csv(foldtime + datafile.split(".")[0] + "_linked.csv")
checkpoints = [round(x * 0.1, 1) for x in range(1, 11)]
# first, we need to switch the hand labels
# this is because mediapipe assumes the image is mirrored
for idx, row in df.iterrows():
    if "Right" in row["hand"]:
        df.loc[idx, "hand"] = 'label: "Left"'
    else:
        df.loc[idx, "hand"] = 'label: "Right"'
df_idx = 0
while df_idx < len(df):
    # NOTE(review): positional lookup — assumes df has a default RangeIndex
    # (true right after read_csv) so df_idx lands on the first row of a frame
    current_time = df["time"][df_idx]
    frame = df.loc[df["time"] == current_time]
    # collect all candidate left/right pairings for this frame here
    pairing_df = pd.DataFrame()
    pairing_idx = 0
    for frame_idx, hand_origin in frame.iterrows():
        # Once we have both intercepts, we pair hands by walking along the
        # screen edges. For left/right-oriented hands, a right hand should
        # pair with a left hand with a higher y-value on the same edge.
        # For top/bot-oriented hands, a Left should pair with a Right with a
        # lower (top) or higher (bot) x-value on the same edge.
        if "Left" in hand_origin["hand"]:
            # once we find a left hand, look for any potential match
            for match_idx, hand in frame.iterrows():
                # for each candidate, record the left hand's ID plus the
                # distance and ID of the potential pair; at the end we sort
                # out which ones fit best together
                if "Right" in hand["hand"]:
                    # if oriented bottom, we slide from left to right
                    if hand_origin["origin"] == "Bot":
                        # a right hand also on the bottom edge, and to the
                        # right of this left hand (>0 means right-of)
                        if (hand["x_intercept"] - hand_origin["x_intercept"] > 0) and hand["origin"] == "Bot":
                            pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                            pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                            pairing_df.loc[pairing_idx, "pairing_dist"] = hand["x_intercept"] - hand_origin["x_intercept"]
                            pairing_df.loc[pairing_idx, "idx"] = frame_idx
                            pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                            pairing_idx += 1
                        # if not, check whether this right hand sits on the Right edge
                        elif hand["origin"] == "Right":
                            # walk along the bottom edge to the corner, then up the right edge
                            distance = hand["y_intercept"] + (1 - hand_origin["x_intercept"])
                            # distance < 1 is an arbitrary cutoff to avoid
                            # picking up a hand on the other side of the screen
                            if distance > 0 and distance < 1:
                                pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                                pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                                # NOTE(review): this stored distance differs from
                                # the `distance` used for the cutoff above — confirm intended
                                pairing_df.loc[pairing_idx, "pairing_dist"] = hand["y_intercept"] + (hand["x_intercept"] - hand_origin["x_intercept"])
                                pairing_df.loc[pairing_idx, "idx"] = frame_idx
                                pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                                pairing_idx += 1
                    # if oriented top, we go right to left
                    elif hand_origin["origin"] == "Top":
                        # is this right hand also on the top edge?
                        if hand["origin"] == "Top" and (hand_origin["x_intercept"] - hand["x_intercept"] > 0):
                            pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                            pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                            pairing_df.loc[pairing_idx, "pairing_dist"] = hand_origin["x_intercept"] - hand["x_intercept"]
                            pairing_df.loc[pairing_idx, "idx"] = frame_idx
                            pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                            pairing_idx += 1
                        # otherwise slide down the next (left) edge, if anything is there
                        elif hand["origin"] == "Left":
                            distance = (1 - hand["y_intercept"]) + (1 - hand_origin["x_intercept"])
                            # distance < 1 is an arbitrary cutoff to avoid
                            # picking up a hand on the other side of the screen
                            if distance > 0 and distance < 1:
                                pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                                pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                                # NOTE(review): also differs from the cutoff `distance` — confirm
                                pairing_df.loc[pairing_idx, "pairing_dist"] = hand["y_intercept"] + (hand_origin["x_intercept"] - hand["x_intercept"])
                                pairing_df.loc[pairing_idx, "idx"] = frame_idx
                                pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                                pairing_idx += 1
                    # if the origin is on the right edge, move bottom-to-top then right-to-left
                    elif hand_origin["origin"] == "Right":
                        # >0 means the right hand is above the left on the same edge
                        if (hand["y_intercept"] - hand_origin["y_intercept"] > 0) and hand["origin"] == "Right":
                            pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                            pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                            pairing_df.loc[pairing_idx, "pairing_dist"] = hand["y_intercept"] - hand_origin["y_intercept"]
                            pairing_df.loc[pairing_idx, "idx"] = frame_idx
                            pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                            pairing_idx += 1
                        # if not, check whether this right hand is on the Top edge
                        elif hand["origin"] == "Top":
                            distance = (1 - hand["x_intercept"]) + (1 - hand_origin["y_intercept"])
                            # distance < 1 is an arbitrary cutoff to avoid
                            # picking up a hand on the other side of the screen
                            if distance > 0 and distance < 1:
                                pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                                pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                                pairing_df.loc[pairing_idx, "pairing_dist"] = distance
                                pairing_df.loc[pairing_idx, "idx"] = frame_idx
                                pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                                pairing_idx += 1
                    # if the origin is on the left edge, move bottom-to-top then left-to-right
                    elif hand_origin["origin"] == "Left":
                        # >0 means the right hand is above the left on the same edge
                        if (hand["y_intercept"] - hand_origin["y_intercept"] > 0) and hand["origin"] == "Left":
                            pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                            pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                            pairing_df.loc[pairing_idx, "pairing_dist"] = hand["y_intercept"] - hand_origin["y_intercept"]
                            pairing_df.loc[pairing_idx, "idx"] = frame_idx
                            pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                            pairing_idx += 1
                        # if not, check whether this right hand is on the Top edge
                        elif hand["origin"] == "Top":
                            distance = hand["x_intercept"] + (1 - hand_origin["y_intercept"])
                            # distance < 1 is an arbitrary cutoff to avoid
                            # picking up a hand on the other side of the screen
                            if distance > 0 and distance < 1:
                                pairing_df.loc[pairing_idx, "origin_ID"] = hand_origin["hand_ID"]
                                pairing_df.loc[pairing_idx, "hand_id"] = hand["hand_ID"]
                                pairing_df.loc[pairing_idx, "pairing_dist"] = distance
                                pairing_df.loc[pairing_idx, "idx"] = frame_idx
                                pairing_df.loc[pairing_idx, "match_idx"] = match_idx
                                pairing_idx += 1
    # at the end of each frame, greedily pick pairs: repeatedly take the
    # candidate with the smallest pairing distance, write it into df, then
    # remove both hands from the candidate pool
    while len(pairing_df) > 0:
        best_match = pairing_df[["pairing_dist"]].idxmin()[0]
        match_origin = pairing_df.loc[best_match, "origin_ID"]
        match_pair = pairing_df.loc[best_match, "hand_id"]
        origin_idx = pairing_df.loc[best_match, "idx"]
        match_idx = pairing_df.loc[best_match, "match_idx"]
        # write the pairing into both hands' rows (idx/match_idx are df row labels)
        df.loc[origin_idx, "paired_hand"] = match_pair
        df.loc[origin_idx, "pairing_distance"] = pairing_df.loc[best_match, "pairing_dist"]
        df.loc[match_idx, "paired_hand"] = match_origin
        df.loc[match_idx, "pairing_distance"] = pairing_df.loc[best_match, "pairing_dist"]
        # then remove these hands from the pairing_df
        pairing_df = pairing_df[pairing_df.origin_ID != match_origin]
        pairing_df = pairing_df[pairing_df.hand_id != match_pair]
    df_idx += len(frame)
    # check progress
    # NOTE(review): df_len here still holds the value set in an earlier cell
    output_progress(df_idx, df_len, checkpoints)
import collections
# This next block stabilizes the inconsistencies in the tracking.
# First, remove short-lived switches between a hand being ID'd as left or
# right by majority-voting the label per hand_ID.
for ID in set(df["hand_ID"]):
    if not np.isnan(ID):
        # the most frequently associated label for this hand ID
        label_List = df.loc[df["hand_ID"] == ID]["hand"]
        label_count = collections.Counter(label_List)
        max_label = label_count.most_common(1)[0][0]
        # now fill this in everywhere for this hand
        df.loc[df["hand_ID"] == ID, "hand"] = max_label
# Then, we "interpolate" the pairing from the data that we do have — even if
# the second hand isn't always present, we know it is a pair.
Left_hand = df.loc[df["hand"] == 'label: "Left"']
Left_IDs = set(Left_hand["hand_ID"])
for ID in Left_IDs:
    if not np.isnan(ID):
        # majority-vote over all Right hands this Left has been paired with
        pair_list = df.loc[df["hand_ID"] == ID]["paired_hand"]
        pair_count = collections.Counter(pair_list)
        max_pair = pair_count.most_common(1)[0][0]
        # now fill this in everywhere for this hand
        df.loc[df["hand_ID"] == ID, "paired_hand"] = max_pair
Right_hand = df.loc[df["hand"] == 'label: "Right"']
Right_IDs = set(Right_hand["hand_ID"])
for ID in Right_IDs:
    if not np.isnan(ID):
        # majority-vote over all Left hands this Right has been paired with
        pair_list = df.loc[df["hand_ID"] == ID]["paired_hand"]
        pair_count = collections.Counter(pair_list)
        max_pair = pair_count.most_common(1)[0][0]
        # now fill this in everywhere for this hand
        df.loc[df["hand_ID"] == ID, "paired_hand"] = max_pair
# finally, save the new dataframe
df.to_csv("./Timeseries_Output/" + datafile.split(".")[0] + "_paired.csv")
Let's see what this modified dataframe gives us.
# Inspect a few rows of the paired dataframe.
df.loc[35:40,]
See that we now have an estimation of which hand is paired with which other (e.g., the 2nd row shows hand 1.0 paired with 2.0). We also have the pairing distance, which gives us some idea of how far apart the origin points of the two hands are.
It's nice to have these data, but we should of course check the quality of these estimations to see how accurate they are.
This code can be found at: https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking/visualize_tracking_IDs.py
The full output video can be found under https://github.com/WimPouw/EnvisionBootcamp2021/tree/main/Python/MediaBodyTracking/Videotracking_output/sampletopview_paired.mp4
import time

# Video to overlay the tracking visualization on
videofile = "sampletopview.mp4"
# progress checkpoints at 10% increments (consumed by output_progress)
checkpoints = [round(x * 0.1, 1) for x in range(1, 11)]
# Hand landmarks, following MediaPipe Hands' 21-point order
# (wrist + four joints per finger).
# BUG FIX: the original list contained 'RING_TIP' twice and was missing
# 'RING_PIP', so the ring PIP joint was never drawn and the tip drawn twice.
markers = ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP',
           'INDEX_MCP', 'INDEX_PIP', 'INDEX_DIP', 'INDEX_TIP',
           'MIDDLE_MCP', 'MIDDLE_PIP', 'MIDDLE_DIP', 'MIDDLE_TIP',
           'RING_MCP', 'RING_PIP', 'RING_DIP', 'RING_TIP',
           'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']
# Overlay the paired tracking data on the source video, drawing each landmark
# and labelling each hand with its ID and paired-hand ID.
tracking_name = foldtime + videofile.split(".")[0] + "_paired.csv"
tracking_file = pd.read_csv(tracking_name)
# load in the video file
cap = cv2.VideoCapture(mypath + videofile)
hasFrame, frame = cap.read()
# create an output file to see our visualized tracking
output_filename = "./Videotracking_output_withIDs/" + videofile.split(".")[0] + "_paired.mp4"
# 30 fps writer matching the input frame size
vid_writer = cv2.VideoWriter(output_filename, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, (frame.shape[1], frame.shape[0]))
no_frames = max(tracking_file["index"])
frame_no = 1
while hasFrame:
    t = time.time()
    hasFrame, frame = cap.read()  # grabs the *next* frame
    frameCopy = np.copy(frame)
    if not hasFrame:
        cv2.waitKey()
        break
    # mediapipe scales x,y coordinates to a 0-1 range, so recompute pixels
    frameWidth = frame.shape[1]
    frameHeight = frame.shape[0]
    # get just the tracking data for this frame
    tracking_frame = tracking_file.loc[tracking_file["index"] == frame_no]
    for _, hand in tracking_frame.iterrows():
        # go through each joint/marker and draw a circle
        for marker in markers:
            x = int(hand["X_" + marker] * frameWidth)
            y = int(hand["Y_" + marker] * frameHeight)
            cv2.circle(frameCopy, (int(x), int(y)), 5, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
        # we want our ID text to be about the center horizontally,
        hand_cent_x = np.median([(hand["X_" + marker] * frameWidth) for marker in markers])
        # and just past the extreme of all the points
        # NOTE(review): max(Y)+10 places the text *below* the hand in image
        # coordinates (y grows downward) — confirm that's the intent
        hand_cent_y = max([(hand["Y_" + marker] * frameHeight) for marker in markers]) + 10
        if "Right" in hand["hand"]:
            hand_label = "R"
        else:
            hand_label = "L"
        hand_text = hand_label + " ID:" + str(hand["hand_ID"]) + " pair: " + str(hand["paired_hand"])
        cv2.putText(frameCopy, hand_text, (int(hand_cent_x), int(hand_cent_y)), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 1, lineType=cv2.LINE_AA)
    cv2.imshow('Frame', frameCopy)
    frame_no += 1
    vid_writer.write(frameCopy)
    output_progress(frame_no, no_frames, checkpoints)
vid_writer.release()
Look at the video, and judge for yourself how well the hand IDs and pairings hold up over time.