# 330 lines, 14 KiB
import os
import json
import subprocess
import sys
import cv2
import numpy as np
import pandas as pd
from typing import List
import skeleton_lib as skel
import concurrent.futures
def json_to_keypoints_openpose(json_file: str) -> List[skel.Keypoint]:
    """Load the first person's 2D pose keypoints from an OpenPose-style JSON file.

    The flat list at ``data[0]['people'][0]['pose_keypoints_2d']`` is walked in
    strides of three. Only the first two values of each stride are forwarded to
    ``skel.Keypoint``; the third value is skipped.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        One ``skel.Keypoint`` per stride of three values.
    """
    with open(json_file, 'r') as handle:
        payload = json.load(handle)

    # presumably the layout is (x, y, confidence) per keypoint -- TODO confirm
    flat = payload[0]['people'][0]['pose_keypoints_2d']

    result = []
    for offset in range(0, len(flat), 3):
        result.append(skel.Keypoint(flat[offset], flat[offset + 1]))
    return result
def array_json_to_Skeleton_Seqences(json_file: str) -> List[skel.Skeleton_Seqence]:
    """Read an "array JSON" file into one skeleton sequence per person slot.

    The file is expected to hold ``data[frame][person][keypoint]`` where each
    keypoint is an indexable ``[x, y, confidence]`` triple.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A list whose i-th entry is a ``skel.Skeleton_Seqence`` built from the
        skeletons found at position ``i`` of every frame, in frame order.
        A slot that is absent from a frame simply contributes no skeleton
        for that frame.
    """
    with open(json_file, 'r') as file:
        data = json.load(file)

    # Collect plain lists first; indexing past the end of a list (as the
    # previous code did) raises IndexError instead of growing it.
    per_person: List[list] = []
    for frame in data:
        for person_idx, raw_skeleton in enumerate(frame):
            while len(per_person) <= person_idx:
                per_person.append([])
            skeleton = skel.Skeleton(
                [skel.Keypoint(keypoint[0], keypoint[1], keypoint[2]) for keypoint in raw_skeleton]
            )
            per_person[person_idx].append(skeleton)

    # NOTE(review): assumes Skeleton_Seqence(list_of_skeletons) stores the list
    # as its frames (the original called it with ``[]``) -- confirm in skeleton_lib.
    return [skel.Skeleton_Seqence(skeletons) for skeletons in per_person]
def read_new_yolo_keypoints(json_file: str) -> List[List[skel.Skeleton]]:
    """Read per-frame skeletons from a JSON file of keypoint dictionaries.

    The file is expected to hold ``data[frame][person]['keypoints']``, a list
    of dicts with the keys ``'x'``, ``'y'`` and ``'confidence'``.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        One list per frame, each containing one ``skel.Skeleton`` per person.
    """
    with open(json_file, 'r') as file:
        data = json.load(file)

    frames = []
    for frame in data:
        skeletons = []
        for skeleton in frame:
            keypoints = [
                skel.Keypoint(point['x'], point['y'], point['confidence'])
                for point in skeleton['keypoints']
            ]
            # Keep the parsed skeleton; without this the result is always empty.
            skeletons.append(skel.Skeleton(keypoints))
        frames.append(skeletons)
    return frames
def add_neck_to_og_coco(folder: str):
    """Insert a synthetic "neck" keypoint into every skeleton of every file in a folder.

    Each file in ``folder`` is parsed as JSON shaped like
    ``data[frame][person]['keypoints']`` (a list of dicts with ``'x'``, ``'y'``
    and ``'confidence'``). For every skeleton with at least eight keypoints,
    the component-wise mean of entries 6 and 7 is inserted at index 6. The file
    is then rewritten in place with an indent of 4.

    NOTE(review): running this twice on the same folder inserts a second neck.

    Args:
        folder: Directory whose files are rewritten in place.
    """
    for file in os.listdir(folder):
        path = os.path.join(folder, file)
        with open(path, 'r') as f:
            data = json.load(f)
        print(f"Processing {file}")
        for frame in data:
            for skeleton in frame:
                _skeleton = skeleton['keypoints']
                # Add neck keypoint to the original COCO keypoints.
                # Entries 6 AND 7 are read, so eight entries are required;
                # the previous ``> 6`` guard crashed on 7-element skeletons.
                if len(_skeleton) > 7:
                    first, second = _skeleton[6], _skeleton[7]
                    neck = {
                        key: (first[key] + second[key]) / 2
                        for key in ('x', 'y', 'confidence')
                    }
                    _skeleton.insert(6, neck)
        with open(path, 'w') as f:
            json.dump(data, f, indent=4)
def Skeleton_Seqences_save_to_array_json(skeleton_sequences: List[skel.Skeleton_Seqence], json_file: str):
    """Write skeleton sequences as ``data[frame][person][keypoint] = [x, y, confidence]``.

    The number of frames written is the frame count of the first sequence.
    For each frame index, ``skel.get_time_slice_for_Skeleton_Seqences`` supplies
    the skeletons to serialise.

    Args:
        skeleton_sequences: Sequences to serialise; an empty list writes ``[]``.
        json_file: Destination path. Its parent directory is created if needed.
    """
    # Ensure the directory exists. A bare filename has an empty dirname,
    # and os.makedirs('') raises, so only create when there is one.
    parent = os.path.dirname(json_file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    frame_total = len(skeleton_sequences[0].skeletons_frame) if skeleton_sequences else 0

    data = []
    for i in range(frame_total):
        sliced = skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i)
        sequence_data = []
        for skeleton in sliced:
            keypoints_data = [[kp.x, kp.y, kp.confidence] for kp in skeleton.keypoints]
            sequence_data.append(keypoints_data)
        data.append(sequence_data)

    with open(json_file, 'w') as file:
        json.dump(data, file, indent=4)
def process_json_file(json_file, directory, output_directory):
    """Repair the keypoints of one array-JSON file and save the result.

    Every skeleton of every frame is passed through ``skel.fix_keypoints``
    together with the keypoints of the same slot in the previous and next
    frame (``None`` at the ends, or when the neighbour frame has no such slot).
    The repaired sequences are written under ``output_directory`` using the
    input file's base name.

    Args:
        json_file: File name relative to ``directory``.
        directory: Folder containing the input file.
        output_directory: Folder that receives the repaired file.
    """
    json_file = os.path.join(directory, json_file)
    skeleton_sequences = array_json_to_Skeleton_Seqences(json_file)

    # default=0 keeps an input without any skeletons from raising ValueError.
    frame_count = max(
        (len(seq.skeletons_frame) for seq in skeleton_sequences if seq is not None),
        default=0,
    )
    sliced_list = [
        skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i)
        for i in range(frame_count)
    ]

    for i, sliced in enumerate(sliced_list):
        last_sliced = sliced_list[i - 1] if i > 0 else None
        next_sliced = sliced_list[i + 1] if i < frame_count - 1 else None
        for j, skeleton in enumerate(sliced):
            # A neighbouring frame may hold fewer skeletons than this one.
            last_keypoints = (
                last_sliced[j].keypoints if last_sliced and j < len(last_sliced) else None
            )
            next_keypoints = (
                next_sliced[j].keypoints if next_sliced and j < len(next_sliced) else None
            )
            keypoints = skel.fix_keypoints(skeleton.keypoints, last_keypoints, next_keypoints)
            skeleton_sequences[j].get_frame(i).keypoints = keypoints

    # os.path.join instead of ``+`` so a directory without a trailing
    # separator no longer produces a mangled path such as "outclip.json".
    output_path = os.path.join(output_directory, os.path.basename(json_file))
    Skeleton_Seqences_save_to_array_json(skeleton_sequences, output_path)
def process_json_files_chunk(json_files_chunk, directory, output_directory):
    """Run ``process_json_file`` on each file name of one chunk, in order.

    Args:
        json_files_chunk: Iterable of file names relative to ``directory``.
        directory: Folder containing the input files.
        output_directory: Folder that receives the processed files.
    """
    for name in json_files_chunk:
        process_json_file(name, directory, output_directory)
def process_json_files_multi_threaded(json_files, directory, output_directory):
    """Process every ``.json`` file of a directory on a thread pool.

    The file names are split into up to 64 chunks and each non-empty chunk is
    handed to ``process_json_files_chunk``. A failure inside a chunk is
    printed and does not stop the remaining chunks.

    Args:
        json_files: NOTE(review): currently ignored -- the list is always
            rebuilt from ``directory``; confirm whether callers rely on that.
        directory: Folder scanned for ``.json`` files.
        output_directory: Folder that receives the processed files.
    """
    json_files = [f for f in os.listdir(directory) if f.endswith('.json')]
    if not json_files:
        print("No JSON files found in the directory.")
        return

    json_files_chunks = np.array_split(json_files, 64)
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            # output_directory must be forwarded; omitting it made every
            # chunk fail with a TypeError.
            executor.submit(process_json_files_chunk, chunk, directory, output_directory)
            for chunk in json_files_chunks
            if len(chunk)  # fewer than 64 files leaves some chunks empty
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing file chunk: {e}")
def process_clip_descriptor(input_file_path, output_file_path):
    """Add a ``video_id`` column derived from the ``URL`` column and save the CSV.

    Args:
        input_file_path: CSV file to read; it must contain a ``URL`` column.
        output_file_path: CSV file to write (without the index column).
    """
    ClipDescriptorKaggle = pd.read_csv(input_file_path)
    # Add a new column to store the YouTube video ID by extracting it from the URL
    ClipDescriptorKaggle['video_id'] = ClipDescriptorKaggle['URL'].apply(_youtube_video_id)
    # Save the processed DataFrame to a new CSV file.
    # newline='' lets the csv writer control line endings itself.
    with open(output_file_path, 'w', newline='') as file:
        ClipDescriptorKaggle.to_csv(file, index=False)


def _youtube_video_id(url):
    """Return the video ID contained in a YouTube URL.

    The ``v`` query parameter is used wherever it appears in the query string;
    ``youtu.be/<id>`` short links use the first path segment. Anything else
    falls back to the legacy rule: the text between the first ``=`` and the
    following ``&``.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    ids = parse_qs(parsed.query).get('v')
    if ids:
        return ids[0]
    short_path = parsed.path.strip('/')
    if parsed.netloc.endswith('youtu.be') and short_path:
        return short_path.split('/')[0]
    # Legacy behaviour, kept so inputs without a ``v`` parameter act as before.
    return url.split('=')[1].split('&')[0]
def get_frames_from_fixed_json(json_file):
    """Read a repaired array-JSON file into two skeletons per frame.

    The file is expected to hold ``data[frame][person][keypoint]`` with each
    keypoint an indexable ``[x, y, confidence]`` triple. Only the first two
    persons of every frame are read.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        One list per frame, each holding two ``skel.Skeleton`` objects.

    Raises:
        IndexError: If a frame contains fewer than two skeletons.
    """
    frames = []
    with open(json_file, 'r') as file:
        data = json.load(file)
    for frame in data:
        two_skeletons = []
        for i in range(2):  # Assuming there are always 2 skeletons
            keypoints = [skel.Keypoint(point[0], point[1], point[2]) for point in frame[i]]
            two_skeletons.append(skel.Skeleton(keypoints))
        frames.append(two_skeletons)
    return frames
def get_avg_keypoints(keypoints):
    """Collapse a collection of keypoints into their arithmetic mean.

    Args:
        keypoints: Objects exposing ``x``, ``y`` and ``confidence``.

    Returns:
        A ``skel.Keypoint`` holding the mean x, mean y and mean confidence.
        Every component is 0 when ``keypoints`` is empty.
    """
    def _mean(values):
        # An empty input averages to 0 rather than dividing by zero.
        if not values:
            return 0
        return sum(values) / len(values)

    xs = [kp.x for kp in keypoints]
    ys = [kp.y for kp in keypoints]
    scores = [kp.confidence for kp in keypoints]
    return skel.Keypoint(_mean(xs), _mean(ys), _mean(scores))
def get_avg_keypoints_in_frames(frames):
    """Reduce every skeleton of every frame to one averaged keypoint.

    Args:
        frames: Iterable of frames, each an iterable of skeleton objects that
            expose a ``keypoints`` attribute.

    Returns:
        One list per frame containing the result of ``get_avg_keypoints`` for
        each skeleton of that frame, in order.
    """
    avg_keypoints_in_frames = []
    for frame in frames:
        avg_keypoints = [get_avg_keypoints(skeleton.keypoints) for skeleton in frame]
        avg_keypoints_in_frames.append(avg_keypoints)
    return avg_keypoints_in_frames
def process_avg_keypoints_row(row, output_dir):
    """Compute and store the averaged keypoints for one descriptor row.

    The input is read from ``./fixed/<ClipName>.json`` and the result is
    written to ``<output_dir>/<ClipName>.json`` using ``skel.KeypointEncoder``.

    Args:
        row: Mapping-like row providing ``'ClipName'``.
        output_dir: Directory that receives the output JSON file.
    """
    json_file = './fixed/' + row['ClipName'] + '.json'
    print(f"Processing {json_file}")

    averaged = get_avg_keypoints_in_frames(get_frames_from_fixed_json(json_file))

    destination = os.path.join(output_dir, row['ClipName'] + '.json')
    with open(destination, 'w') as out:
        json.dump(averaged, out, indent=4, cls=skel.KeypointEncoder)
def process_batch_avg_keypoints_row(batch, output_dir):
    """Run ``process_avg_keypoints_row`` on each row of a DataFrame slice.

    Args:
        batch: DataFrame slice whose rows are processed one after another.
        output_dir: Directory forwarded to ``process_avg_keypoints_row``.
    """
    print(f"Processing batch of {len(batch)} rows.")
    for _label, record in batch.iterrows():
        process_avg_keypoints_row(record, output_dir)
def process_descriptor_save_avg_keypoints(descriptor: pd.DataFrame):
    """Compute averaged keypoints for every descriptor row on a thread pool.

    The descriptor is cut into consecutive row batches that are handed to
    ``process_batch_avg_keypoints_row``; results land in ``./avg_keypoints``.
    A failure inside a batch is printed and does not stop the other batches.

    Args:
        descriptor: DataFrame with one clip per row.
    """
    num_threads = 64
    batch_size = max(1, len(descriptor) // num_threads)
    output_dir = './avg_keypoints'
    os.makedirs(output_dir, exist_ok=True)

    batches = [descriptor.iloc[i:i + batch_size] for i in range(0, len(descriptor), batch_size)]
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(process_batch_avg_keypoints_row, batch, output_dir)
            for batch in batches
        ]
        # Collect the results: an unconsumed executor.map() silently drops
        # every exception raised inside a worker.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing batch: {e}")
def download_video(youtube_url, video_file):
    """Download a video with ``yt-dlp`` unless the target file already exists.

    Args:
        youtube_url: URL handed to ``yt-dlp``.
        video_file: Output path passed to ``yt-dlp -o``.

    Raises:
        subprocess.CalledProcessError: If ``yt-dlp`` exits with a non-zero status.
    """
    if os.path.exists(video_file):
        # Already downloaded -- nothing to do.
        return
    command = [
        # 'best[height<=360]' is the yt-dlp format selector; presumably it
        # caps the download at 360p -- confirm against the yt-dlp docs.
        'yt-dlp', '-f', 'best[height<=360]', '-o', video_file, youtube_url
    ]
    subprocess.run(command, check=True)
def extract_frames(video_file, start_frame, end_frame, output_folder):
    """Extract frames ``start_frame`` .. ``end_frame - 1`` of a video as PNG files.

    The output folder is created when missing. Nothing is extracted when the
    video file does not exist, or when the folder already contains exactly
    ``end_frame - start_frame`` entries.

    Args:
        video_file: Path of the source video.
        start_frame: Index of the first frame to keep.
        end_frame: Index one past the last frame to keep.
        output_folder: Directory that receives the ``%08d.png`` files.

    Raises:
        subprocess.CalledProcessError: If ``ffmpeg`` exits with a non-zero status.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder, exist_ok=True)
    if not os.path.exists(video_file):
        # NOTE(review): skipping (rather than raising) is an assumption about
        # the intended behaviour for a missing video -- confirm.
        print(f"Video file {video_file} does not exist, skipping.")
        return
    if len(os.listdir(output_folder)) == end_frame - start_frame:
        # The expected number of files is already present.
        return
    command = [
        'ffmpeg', '-i', video_file, '-vf', f"select='between(n\\,{start_frame}\\,{end_frame - 1})'",
        # presumably -frame_pts makes the file name carry the frame number
        # instead of a running counter -- confirm against the ffmpeg docs.
        '-vsync', 'vfr', '-frame_pts', 'true', os.path.join(output_folder, '%08d.png')
    ]
    subprocess.run(command, check=True)
def process_video_frames(row, video_path, video_frame_path):
    """Extract the frames of one clip and strip zero padding from the file names.

    Args:
        row: Mapping-like row providing ``'video_id'``, ``'Start_frame'``,
            ``'End_frame'`` and ``'ClipName'``.
        video_path: Directory containing ``<video_id>.mp4``.
        video_frame_path: Directory under which ``<ClipName>/`` is filled.
    """
    video_file = os.path.join(video_path, f"{row['video_id']}.mp4")
    start_frame = int(row['Start_frame'])
    end_frame = int(row['End_frame'])
    clip_name = row['ClipName']
    output_folder = os.path.join(video_frame_path, clip_name)
    # if not os.path.exists(video_file):
    #     download_video(row['URL'], video_file)
    extract_frames(video_file, start_frame, end_frame, output_folder)
    # remove the leading zeros from the frame names
    for filename in os.listdir(output_folder):
        stem, extension = os.path.splitext(filename)
        # Keep a single "0" for frame zero; a plain lstrip('0') turned
        # "00000000.png" into ".png".
        new_name = (stem.lstrip('0') or '0') + extension
        if new_name != filename:
            os.rename(os.path.join(output_folder, filename), os.path.join(output_folder, new_name))
def process_video_frames_multi_threaded(descriptor: pd.DataFrame, video_path, video_frame_path):
    """Run ``process_video_frames`` for every descriptor row on 16 threads.

    A failure for one row is printed and does not stop the remaining rows.

    Args:
        descriptor: DataFrame with one clip per row.
        video_path: Directory containing the source videos.
        video_frame_path: Directory that receives one frame folder per clip.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        futures = [
            executor.submit(process_video_frames, row, video_path, video_frame_path)
            for _, row in descriptor.iterrows()
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                # result() re-raises whatever the worker raised.
                future.result()
            except Exception as e:
                print(f"Error processing row: {e}")
def cal_lerp_avg_keypoints(keypoints: "List[skel.Keypoint]") -> "List[skel.Keypoint]":
    """Fill undetected keypoints of a track by linear interpolation, in place.

    A keypoint counts as undetected when its ``confidence`` is not greater
    than 0.0. Gaps between two detected keypoints are interpolated linearly
    (x, y and confidence). Undetected keypoints before the first or after the
    last detected one receive a copy of that keypoint's values.

    Args:
        keypoints: Track of objects with mutable ``x``, ``y`` and
            ``confidence`` attributes, one per frame.

    Returns:
        The same list object, with its elements updated. It is returned
        untouched when no keypoint is detected.
    """
    # Indices of the detected keypoints, in ascending order.
    valid = [i for i, kp in enumerate(keypoints) if kp.confidence > 0.0]
    if not valid:
        return keypoints  # No valid keypoints found

    first_valid_idx, last_valid_idx = valid[0], valid[-1]

    # Copy the first valid keypoint's values to all preceding invalid keypoints
    head = keypoints[first_valid_idx]
    for kp in keypoints[:first_valid_idx]:
        kp.x, kp.y, kp.confidence = head.x, head.y, head.confidence

    # Copy the last valid keypoint's values to all succeeding invalid keypoints
    tail = keypoints[last_valid_idx]
    for kp in keypoints[last_valid_idx + 1:]:
        kp.x, kp.y, kp.confidence = tail.x, tail.y, tail.confidence

    # Interpolate inside every gap bounded by two consecutive valid keypoints
    for left_idx, right_idx in zip(valid, valid[1:]):
        left, right = keypoints[left_idx], keypoints[right_idx]
        span = right_idx - left_idx
        for j in range(left_idx + 1, right_idx):
            t = (j - left_idx) / span
            keypoints[j].x = left.x * (1 - t) + right.x * t
            keypoints[j].y = left.y * (1 - t) + right.y * t
            keypoints[j].confidence = left.confidence * (1 - t) + right.confidence * t
    return keypoints
def process_avg_keypoints_folder(avg_keypoints_folder, output_folder):
    """Interpolate the averaged keypoints of every file in a folder.

    Each input file is parsed as a JSON list of frames, where entries 0 and 1
    of a frame are dicts with ``'x'``, ``'y'`` and ``'confidence'``. The two
    tracks are passed separately through ``cal_lerp_avg_keypoints`` and
    written back, paired per frame, under the same file name in
    ``output_folder``.

    Args:
        avg_keypoints_folder: Directory containing the input JSON files.
        output_folder: Directory that receives the results (created if missing).
    """
    os.makedirs(output_folder, exist_ok=True)
    for name in os.listdir(avg_keypoints_folder):
        with open(os.path.join(avg_keypoints_folder, name), 'r') as source:
            frames = json.load(source)

        # One track per skeleton slot; slot 0 is filled before slot 1.
        tracks = ([], [])
        for frame in frames:
            for slot, track in enumerate(tracks):
                entry = frame[slot]
                track.append(skel.Keypoint(entry['x'], entry['y'], entry['confidence']))

        first_track = cal_lerp_avg_keypoints(tracks[0])
        second_track = cal_lerp_avg_keypoints(tracks[1])

        paired = [[a, b] for a, b in zip(first_track, second_track)]
        with open(os.path.join(output_folder, name), 'w') as target:
            json.dump(paired, target, cls=skel.KeypointEncoder, indent=4)
def main():
    """Script entry point: load the processed clip descriptor and prepare folders.

    Reads ``./ClipDescriptorKaggle_processed.csv`` and makes sure
    ``./avg_keypoints`` exists. The loaded descriptor and the two video paths
    are not used further in the visible code.
    """
    descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')
    os.makedirs('./avg_keypoints', exist_ok=True)
    video_path, video_frame_path = './video', './video_frames'

    # Scratch code kept from earlier experiments:
    # print("Done processing all rows.")
    # canvas = np.zeros((360, 640, 3), dtype=np.uint8)
    # canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)
    # save the image
    # cv2.imwrite('test.png', canvas)
# Run the pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()