import concurrent.futures
import json
import os
import subprocess
import sys
from typing import List

import cv2
import numpy as np
import pandas as pd

import skeleton_lib as skel

sys.path.append('./')


def json_to_keypoints_openpose(json_file: str) -> List[skel.Keypoint]:
    """Load the first person's 2D pose keypoints from an OpenPose-style JSON file.

    The file is read as ``data[0]['people'][0]['pose_keypoints_2d']``, a flat
    list consumed in strides of three values. Only the first two values of
    each triple are passed to ``skel.Keypoint``; the third is dropped.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        One ``skel.Keypoint`` per complete triple in the flat list.
    """
    with open(json_file, 'r') as file:
        data = json.load(file)
    flat = data[0]['people'][0]['pose_keypoints_2d']
    # Slicing + zip stops at the last complete pair, so a trailing partial
    # triple cannot trigger an IndexError the way ``flat[i + 1]`` could.
    return [skel.Keypoint(x, y) for x, y in zip(flat[0::3], flat[1::3])]


def array_json_to_Skeleton_Seqences(json_file: str) -> List[skel.Skeleton_Seqence]:
    """Read an "array JSON" file into one ``skel.Skeleton_Seqence`` per person slot.

    The file is a list of frames; each frame is a list of skeletons; each
    skeleton is a list of ``[x, y, confidence]`` triples. The skeleton at
    position ``i`` of every frame is appended to sequence ``i``.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A list of sequences, indexed by the skeleton's position in a frame.
    """
    with open(json_file, 'r') as file:
        data = json.load(file)

    skeleton_sequences = []
    for frame in data:
        for i, person in enumerate(frame):
            # Create a sequence the first time slot ``i`` is seen; existing
            # sequences are never reset, so frames accumulate across the file.
            while len(skeleton_sequences) <= i:
                skeleton_sequences.append(skel.Skeleton_Seqence([]))
            skeleton = skel.Skeleton(
                [skel.Keypoint(point[0], point[1], point[2]) for point in person]
            )
            skeleton_sequences[i].add_frame(skeleton)
    return skeleton_sequences


def read_new_yolo_keypoints(json_file: str) -> List[List[skel.Skeleton]]:
    """Read a keypoint file whose points are ``{'x', 'y', 'confidence'}`` dicts.

    The file is a list of frames; each frame is a list of objects with a
    ``'keypoints'`` list.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        For every frame, the list of ``skel.Skeleton`` objects found in it.
    """
    with open(json_file, 'r') as file:
        data = json.load(file)

    frames = []
    for frame in data:
        skeletons = [
            skel.Skeleton(
                [
                    skel.Keypoint(point['x'], point['y'], point['confidence'])
                    for point in skeleton['keypoints']
                ]
            )
            for skeleton in frame
        ]
        frames.append(skeletons)
    return frames


def add_neck_to_og_coco(folder: str):
    """Insert a synthetic neck keypoint into every skeleton of every file in ``folder``.

    Each file is loaded as JSON, modified and written back in place. The neck
    is the element-wise mean of the keypoints at indices 6 and 7 and is
    inserted at index 6.

    NOTE: the operation is not idempotent; running it twice on the same folder
    inserts a second neck point.
    NOTE(review): in the standard 0-indexed COCO layout the shoulders are
    indices 5 and 6 - confirm that 6 and 7 are the intended points for this
    file format.

    Args:
        folder: Directory whose files are all rewritten.
    """
    for file in os.listdir(folder):
        path = os.path.join(folder, file)
        with open(path, 'r') as f:
            data = json.load(f)
        print(f"Processing {file}")
        for frame in data:
            for skeleton in frame:
                _skeleton = skeleton['keypoints']
                # Add neck keypoint to the original COCO keypoints.
                # Index 7 is read below, so at least 8 keypoints are required
                # (the previous ``> 6`` guard let a 7-point skeleton crash).
                if len(_skeleton) > 7:
                    first, second = _skeleton[6], _skeleton[7]
                    neck = {
                        'x': (first['x'] + second['x']) / 2,
                        'y': (first['y'] + second['y']) / 2,
                        'confidence': (first['confidence'] + second['confidence']) / 2,
                    }
                    _skeleton.insert(6, neck)
        with open(path, 'w') as f:
            json.dump(data, f, indent=4)


def Skeleton_Seqences_save_to_array_json(skeleton_sequences: "List[skel.Skeleton_Seqence]", json_file: str):
    """Write skeleton sequences to ``json_file`` as frames of ``[x, y, confidence]`` lists.

    The frame count is taken from the first sequence. For every frame index
    the per-sequence skeletons are obtained through
    ``skel.get_time_slice_for_Skeleton_Seqences``.

    Args:
        skeleton_sequences: Sequences to serialise. An empty list produces a
            file containing ``[]``.
        json_file: Destination path; its parent directory is created if needed.
    """
    # os.makedirs('') raises, so only create a directory when the path has one.
    out_dir = os.path.dirname(json_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    frame_count = len(skeleton_sequences[0].skeletons_frame) if skeleton_sequences else 0
    data = []
    for i in range(frame_count):
        sliced = skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i)
        data.append(
            [[[kp.x, kp.y, kp.confidence] for kp in skeleton.keypoints] for skeleton in sliced]
        )

    with open(json_file, 'w') as file:
        json.dump(data, file, indent=4)


def process_json_file(json_file, directory, output_directory):
    """Repair the keypoints of one array-JSON file and save the result.

    Every skeleton of every frame is passed through ``skel.fix_keypoints``
    together with the same-slot skeleton of the previous and next frame
    (``None`` at the ends of the clip).

    Args:
        json_file: File name relative to ``directory``.
        directory: Folder containing the input file.
        output_directory: Folder receiving a file with the same base name.
    """
    json_file = os.path.join(directory, json_file)
    skeleton_sequences = array_json_to_Skeleton_Seqences(json_file)

    # ``default=0`` keeps an empty input file from raising inside max().
    frame_count = max(
        (len(seq.skeletons_frame) for seq in skeleton_sequences if seq is not None),
        default=0,
    )
    sliced_list = [
        skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i)
        for i in range(frame_count)
    ]

    for i, sliced in enumerate(sliced_list):
        last_sliced = sliced_list[i - 1] if i > 0 else None
        next_sliced = sliced_list[i + 1] if i < frame_count - 1 else None
        for j, skeleton in enumerate(sliced):
            last_keypoints = last_sliced[j].keypoints if last_sliced else None
            next_keypoints = next_sliced[j].keypoints if next_sliced else None
            skeleton_sequences[j].get_frame(i).keypoints = skel.fix_keypoints(
                skeleton.keypoints, last_keypoints, next_keypoints
            )

    # os.path.join instead of ``+`` so a directory given without a trailing
    # separator no longer yields a path such as "outfile.json".
    Skeleton_Seqences_save_to_array_json(
        skeleton_sequences,
        os.path.join(output_directory, os.path.basename(json_file)),
    )


def process_json_files_chunk(json_files_chunk, directory, output_directory):
    """Run ``process_json_file`` sequentially over one chunk of file names."""
    for json_file in json_files_chunk:
        process_json_file(json_file, directory, output_directory)


def process_json_files_multi_threaded(json_files, directory, output_directory):
    """Process every ``.json`` file found in ``directory`` on a thread pool.

    NOTE: as in the original code, the ``json_files`` argument is ignored and
    the list is rebuilt from ``directory``. The parameter is kept so existing
    callers keep working.

    Args:
        json_files: Unused; see note above.
        directory: Folder that is scanned for ``*.json`` files.
        output_directory: Folder receiving the processed files.
    """
    json_files = [f for f in os.listdir(directory) if f.endswith('.json')]
    if not json_files:
        print("No JSON files found in the directory.")
        return

    # array_split always returns 64 pieces; drop the empty ones that appear
    # when there are fewer than 64 files, and hand plain str names to workers.
    json_files_chunks = [
        chunk.tolist() for chunk in np.array_split(json_files, 64) if len(chunk)
    ]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # output_directory was previously missing from this call, which made
        # every chunk fail with a TypeError.
        futures = [
            executor.submit(process_json_files_chunk, chunk, directory, output_directory)
            for chunk in json_files_chunks
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing file chunk: {e}")


def _youtube_video_id(url):
    """Return the video id of a YouTube watch URL."""
    from urllib.parse import parse_qs, urlparse

    query = parse_qs(urlparse(url).query)
    if 'v' in query:
        return query['v'][0]
    # Legacy behaviour for URLs without an explicit ``v`` parameter.
    return url.split('=')[1].split('&')[0]


def process_clip_descriptor(input_file_path, output_file_path):
    """Add a ``video_id`` column, derived from the ``URL`` column, to a CSV.

    Args:
        input_file_path: CSV file containing a ``URL`` column.
        output_file_path: Path of the CSV written with the extra column.
    """
    ClipDescriptorKaggle = pd.read_csv(input_file_path)
    # Add a new column to store the YouTube video ID by extracting it from the URL.
    ClipDescriptorKaggle['video_id'] = ClipDescriptorKaggle['URL'].apply(_youtube_video_id)
    # Let pandas open the file itself so newline handling is correct everywhere.
    ClipDescriptorKaggle.to_csv(output_file_path, index=False)


def get_frames_from_fixed_json(json_file):
    """Load a fixed array-JSON file as a list of two-skeleton frames.

    Only the first two skeletons of each frame are read; a frame with fewer
    than two raises ``IndexError``.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A list with one ``[skeleton, skeleton]`` pair per frame.
    """
    with open(json_file, 'r') as file:
        data = json.load(file)

    frames = []
    for frame in data:
        # Assuming there are always 2 skeletons.
        two_skeletons = [
            skel.Skeleton(
                [skel.Keypoint(point[0], point[1], point[2]) for point in frame[i]]
            )
            for i in range(2)
        ]
        frames.append(two_skeletons)
    return frames


def get_avg_keypoints(keypoints):
    """Return a ``skel.Keypoint`` holding the mean x, y and confidence.

    An empty input yields a keypoint of all zeros.
    """
    points = list(keypoints)
    count = len(points)
    if count == 0:
        return skel.Keypoint(0, 0, 0)
    avg_x = sum(point.x for point in points) / count
    avg_y = sum(point.y for point in points) / count
    avg_confidence = sum(point.confidence for point in points) / count
    return skel.Keypoint(avg_x, avg_y, avg_confidence)


def get_avg_keypoints_in_frames(frames):
    """Return, per frame, the averaged keypoint of each skeleton in that frame."""
    return [
        [get_avg_keypoints(skeleton.keypoints) for skeleton in frame]
        for frame in frames
    ]


def process_avg_keypoints_row(row, output_dir):
    """Compute and store the per-frame average keypoints of one clip.

    Reads ``./fixed/<ClipName>.json`` and writes ``<output_dir>/<ClipName>.json``
    using ``skel.KeypointEncoder``.

    Args:
        row: Mapping with a ``'ClipName'`` entry.
        output_dir: Existing folder receiving the output file.
    """
    json_file = './fixed/' + row['ClipName'] + '.json'
    print(f"Processing {json_file}")
    frames = get_frames_from_fixed_json(json_file)
    avg_keypoints = get_avg_keypoints_in_frames(frames)
    with open(os.path.join(output_dir, row['ClipName'] + '.json'), 'w') as file:
        json.dump(avg_keypoints, file, indent=4, cls=skel.KeypointEncoder)


def process_batch_avg_keypoints_row(batch, output_dir):
    """Run ``process_avg_keypoints_row`` for each row of a DataFrame slice."""
    print(f"Processing batch of {len(batch)} rows.")
    for _, row in batch.iterrows():
        process_avg_keypoints_row(row, output_dir)


def process_descriptor_save_avg_keypoints(descriptor: pd.DataFrame):
    """Write average keypoints for every clip of ``descriptor`` into ``./avg_keypoints``.

    The rows are split into batches that are processed on a thread pool. A
    failing batch is reported on stdout and does not stop the others.

    Args:
        descriptor: Clip table with a ``ClipName`` column.
    """
    num_threads = 64
    batch_size = max(1, len(descriptor) // num_threads)
    output_dir = './avg_keypoints'
    os.makedirs(output_dir, exist_ok=True)

    batches = [
        descriptor.iloc[i:i + batch_size]
        for i in range(0, len(descriptor), batch_size)
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        # The lazy result of executor.map was never consumed before, so worker
        # exceptions vanished. Collect the futures and surface failures.
        futures = [
            executor.submit(process_batch_avg_keypoints_row, batch, output_dir)
            for batch in batches
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing batch: {e}")


def download_video(youtube_url, video_file):
    """Download ``youtube_url`` with yt-dlp unless ``video_file`` already exists.

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
    """
    if os.path.exists(video_file):
        return
    command = [
        'yt-dlp',
        '-f', 'best[height<=360]',
        '-o', video_file,
        youtube_url
    ]
    subprocess.run(command, check=True)


def extract_frames(video_file, start_frame, end_frame, output_folder):
    """Extract frames ``start_frame`` .. ``end_frame - 1`` of a video as PNG files.

    ``output_folder`` is always created. Nothing else happens when the video
    is missing or when the folder already holds ``end_frame - start_frame``
    entries.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    os.makedirs(output_folder, exist_ok=True)
    if not os.path.exists(video_file):
        return
    # Treat a folder with the expected number of entries as already extracted.
    if len(os.listdir(output_folder)) == end_frame - start_frame:
        return
    command = [
        'ffmpeg',
        '-i', video_file,
        '-vf', f"select='between(n\\,{start_frame}\\,{end_frame - 1})'",
        '-vsync', 'vfr',
        '-frame_pts', 'true',
        os.path.join(output_folder, '%08d.png')
    ]
    subprocess.run(command, check=True)


def process_video_frames(row, video_path, video_frame_path):
    """Extract the frames of one clip and strip the zero padding from their names.

    Args:
        row: Mapping with ``video_id``, ``Start_frame``, ``End_frame`` and
            ``ClipName`` entries.
        video_path: Folder containing ``<video_id>.mp4``.
        video_frame_path: Root folder; frames go to ``<root>/<ClipName>``.
    """
    video_file = os.path.join(video_path, f"{row['video_id']}.mp4")
    start_frame = int(row['Start_frame'])
    end_frame = int(row['End_frame'])
    clip_name = row['ClipName']
    output_folder = os.path.join(video_frame_path, clip_name)

    # Downloading is currently disabled:
    # if not os.path.exists(video_file):
    #     download_video(row['URL'], video_file)
    extract_frames(video_file, start_frame, end_frame, output_folder)

    # Remove the leading zeros from the frame names. Only the stem is
    # stripped and at least one digit is kept, so "00000000.png" becomes
    # "0.png" instead of the hidden file ".png".
    for filename in os.listdir(output_folder):
        stem, ext = os.path.splitext(filename)
        new_name = (stem.lstrip('0') or '0') + ext
        if new_name != filename:
            os.rename(
                os.path.join(output_folder, filename),
                os.path.join(output_folder, new_name),
            )


def process_video_frames_multi_threaded(descriptor: pd.DataFrame, video_path, video_frame_path):
    """Run ``process_video_frames`` for every row of ``descriptor`` on 16 threads.

    A failing row is reported on stdout and does not stop the others.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        futures = [
            executor.submit(process_video_frames, row, video_path, video_frame_path)
            for _, row in descriptor.iterrows()
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing row: {e}")


def cal_lerp_avg_keypoints(keypoints: "List[skel.Keypoint]"):
    """Fill undetected keypoints of a track by linear interpolation, in place.

    A keypoint counts as undetected when its confidence is not greater than
    0.0. Leading gaps copy the first detected keypoint, trailing gaps copy
    the last one, and interior gaps are interpolated (x, y and confidence)
    between the neighbouring detections.

    Args:
        keypoints: Track of objects with ``x``, ``y`` and ``confidence``
            attributes. The objects are modified in place.

    Returns:
        The same list object; unchanged when nothing was detected at all.
    """
    valid = [i for i, kp in enumerate(keypoints) if kp.confidence > 0.0]
    if not valid:
        return keypoints  # No valid keypoints found

    def copy_values(source, target):
        target.x = source.x
        target.y = source.y
        target.confidence = source.confidence

    first_valid_idx, last_valid_idx = valid[0], valid[-1]

    # Extend the first / last detection outwards over the leading / trailing gap.
    for kp in keypoints[:first_valid_idx]:
        copy_values(keypoints[first_valid_idx], kp)
    for kp in keypoints[last_valid_idx + 1:]:
        copy_values(keypoints[last_valid_idx], kp)

    # Interpolate every interior gap between two consecutive detections.
    for left, right in zip(valid, valid[1:]):
        start, end = keypoints[left], keypoints[right]
        span = right - left
        for j in range(left + 1, right):
            t = (j - left) / span
            keypoints[j].x = start.x * (1 - t) + end.x * t
            keypoints[j].y = start.y * (1 - t) + end.y * t
            keypoints[j].confidence = start.confidence * (1 - t) + end.confidence * t

    return keypoints


def process_avg_keypoints_folder(avg_keypoints_folder, output_folder):
    """Interpolate the two average-keypoint tracks of every file in a folder.

    Each input file is a list of frames holding two ``{'x', 'y', 'confidence'}``
    dicts. Both tracks go through ``cal_lerp_avg_keypoints`` and the result is
    written under the same file name into ``output_folder``.

    Args:
        avg_keypoints_folder: Folder whose files are all read as JSON.
        output_folder: Destination folder, created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)
    for file in os.listdir(avg_keypoints_folder):
        json_path = os.path.join(avg_keypoints_folder, file)
        with open(json_path, 'r') as f:
            data = json.load(f)

        skeleton1_keypoints = []
        skeleton2_keypoints = []
        for frame in data:
            first, second = frame[0], frame[1]
            skeleton1_keypoints.append(
                skel.Keypoint(first['x'], first['y'], first['confidence'])
            )
            skeleton2_keypoints.append(
                skel.Keypoint(second['x'], second['y'], second['confidence'])
            )

        lerped_keypoints1 = cal_lerp_avg_keypoints(skeleton1_keypoints)
        lerped_keypoints2 = cal_lerp_avg_keypoints(skeleton2_keypoints)
        lerped = [[kp1, kp2] for kp1, kp2 in zip(lerped_keypoints1, lerped_keypoints2)]

        with open(os.path.join(output_folder, file), 'w') as f:
            json.dump(lerped, f, cls=skel.KeypointEncoder, indent=4)


def main():
    """Script entry point; currently only adds neck keypoints to the YOLO files."""
    # The descriptor and the video paths are loaded for the other pipeline
    # steps, which are not invoked at the moment.
    descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')
    os.makedirs('./avg_keypoints', exist_ok=True)
    video_path = './video'
    video_frame_path = './video_frames'

    add_neck_to_og_coco('./new_yolo_keypoints')

    # print("Done processing all rows.")
    # canvas = np.zeros((360, 640, 3), dtype=np.uint8)
    # canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)
    # save the image
    # cv2.imwrite('test.png', canvas)


if __name__ == '__main__':
    main()