# Image_Gen_Server/process_json_file.py

import os
import json
import subprocess
import sys
import cv2
import numpy as np
import pandas as pd
from typing import List
import skeleton_lib as skel
import concurrent.futures
sys.path.append('./')

def json_to_keypoints_openpose(json_file: str) -> List[skel.Keypoint]:
    """Load the first person's pose from an OpenPose-style JSON file.

    Reads ``data[0]['people'][0]['pose_keypoints_2d']``, a flat list that is
    walked in strides of three. One ``skel.Keypoint`` is built from the first
    two values of each stride; the third value of the stride is not passed on.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        One keypoint per stride, in file order.
    """
    with open(json_file, 'r') as handle:
        flat = json.load(handle)[0]['people'][0]['pose_keypoints_2d']
    return [skel.Keypoint(flat[start], flat[start + 1]) for start in range(0, len(flat), 3)]

def array_json_to_Skeleton_Seqences(json_file: str) -> List[skel.Skeleton_Seqence]:
    """Read an array-style JSON file into one ``Skeleton_Seqence`` per person slot.

    The file is a list of frames; each frame is a list of skeletons; each
    skeleton is a list of ``[x, y, confidence]`` triples. The skeleton at
    position ``i`` of every frame is appended to the ``i``-th sequence.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The sequences, indexed by the skeleton's position within a frame.
    """
    with open(json_file, 'r') as handle:
        frames = json.load(handle)

    per_person = []
    for people in frames:
        for slot, raw_points in enumerate(people):
            # Slots are visited in ascending order, so at most one new
            # sequence has to be created to make room for this slot.
            if slot >= len(per_person):
                per_person.append(skel.Skeleton_Seqence([]))
            points = [skel.Keypoint(raw[0], raw[1], raw[2]) for raw in raw_points]
            per_person[slot].add_frame(skel.Skeleton(points))
    return per_person

def read_new_yolo_keypoints(json_file: str) -> List[List[skel.Skeleton]]:
    """Read a keypoint JSON file whose points are ``{'x', 'y', 'confidence'}`` dicts.

    The file is a list of frames; each frame is a list of objects carrying a
    ``'keypoints'`` list of point dicts.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        For every frame, the list of ``skel.Skeleton`` objects found in it.
    """
    with open(json_file, 'r') as handle:
        raw_frames = json.load(handle)

    return [
        [
            skel.Skeleton([
                skel.Keypoint(pt['x'], pt['y'], pt['confidence'])
                for pt in person['keypoints']
            ])
            for person in raw_frame
        ]
        for raw_frame in raw_frames
    ]

def add_neck_to_og_coco(folder: str):
    """Insert a synthetic "neck" keypoint into every skeleton of every file in ``folder``.

    Each file is parsed as JSON (a list of frames, each a list of objects with
    a ``'keypoints'`` list of ``{'x', 'y', 'confidence'}`` dicts), modified in
    memory and written back to the same path with 4-space indentation.

    The neck is the component-wise mean of the entries at index 6 and 7 and is
    inserted at index 6, shifting the later entries by one.
    NOTE(review): indices 6 and 7 are presumably the two shoulders in this
    keypoint layout — confirm against the producer of these files.

    The operation is not idempotent: running it again on the same folder
    inserts another keypoint into every skeleton.

    Args:
        folder: Directory whose entries are all read and rewritten.
    """
    for file in os.listdir(folder):
        path = os.path.join(folder, file)
        with open(path, 'r') as f:
            data = json.load(f)
        print(f"Processing {file}")
        for frame in data:
            for skeleton in frame:
                _skeleton = skeleton['keypoints']
                # Index 7 is read below, so at least 8 entries are required.
                # Shorter skeletons are left untouched instead of raising.
                if len(_skeleton) > 7:
                    first, second = _skeleton[6], _skeleton[7]
                    neck = {
                        key: (first[key] + second[key]) / 2
                        for key in ('x', 'y', 'confidence')
                    }
                    _skeleton.insert(6, neck)
        with open(path, 'w') as f:
            json.dump(data, f, indent=4)

def Skeleton_Seqences_save_to_array_json(skeleton_sequences: List[skel.Skeleton_Seqence], json_file: str):
    """Write skeleton sequences to ``json_file`` in the array JSON layout.

    The output is a list of frames; each frame is a list of skeletons; each
    skeleton is a list of ``[x, y, confidence]`` triples. The number of frames
    written is the frame count of the first sequence. An empty
    ``skeleton_sequences`` produces an empty JSON array.

    Args:
        skeleton_sequences: Sequences to serialise.
        json_file: Destination path. Missing parent directories are created.
    """
    # os.path.dirname returns '' for a bare file name, and os.makedirs('')
    # raises, so only create the directory when there is one to create.
    parent = os.path.dirname(json_file)
    if parent:
        os.makedirs(parent, exist_ok=True)

    frame_total = len(skeleton_sequences[0].skeletons_frame) if skeleton_sequences else 0

    data = []
    for i in range(frame_total):
        sliced = skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i)
        data.append([
            [[kp.x, kp.y, kp.confidence] for kp in skeleton.keypoints]
            for skeleton in sliced
        ])

    with open(json_file, 'w') as file:
        json.dump(data, file, indent=4)
def process_json_file(json_file, directory, output_directory):
    """Repair the keypoints of one array JSON file and save the result.

    Every skeleton of every frame is passed through ``skel.fix_keypoints``
    together with the keypoints of the same person slot in the previous and
    next frame (``None`` at the first/last frame). The repaired sequences are
    written to ``output_directory`` under the input's base name.

    Args:
        json_file: File name relative to ``directory``.
        directory: Directory containing the input file.
        output_directory: Directory that receives the repaired file.
    """
    json_file = os.path.join(directory, json_file)

    skeleton_sequences = array_json_to_Skeleton_Seqences(json_file)
    frame_count = max(
        len(sequence.skeletons_frame)
        for sequence in skeleton_sequences
        if sequence is not None
    )
    # Slices are taken once, before any repair, so the neighbours handed to
    # fix_keypoints come from these pre-computed slices.
    sliced_list = [skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i) for i in range(frame_count)]

    for i in range(frame_count):
        last_sliced = sliced_list[i - 1] if i > 0 else None
        next_sliced = sliced_list[i + 1] if i < frame_count - 1 else None

        for j, skeleton in enumerate(sliced_list[i]):
            last_keypoints = last_sliced[j].keypoints if last_sliced else None
            next_keypoints = next_sliced[j].keypoints if next_sliced else None
            keypoints = skel.fix_keypoints(skeleton.keypoints, last_keypoints, next_keypoints)
            skeleton_sequences[j].get_frame(i).keypoints = keypoints

    # Join with a path separator: plain string concatenation produced
    # "<dir><name>" whenever output_directory lacked a trailing slash.
    output_path = os.path.join(output_directory, os.path.basename(json_file))
    Skeleton_Seqences_save_to_array_json(skeleton_sequences, output_path)

def process_json_files_chunk(json_files_chunk, directory, output_directory):
    """Run ``process_json_file`` on each file name of the chunk, in order.

    Args:
        json_files_chunk: Iterable of file names relative to ``directory``.
        directory: Directory containing the input files.
        output_directory: Directory that receives the processed files.
    """
    for name in json_files_chunk:
        process_json_file(json_file=name, directory=directory, output_directory=output_directory)

def process_json_files_multi_threaded(json_files, directory, output_directory):
    """Process every ``.json`` file of ``directory`` on a thread pool.

    The file names are split into at most 64 chunks and each chunk is handed
    to ``process_json_files_chunk``. A failure inside a chunk is printed and
    does not stop the remaining chunks.

    Args:
        json_files: Not consulted; the list is always rebuilt by scanning
            ``directory``. Kept so existing callers remain valid.
        directory: Directory scanned for ``.json`` files.
        output_directory: Directory that receives the processed files.
    """
    json_files = [f for f in os.listdir(directory) if f.endswith('.json')]
    if not json_files:
        print("No JSON files found in the directory.")
        return

    # array_split always returns 64 pieces, some empty when there are fewer
    # files; drop those and convert back to plain Python strings.
    json_files_chunks = [
        chunk.tolist()
        for chunk in np.array_split(json_files, 64)
        if chunk.size
    ]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # output_directory must be forwarded: without it every chunk failed
        # with a TypeError that was only printed.
        futures = [
            executor.submit(process_json_files_chunk, chunk, directory, output_directory)
            for chunk in json_files_chunks
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing file chunk: {e}")

def process_clip_descriptor(input_file_path, output_file_path):
    """Add a ``video_id`` column to a clip descriptor CSV and save a copy.

    The id is the text between the first ``=`` of the ``URL`` value and the
    following ``&`` (or the end of the string), which matches URLs of the form
    ``...watch?v=<id>[&...]``. A URL without ``=`` raises ``IndexError``.

    Args:
        input_file_path: CSV to read; must contain a ``URL`` column.
        output_file_path: Destination CSV, written without the index column.
    """
    ClipDescriptorKaggle = pd.read_csv(input_file_path)

    # Add a new column to store the YouTube video ID by extracting it from the URL
    ClipDescriptorKaggle['video_id'] = ClipDescriptorKaggle['URL'].apply(
        lambda url: url.split('=')[1].split('&')[0]
    )

    # Hand pandas the path rather than a text-mode handle: a handle opened
    # without newline='' can produce extra blank lines on some platforms.
    ClipDescriptorKaggle.to_csv(output_file_path, index=False)

def get_frames_from_fixed_json(json_file):
    """Read an array JSON file and return two skeletons per frame.

    Only the first two skeletons of every frame are read; a frame with fewer
    than two raises ``IndexError``. Each point is an ``[x, y, confidence]``
    triple.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        A list with one ``[skeleton, skeleton]`` pair per frame.
    """
    with open(json_file, 'r') as handle:
        raw_frames = json.load(handle)

    return [
        [
            skel.Skeleton([skel.Keypoint(pt[0], pt[1], pt[2]) for pt in raw_frame[person]])
            for person in range(2)  # Assuming there are always 2 skeletons
        ]
        for raw_frame in raw_frames
    ]

def get_avg_keypoints(keypoints):
    """Return one keypoint holding the mean x, y and confidence of ``keypoints``.

    Every keypoint contributes equally, whatever its confidence. An empty
    input yields a keypoint whose three fields are all 0.
    """
    def _mean(values):
        # 0 for an empty list, matching the fallback used for all three fields.
        return sum(values) / len(values) if len(values) > 0 else 0

    xs = [kp.x for kp in keypoints]
    ys = [kp.y for kp in keypoints]
    confidences = [kp.confidence for kp in keypoints]
    return skel.Keypoint(_mean(xs), _mean(ys), _mean(confidences))

def get_avg_keypoints_in_frames(frames):
    """Reduce every skeleton of every frame to its average keypoint.

    Args:
        frames: Iterable of frames, each an iterable of skeletons exposing a
            ``keypoints`` attribute.

    Returns:
        A list parallel to ``frames`` whose entries are lists with one
        averaged keypoint per skeleton.
    """
    return [
        [get_avg_keypoints(person.keypoints) for person in people]
        for people in frames
    ]

def process_avg_keypoints_row(row, output_dir):
    """Compute and store the per-frame average keypoints for one clip.

    Reads ``./fixed/<ClipName>.json`` and writes ``<ClipName>.json`` into
    ``output_dir``, serialised with ``skel.KeypointEncoder``.

    Args:
        row: Mapping-like descriptor row providing ``'ClipName'``.
        output_dir: Existing directory that receives the output file.
    """
    clip_name = row['ClipName']
    json_file = './fixed/' + clip_name + '.json'
    print(f"Processing {json_file}")

    averaged = get_avg_keypoints_in_frames(get_frames_from_fixed_json(json_file))

    target = os.path.join(output_dir, clip_name + '.json')
    with open(target, 'w') as sink:
        json.dump(averaged, sink, indent=4, cls=skel.KeypointEncoder)

def process_batch_avg_keypoints_row(batch, output_dir):
    """Run ``process_avg_keypoints_row`` on every row of a DataFrame slice.

    Args:
        batch: DataFrame whose rows are processed in order.
        output_dir: Directory forwarded to the per-row worker.
    """
    print(f"Processing batch of {len(batch)} rows.")
    for _label, record in batch.iterrows():
        process_avg_keypoints_row(record, output_dir)

def process_descriptor_save_avg_keypoints(descriptor: pd.DataFrame):
    """Compute average keypoints for every descriptor row on a thread pool.

    The rows are cut into consecutive batches of ``len(descriptor) // 64``
    rows (at least 1) and each batch is handed to
    ``process_batch_avg_keypoints_row``. Output goes to ``./avg_keypoints``,
    which is created if missing. A failing batch is reported on stdout and
    does not stop the others.

    Args:
        descriptor: Clip descriptor table; each row must provide what
            ``process_avg_keypoints_row`` reads.
    """
    num_threads = 64
    batch_size = max(1, len(descriptor) // num_threads)
    output_dir = './avg_keypoints'

    os.makedirs(output_dir, exist_ok=True)

    batches = [descriptor.iloc[i:i + batch_size] for i in range(0, len(descriptor), batch_size)]

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(process_batch_avg_keypoints_row, batch, output_dir)
            for batch in batches
        ]
        # Results must be retrieved for worker exceptions to surface; the
        # previous executor.map call discarded them, hiding every failure.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing batch: {e}")

def download_video(youtube_url, video_file):
    """Download ``youtube_url`` to ``video_file`` with ``yt-dlp`` unless it already exists.

    The format selector requests the best stream of at most 360 pixels
    height. A non-zero exit status raises ``subprocess.CalledProcessError``.

    Args:
        youtube_url: URL handed to ``yt-dlp``.
        video_file: Output path; when present nothing is downloaded.
    """
    if not os.path.exists(video_file):
        subprocess.run(
            ['yt-dlp', '-f', 'best[height<=360]', '-o', video_file, youtube_url],
            check=True,
        )

def extract_frames(video_file, start_frame, end_frame, output_folder):
    """Extract frames ``start_frame`` .. ``end_frame - 1`` of a video as PNG files.

    ``output_folder`` is created first, even when the video is missing.
    Nothing is extracted when the video file does not exist or when the folder
    already holds exactly ``end_frame - start_frame`` entries. Otherwise
    ``ffmpeg`` is run with a ``select`` filter and the ``%08d.png`` output
    pattern; a non-zero exit status raises ``subprocess.CalledProcessError``.

    Args:
        video_file: Path of the source video.
        start_frame: Index of the first frame to keep.
        end_frame: Index one past the last frame to keep.
        output_folder: Directory that receives the PNG files.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # Skip when there is no source, or when the folder already looks complete.
    if not os.path.exists(video_file) or len(os.listdir(output_folder)) == end_frame - start_frame:
        return

    frame_filter = f"select='between(n\\,{start_frame}\\,{end_frame - 1})'"
    output_pattern = os.path.join(output_folder, '%08d.png')
    subprocess.run(
        ['ffmpeg', '-i', video_file, '-vf', frame_filter,
         '-vsync', 'vfr', '-frame_pts', 'true', output_pattern],
        check=True,
    )

def process_video_frames(row, video_path, video_frame_path):
    """Extract one clip's frames and strip the zero padding from their names.

    The video is expected at ``<video_path>/<video_id>.mp4`` and the frames
    are written to ``<video_frame_path>/<ClipName>/``. Downloading a missing
    video is currently disabled; ``extract_frames`` does nothing in that case.

    After extraction every entry of the clip folder is renamed so its stem
    carries no leading zeros, e.g. ``00000012.png`` -> ``12.png``. An all-zero
    stem becomes ``0``, so ``00000000.png`` -> ``0.png``.

    Args:
        row: Mapping-like descriptor row providing ``'video_id'``,
            ``'Start_frame'``, ``'End_frame'`` and ``'ClipName'``.
        video_path: Directory containing the downloaded videos.
        video_frame_path: Root directory for the per-clip frame folders.
    """
    video_file = os.path.join(video_path, f"{row['video_id']}.mp4")
    start_frame = int(row['Start_frame'])
    end_frame = int(row['End_frame'])
    clip_name = row['ClipName']
    output_folder = os.path.join(video_frame_path, clip_name)

    # if not os.path.exists(video_file):
    #     download_video(row['URL'], video_file)

    extract_frames(video_file, start_frame, end_frame, output_folder)

    # Remove the leading zeros from the frame names. Only the stem is
    # stripped and at least one digit is kept: stripping the whole file name
    # turned "00000000.png" into the hidden file ".png".
    for filename in os.listdir(output_folder):
        stem, extension = os.path.splitext(filename)
        new_name = (stem.lstrip('0') or '0') + extension
        if new_name != filename:
            os.rename(os.path.join(output_folder, filename), os.path.join(output_folder, new_name))

def process_video_frames_multi_threaded(descriptor: pd.DataFrame, video_path, video_frame_path):
    """Run ``process_video_frames`` for every descriptor row on 16 worker threads.

    A failure for one row is printed and does not stop the remaining rows.

    Args:
        descriptor: Clip descriptor table, one clip per row.
        video_path: Directory containing the downloaded videos.
        video_frame_path: Root directory for the per-clip frame folders.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as pool:
        pending = []
        for _label, record in descriptor.iterrows():
            pending.append(pool.submit(process_video_frames, record, video_path, video_frame_path))

        for finished in concurrent.futures.as_completed(pending):
            try:
                finished.result()
            except Exception as e:
                print(f"Error processing row: {e}")


def cal_lerp_avg_keypoints(keypoints: list[skel.Keypoint]):
    """Fill undetected keypoints of a time series in place.

    A keypoint counts as detected when its confidence is greater than 0.0.
    Entries before the first detection copy the first detection, entries after
    the last detection copy the last one, and entries between two detections
    are linearly interpolated (x, y and confidence) between them.

    Args:
        keypoints: Time-ordered keypoints; the objects are modified in place.

    Returns:
        The same ``keypoints`` list. It is returned untouched when it contains
        no detected keypoint.
    """
    # Detections are located once, before any value is overwritten.
    detected = [idx for idx, kp in enumerate(keypoints) if kp.confidence > 0.0]
    if not detected:
        return keypoints  # No valid keypoints found

    def _clone_into(target, source):
        target.x = source.x
        target.y = source.y
        target.confidence = source.confidence

    head, tail = detected[0], detected[-1]

    # Pad both ends with the nearest detection.
    for kp in keypoints[:head]:
        _clone_into(kp, keypoints[head])
    for kp in keypoints[tail + 1:]:
        _clone_into(kp, keypoints[tail])

    # Lerp across every gap between two consecutive detections.
    for left, right in zip(detected, detected[1:]):
        for j in range(left + 1, right):
            t = (j - left) / (right - left)
            keypoints[j].x = keypoints[left].x * (1 - t) + keypoints[right].x * t
            keypoints[j].y = keypoints[left].y * (1 - t) + keypoints[right].y * t
            keypoints[j].confidence = keypoints[left].confidence * (1 - t) + keypoints[right].confidence * t

    return keypoints
    
def process_avg_keypoints_folder(avg_keypoints_folder, output_folder):
    """Fill the gaps in every average-keypoint file of a folder.

    Each input file is a JSON list of frames holding two
    ``{'x', 'y', 'confidence'}`` dicts. The two per-person series are passed
    through ``cal_lerp_avg_keypoints`` separately and written, frame by frame,
    to a file of the same name in ``output_folder``.

    Args:
        avg_keypoints_folder: Directory whose entries are all read as JSON.
        output_folder: Destination directory, created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    for name in os.listdir(avg_keypoints_folder):
        with open(os.path.join(avg_keypoints_folder, name), 'r') as source:
            frames = json.load(source)

        # One time series per person slot (two slots per frame).
        first_track, second_track = [], []
        for frame in frames:
            for slot, track in enumerate((first_track, second_track)):
                entry = frame[slot]
                track.append(skel.Keypoint(entry['x'], entry['y'], entry['confidence']))

        first_filled = cal_lerp_avg_keypoints(first_track)
        second_filled = cal_lerp_avg_keypoints(second_track)

        # Back to the per-frame layout: [[person1, person2], ...]
        lerped = [[a, b] for a, b in zip(first_filled, second_filled)]

        with open(os.path.join(output_folder, name), 'w') as sink:
            json.dump(lerped, sink, cls=skel.KeypointEncoder, indent=4)
    

def main():
    """Script entry point: add the neck keypoint to the new YOLO keypoint files.

    Also reads ``./ClipDescriptorKaggle_processed.csv`` and makes sure
    ``./avg_keypoints`` exists, although neither is used by the step that is
    currently enabled.
    """
    # Raises if the descriptor CSV is missing; the parsed table is not used.
    pd.read_csv('./ClipDescriptorKaggle_processed.csv')
    os.makedirs('./avg_keypoints', exist_ok=True)

    add_neck_to_og_coco('./new_yolo_keypoints')


if __name__ == '__main__':
    main()
update 2024-10-03 09:37:37 +00:00			`import os`
			`import json`
yolo branch 2024-10-18 07:06:09 +00:00			`import subprocess`
update 2024-10-03 09:37:37 +00:00			`import sys`
fix upload 2024-10-16 03:01:19 +00:00			`import cv2`
update 2024-10-03 09:37:37 +00:00			`import numpy as np`
fix upload 2024-10-16 03:01:19 +00:00			`import pandas as pd`
update 2024-10-03 09:37:37 +00:00			`from typing import List`
			`import skeleton_lib as skel`
			`import concurrent.futures`
			`sys.path.append('./')`

			`def json_to_keypoints_openpose(json_file: str) -> List[skel.Keypoint]:`
			`with open(json_file, 'r') as file:`
			`data = json.load(file)`
			`keypoints = data[0]['people'][0]['pose_keypoints_2d']`
			`keypoints = [skel.Keypoint(keypoints[i], keypoints[i + 1]) for i in range(0, len(keypoints), 3)]`
			`return keypoints`

			`def array_json_to_Skeleton_Seqences(json_file: str) -> List[skel.Skeleton_Seqence]:`
			`with open(json_file, 'r') as file:`
			`data = json.load(file)`

			`skeleton_sequences = []`
			`for frame in data:`
			`for i in range(len(frame)):`
			`while len(skeleton_sequences) <= i:`
			`skeleton_sequences.append(None)`
			`skeleton_sequences[i] = skel.Skeleton_Seqence([])`
			`skeleton = skel.Skeleton([skel.Keypoint(keypoint[0], keypoint[1], keypoint[2]) for keypoint in frame[i]])`
			`skeleton_sequences[i].add_frame(skeleton)`
			`return skeleton_sequences`

gened yolo dataset 2024-10-21 04:38:29 +00:00			`def read_new_yolo_keypoints(json_file: str) -> List[List[skel.Skeleton]]:`
			`with open(json_file, 'r') as file:`
			`data = json.load(file)`

			`frames = []`
			`for frame in data:`
			`skeletons = []`
			`for skeleton in frame:`
			`keypoints = [skel.Keypoint(point['x'], point['y'], point['confidence']) for point in skeleton['keypoints']]`
			`skeletons.append(skel.Skeleton(keypoints))`
			`frames.append(skeletons)`
			`return frames`

			`def add_neck_to_og_coco(folder: str):`
			`for file in os.listdir(folder):`
			`with open(os.path.join(folder, file), 'r') as f:`
			`data = json.load(f)`
			`print(f"Processing {file}")`
			`for frame in data:`
			`for skeleton in frame:`
			`_skeleton = skeleton['keypoints']`
			`# Add neck keypoint to the original COCO keypoints`
			`if len(_skeleton) > 6:`
			`neck = {'x': (_skeleton[6]['x'] + _skeleton[7]['x']) / 2, 'y': (_skeleton[6]['y'] + _skeleton[7]['y']) / 2, 'confidence': (_skeleton[6]['confidence'] + _skeleton[7]['confidence']) / 2}`
			`_skeleton.insert(6, neck)`
			`with open(os.path.join(folder, file), 'w') as f:`
			`json.dump(data, f, indent=4)`

update 2024-10-03 09:37:37 +00:00			`def Skeleton_Seqences_save_to_array_json(skeleton_sequences: List[skel.Skeleton_Seqence], json_file: str):`
			`# Ensure the directory exists`
			`os.makedirs(os.path.dirname(json_file), exist_ok=True)`

			`data = []`

			`for i in range(len(skeleton_sequences[0].skeletons_frame)):`
			`sliced = skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i)`
			`sequence_data = []`
			`for skeleton in sliced:`
			`keypoints_data = [[kp.x, kp.y, kp.confidence] for kp in skeleton.keypoints]`
			`sequence_data.append(keypoints_data)`
			`data.append(sequence_data)`

			`with open(json_file, 'w') as file:`
			`json.dump(data, file, indent=4)`

fix upload 2024-10-16 03:01:19 +00:00			`def process_json_file(json_file, directory, output_directory):`
update 2024-10-03 09:37:37 +00:00			`json_file = os.path.join(directory, json_file)`
			`# print(json_file)`

			`skeleton_sequences = array_json_to_Skeleton_Seqences(json_file)`
			`frame_count = max(len(skeleton_sequences[i].skeletons_frame) for i in range(len(skeleton_sequences)) if skeleton_sequences[i] is not None)`
			`sliced_list = [skel.get_time_slice_for_Skeleton_Seqences(skeleton_sequences, i) for i in range(frame_count)]`

			`for i in range(frame_count):`
			`last_sliced = sliced_list[i - 1] if i > 0 else None`
			`next_sliced = sliced_list[i + 1] if i < frame_count - 1 else None`
			`sliced = sliced_list[i]`

			`for j, skeleton in enumerate(sliced):`
			`last_keypoints = last_sliced[j].keypoints if last_sliced else None`
			`next_keypoints = next_sliced[j].keypoints if next_sliced else None`
			`keypoints = skeleton.keypoints`
			`keypoints = skel.fix_keypoints(keypoints, last_keypoints, next_keypoints)`
			`skeleton_sequences[j].get_frame(i).keypoints = keypoints`

fix upload 2024-10-16 03:01:19 +00:00			`Skeleton_Seqences_save_to_array_json(skeleton_sequences, output_directory + os.path.basename(json_file))`
update 2024-10-03 09:37:37 +00:00
fix upload 2024-10-16 03:01:19 +00:00			`def process_json_files_chunk(json_files_chunk, directory, output_directory):`
update 2024-10-03 09:37:37 +00:00			`for json_file in json_files_chunk:`
fix upload 2024-10-16 03:01:19 +00:00			`process_json_file(json_file, directory, output_directory)`
update 2024-10-03 09:37:37 +00:00
fix upload 2024-10-16 03:01:19 +00:00			`def process_json_files_multi_threaded(json_files, directory, output_directory):`
update 2024-10-03 09:37:37 +00:00			`json_files = [f for f in os.listdir(directory) if f.endswith('.json')]`
			`if not json_files:`
			`print("No JSON files found in the directory.")`
			`return`

fix upload 2024-10-16 03:01:19 +00:00			`json_files_chunks = np.array_split(json_files, 64)`
update 2024-10-03 09:37:37 +00:00
			`with concurrent.futures.ThreadPoolExecutor() as executor:`
			`futures = [executor.submit(process_json_files_chunk, chunk, directory) for chunk in json_files_chunks]`
			`for future in concurrent.futures.as_completed(futures):`
			`try:`
			`future.result()`
			`except Exception as e:`
fix upload 2024-10-16 03:01:19 +00:00			`print(f"Error processing file chunk: {e}")`

			`def process_clip_descriptor(input_file_path, output_file_path):`
			`ClipDescriptorKaggle = pd.read_csv(input_file_path)`

			`# Add a new column to store the YouTube video ID by extracting it from the URL`
			`ClipDescriptorKaggle['video_id'] = ClipDescriptorKaggle['URL'].apply(lambda x: x.split('=')[1].split('&')[0])`

			`# Save the processed DataFrame to a new CSV file`
			`with open(output_file_path, 'w') as file:`
			`ClipDescriptorKaggle.to_csv(file, index=False)`

			`def get_frames_from_fixed_json(json_file):`
			`frames = []`
			`with open(json_file, 'r') as file:`
			`data = json.load(file)`
yolo branch 2024-10-18 07:06:09 +00:00			`for index, frame in enumerate(data):`
			`two_skeletons = []`
fix upload 2024-10-16 03:01:19 +00:00			`for i in range(2): # Assuming there are always 2 skeletons`
			`keypoints = []`
			`for point in frame[i]:`
			`keypoint = skel.Keypoint(point[0], point[1], point[2])`
			`keypoints.append(keypoint)`
yolo branch 2024-10-18 07:06:09 +00:00			`two_skeletons.append(skel.Skeleton(keypoints))`
			`frames.append(two_skeletons)`
fix upload 2024-10-16 03:01:19 +00:00
			`return frames`

yolo branch 2024-10-18 07:06:09 +00:00			`def get_avg_keypoints(keypoints):`
			`x = [point.x for point in keypoints]`
			`y = [point.y for point in keypoints]`
			`confidence = [point.confidence for point in keypoints]`
			`avg_x = sum(x) / len(x) if len(x) > 0 else 0`
			`avg_y = sum(y) / len(y) if len(y) > 0 else 0`
			`avg_confidence = sum(confidence) / len(confidence) if len(confidence) > 0 else 0`
			`return skel.Keypoint(avg_x, avg_y, avg_confidence)`

			`def get_avg_keypoints_in_frames(frames):`
			`avg_keypoints_in_frames = []`
			`for frame in frames:`
			`avg_keypoints = []`
			`for skeleton in frame:`
			`avg_keypoints.append(get_avg_keypoints(skeleton.keypoints))`
			`avg_keypoints_in_frames.append(avg_keypoints)`
			`return avg_keypoints_in_frames`

			`def process_avg_keypoints_row(row, output_dir):`
			`json_file = './fixed/' + row['ClipName'] + '.json'`
			`print(f"Processing {json_file}")`
			`frames = get_frames_from_fixed_json(json_file)`
			`avg_keypoints = get_avg_keypoints_in_frames(frames)`

			`with open(os.path.join(output_dir, row['ClipName'] + '.json'), 'w') as file:`
			`json.dump(avg_keypoints, file, indent=4, cls=skel.KeypointEncoder)`

			`def process_batch_avg_keypoints_row(batch, output_dir):`
			`print(f"Processing batch of {len(batch)} rows.")`
			`for _, row in batch.iterrows():`
			`process_avg_keypoints_row(row, output_dir)`
fix upload 2024-10-16 03:01:19 +00:00
yolo branch 2024-10-18 07:06:09 +00:00			`def process_descriptor_save_avg_keypoints(descriptor: pd.DataFrame):`
			`num_threads = 64`
			`batch_size = max(1, len(descriptor) // num_threads)`
			`output_dir = './avg_keypoints'`

			`if not os.path.exists(output_dir):`
			`os.makedirs(output_dir)`

			`batches = [descriptor.iloc[i:i + batch_size] for i in range(0, len(descriptor), batch_size)]`

			`with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:`
			`executor.map(lambda batch: process_batch_avg_keypoints_row(batch, output_dir), batches)`

			`def download_video(youtube_url, video_file):`
			`if os.path.exists(video_file):`
			`return`

			`command = [`
			`'yt-dlp', '-f', 'best[height<=360]', '-o', video_file, youtube_url`
			`]`
			`subprocess.run(command, check=True)`

			`def extract_frames(video_file, start_frame, end_frame, output_folder):`
			`if not os.path.exists(output_folder):`
			`os.makedirs(output_folder)`

			`if not os.path.exists(video_file):`
			`return`

			`if len(os.listdir(output_folder)) == end_frame - start_frame:`
			`return`

			`command = [`
			`'ffmpeg', '-i', video_file, '-vf', f"select='between(n\\,{start_frame}\\,{end_frame - 1})'",`
			`'-vsync', 'vfr', '-frame_pts', 'true', os.path.join(output_folder, '%08d.png')`
			`]`
			`subprocess.run(command, check=True)`

			`def process_video_frames(row, video_path, video_frame_path):`
			`video_file = os.path.join(video_path, f"{row['video_id']}.mp4")`
			`start_frame = int(row['Start_frame'])`
			`end_frame = int(row['End_frame'])`
			`clip_name = row['ClipName']`
			`output_folder = os.path.join(video_frame_path, clip_name)`

			`# if not os.path.exists(video_file):`
			`# download_video(row['URL'], video_file)`

			`extract_frames(video_file, start_frame, end_frame, output_folder)`
			`# remove the leading zeros from the frame names`
			`for filename in os.listdir(output_folder):`
			`os.rename(os.path.join(output_folder, filename), os.path.join(output_folder, filename.lstrip('0')))`

			`def process_video_frames_multi_threaded(descriptor: pd.DataFrame, video_path, video_frame_path):`
			`with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:`
			`futures = [`
			`executor.submit(process_video_frames, row, video_path, video_frame_path)`
			`for _, row in descriptor.iterrows()`
			`]`
			`for future in concurrent.futures.as_completed(futures):`
			`try:`
			`future.result()`
			`except Exception as e:`
			`print(f"Error processing row: {e}")`


			`def cal_lerp_avg_keypoints(keypoints: list[skel.Keypoint]):`
			`# in these keypoints, the confidence is 0.0 if the keypoint is not detected`
			`# lerp linearly from last valid keypoint to next valid keypoint, fill in the missing keypoint(s)`

			`# Find the first valid keypoint`
			`first_valid_idx = next((i for i, kp in enumerate(keypoints) if kp.confidence > 0.0), None)`
			`if first_valid_idx is None:`
			`return keypoints # No valid keypoints found`

			`# Find the last valid keypoint`
			`last_valid_idx = next((i for i, kp in reversed(list(enumerate(keypoints))) if kp.confidence > 0.0), None)`

			`# Copy the first valid keypoint's values to all preceding invalid keypoints`
			`for i in range(first_valid_idx):`
			`keypoints[i].x = keypoints[first_valid_idx].x`
			`keypoints[i].y = keypoints[first_valid_idx].y`
			`keypoints[i].confidence = keypoints[first_valid_idx].confidence`

			`# Copy the last valid keypoint's values to all succeeding invalid keypoints`
			`for i in range(last_valid_idx + 1, len(keypoints)):`
			`keypoints[i].x = keypoints[last_valid_idx].x`
			`keypoints[i].y = keypoints[last_valid_idx].y`
			`keypoints[i].confidence = keypoints[last_valid_idx].confidence`

			`# Interpolate between valid keypoints`
			`last_valid_idx = first_valid_idx`
			`for i in range(first_valid_idx + 1, len(keypoints)):`
			`if keypoints[i].confidence > 0.0:`
			`next_valid_idx = i`
			`# Linearly interpolate between last_valid_idx and next_valid_idx`
			`for j in range(last_valid_idx + 1, next_valid_idx):`
			`t = (j - last_valid_idx) / (next_valid_idx - last_valid_idx)`
			`keypoints[j].x = keypoints[last_valid_idx].x * (1 - t) + keypoints[next_valid_idx].x * t`
			`keypoints[j].y = keypoints[last_valid_idx].y * (1 - t) + keypoints[next_valid_idx].y * t`
			`keypoints[j].confidence = keypoints[last_valid_idx].confidence * (1 - t) + keypoints[next_valid_idx].confidence * t`
			`last_valid_idx = next_valid_idx`

			`return keypoints`

			`def process_avg_keypoints_folder(avg_keypoints_folder, output_folder):`
			`os.makedirs(output_folder, exist_ok=True)`

			`for file in os.listdir(avg_keypoints_folder):`
			`json_path = os.path.join(avg_keypoints_folder, file)`
			`with open(json_path, 'r') as f:`
			`data = json.load(f)`

			`skeleton1_keypoints = []`
			`skeleton2_keypoints = []`

			`for frame in data:`
			`skeleton1_keypoints.append(skel.Keypoint(frame[0]['x'], frame[0]['y'], frame[0]['confidence']))`
			`skeleton2_keypoints.append(skel.Keypoint(frame[1]['x'], frame[1]['y'], frame[1]['confidence']))`

			`lerped_keypoints1 = cal_lerp_avg_keypoints(skeleton1_keypoints)`
			`lerped_keypoints2 = cal_lerp_avg_keypoints(skeleton2_keypoints)`

			`lerped = []`

			`for i in range(len(lerped_keypoints1)):`
			`lerped.append([lerped_keypoints1[i], lerped_keypoints2[i]])`

			`with open(os.path.join(output_folder, file), 'w') as f:`
			`json.dump(lerped, f, cls=skel.KeypointEncoder, indent=4)`


			`def main():`
fix upload 2024-10-16 03:01:19 +00:00			`descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')`
yolo branch 2024-10-18 07:06:09 +00:00			`os.makedirs('./avg_keypoints', exist_ok=True)`

			`video_path = './video'`
			`video_frame_path = './video_frames'`
fix upload 2024-10-16 03:01:19 +00:00
gened yolo dataset 2024-10-21 04:38:29 +00:00			`add_neck_to_og_coco('./new_yolo_keypoints')`
yolo branch 2024-10-18 07:06:09 +00:00

fix upload 2024-10-16 03:01:19 +00:00
gened yolo dataset 2024-10-21 04:38:29 +00:00			`# print("Done processing all rows.")`
yolo branch 2024-10-18 07:06:09 +00:00			`# canvas = np.zeros((360, 640, 3), dtype=np.uint8)`
			`# canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)`
fix upload 2024-10-16 03:01:19 +00:00
			`#save the image`
yolo branch 2024-10-18 07:06:09 +00:00			`# cv2.imwrite('test.png', canvas)`
fix upload 2024-10-16 03:01:19 +00:00
			`if __name__ == '__main__':`
			`main()`