yolo branch

2024-10-18 15:06:09 +08:00 · 2024-10-18 15:06:09 +08:00 · de2b354196
parent b30c015ea9
commit de2b354196
19 changed files with 18020 additions and 9387 deletions
--- a/.gitignore
+++ b/.gitignore
@ -165,4 +165,10 @@ FencersKeyPoints/*

 # output folder
 output/*
-fixed/*
+fixed/*
+FencersKeyPoints/*
+lerped_keypoints/*
+avg_keypoints/*
+video_frames/*
+video/*
+new_yolo_keypoints/*
--- a/ClipDescriptorKaggle_processed.csv
+++ b/ClipDescriptorKaggle_processed.csv
--- a/DeWatermark.ai_1728030918362.png
+++ b/DeWatermark.ai_1728030918362.png
--- a/app.py
+++ b/app.py
@ -55,21 +55,21 @@ def gen_group_pic():


 def gen_fencer_prompt(openpose_image_path, pid, comfyUI_address):
-    with open("fencerAPI.json", "r") as f:
+    with open("./prompts/fencerAPI.json", "r") as f:
        prompt_json = f.read()
        prompt = json.loads(prompt_json)

        openpose_image_name = opg.upload_image_circular_queue(openpose_image_path, 20, pid, comfyUI_address)
-        opg.upload_image("ref_black.png", "ref_black.png")
+        opg.upload_image("./images/ref_black.png", "ref_black.png")

        prompt["3"]["inputs"]["seed"] = random.randint(0, 10000000000)
-        prompt["29"]["inputs"]['image'] = "ref_black.png"
+        prompt["29"]["inputs"]['image'] = "./images/ref_black.png"
        prompt["17"]["inputs"]['image'] = openpose_image_name

        opg.queue_prompt(prompt, comfyUI_address)

 def gen_group_pic_prompt(openpose_image_path, base_image, pid, comfyUI_address):
-    with open("group_pic.json", "r") as f:
+    with open("./prompts/group_pic.json", "r") as f:
        prompt_json = f.read()
        prompt = json.loads(prompt_json)

--- a/body_pose_output_multi0000.png
+++ b/body_pose_output_multi0000.png
--- a/images/ref_black.png
+++ b/images/ref_black.png
--- a/jumping_05.png
+++ b/jumping_05.png
--- a/openpose_gen.py
+++ b/openpose_gen.py
@ -111,8 +111,11 @@ def main():
        print("No JSON files found in the directory.")
        return
    
-    json_file = os.path.join(directory, random.choice(json_files))
+    # json_file = os.path.join(directory, random.choice(json_files))
+    json_file = './fixed/0001_002_00_01_1.json'
    # json_file = './test_output.json'
+    # create ./output directory if it does not exist
+    os.makedirs('output', exist_ok=True)
    image_path = './output/test'
    print(json_file)

--- a/process_json_file.py
+++ b/process_json_file.py
@ -1,5 +1,6 @@
 import os
 import json
+import subprocess
 import sys
 import cv2
 import numpy as np
@ -103,30 +104,199 @@ def get_frames_from_fixed_json(json_file):
    frames = []
    with open(json_file, 'r') as file:
        data = json.load(file)
-        for frame in data:
-            skeletons = []
+        for index, frame in enumerate(data):
+            two_skeletons = []
            for i in range(2):  # Assuming there are always 2 skeletons
                keypoints = []
                for point in frame[i]:
                    keypoint = skel.Keypoint(point[0], point[1], point[2])
                    keypoints.append(keypoint)
-                skeletons.append(skel.Skeleton(keypoints))
-            frames.append(skeletons)
+                two_skeletons.append(skel.Skeleton(keypoints))
+            frames.append(two_skeletons)

    return frames

-def main():
+def get_avg_keypoints(keypoints):
+    """Collapse a list of keypoints into one averaged ``skel.Keypoint``.
+
+    x, y and confidence are each averaged over every entry of ``keypoints``.
+    An empty list yields ``skel.Keypoint(0, 0, 0)``.
+
+    NOTE(review): every entry is included in the mean, whatever its
+    confidence -- confirm that this is intended for undetected joints.
+    """
+    count = len(keypoints)
+    if count == 0:
+        return skel.Keypoint(0, 0, 0)
+
+    mean_x = sum([kp.x for kp in keypoints]) / count
+    mean_y = sum([kp.y for kp in keypoints]) / count
+    mean_confidence = sum([kp.confidence for kp in keypoints]) / count
+    return skel.Keypoint(mean_x, mean_y, mean_confidence)

+def get_avg_keypoints_in_frames(frames):
+    """Return, for each frame, the averaged keypoint of each skeleton in it.
+
+    ``frames`` is an iterable of frames, each frame an iterable of objects
+    exposing a ``keypoints`` attribute. The result mirrors that nesting:
+    one inner list per frame, one averaged keypoint per skeleton.
+    """
+    return [
+        [get_avg_keypoints(skeleton.keypoints) for skeleton in frame]
+        for frame in frames
+    ]
+
+def process_avg_keypoints_row(row, output_dir):
+    """Compute and save the per-frame averaged keypoints for one clip.
+
+    Reads ``./fixed/<ClipName>.json``, averages each skeleton per frame and
+    writes the result to ``<output_dir>/<ClipName>.json``.
+
+    Args:
+        row: mapping-like descriptor row providing ``'ClipName'``.
+        output_dir: directory that receives the JSON file.
+    """
+    clip_name = row['ClipName']
+    json_file = './fixed/' + clip_name + '.json'
+    print(f"Processing {json_file}")
+
+    avg_keypoints = get_avg_keypoints_in_frames(get_frames_from_fixed_json(json_file))
+
+    destination = os.path.join(output_dir, clip_name + '.json')
+    with open(destination, 'w') as file:
+        json.dump(avg_keypoints, file, indent=4, cls=skel.KeypointEncoder)
+
+def process_batch_avg_keypoints_row(batch, output_dir):
+    """Run ``process_avg_keypoints_row`` on every row of a DataFrame slice."""
+    row_count = len(batch)
+    print(f"Processing batch of {row_count} rows.")
+    for _index, clip_row in batch.iterrows():
+        process_avg_keypoints_row(clip_row, output_dir)
+
+def process_descriptor_save_avg_keypoints(descriptor: pd.DataFrame):
+    """Write averaged keypoints for every clip in ``descriptor`` using threads.
+
+    The descriptor is cut into contiguous batches (about one per worker) and
+    each batch is handed to ``process_batch_avg_keypoints_row``. Output goes
+    to ``./avg_keypoints``.
+
+    A failing batch is reported on stdout and does not stop the others.
+
+    Args:
+        descriptor: clip descriptor table; each row must provide ``ClipName``.
+    """
+    num_threads = 64
+    batch_size = max(1, len(descriptor) // num_threads)
+    output_dir = './avg_keypoints'
+
+    # exist_ok avoids the check-then-create race of a separate exists() test.
+    os.makedirs(output_dir, exist_ok=True)
+
+    batches = [descriptor.iloc[i:i + batch_size] for i in range(0, len(descriptor), batch_size)]
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
+        futures = [
+            executor.submit(process_batch_avg_keypoints_row, batch, output_dir)
+            for batch in batches
+        ]
+        # Results must be collected explicitly: an exception raised inside a
+        # worker is only surfaced when its future is inspected.
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                future.result()
+            except Exception as e:
+                print(f"Error processing batch: {e}")
+
+def download_video(youtube_url, video_file):
+    """Download ``youtube_url`` to ``video_file`` with yt-dlp unless it exists.
+
+    The format selector passed to yt-dlp is ``best[height<=360]``.
+
+    Raises:
+        subprocess.CalledProcessError: if yt-dlp exits with a non-zero status.
+    """
+    if not os.path.exists(video_file):
+        subprocess.run(
+            ['yt-dlp', '-f', 'best[height<=360]', '-o', video_file, youtube_url],
+            check=True,
+        )
+
+def extract_frames(video_file, start_frame, end_frame, output_folder):
+    """Dump frames ``start_frame`` .. ``end_frame - 1`` of a video as PNG files.
+
+    ``output_folder`` is created when missing. Nothing else happens when the
+    video file does not exist, or when the folder already holds exactly
+    ``end_frame - start_frame`` entries. Otherwise ffmpeg is run with a
+    ``select`` filter and the ``%08d.png`` output pattern.
+
+    Raises:
+        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
+    """
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+
+    # Short-circuit keeps the original order: the folder is only listed
+    # once the video is known to exist.
+    if not os.path.exists(video_file) or len(os.listdir(output_folder)) == end_frame - start_frame:
+        return
+
+    frame_filter = f"select='between(n\\,{start_frame}\\,{end_frame - 1})'"
+    output_pattern = os.path.join(output_folder, '%08d.png')
+    subprocess.run(
+        [
+            'ffmpeg', '-i', video_file, '-vf', frame_filter,
+            '-vsync', 'vfr', '-frame_pts', 'true', output_pattern,
+        ],
+        check=True,
+    )
+
+def process_video_frames(row, video_path, video_frame_path):
+    """Extract the frames of one clip and strip zero padding from their names.
+
+    The clip's frames are written to ``<video_frame_path>/<ClipName>/`` by
+    ``extract_frames`` and then renamed so the zero padding of the
+    ``%08d.png`` pattern is removed (``00000123.png`` -> ``123.png``).
+
+    Args:
+        row: descriptor row providing ``video_id``, ``Start_frame``,
+            ``End_frame`` and ``ClipName``.
+        video_path: directory holding ``<video_id>.mp4``.
+        video_frame_path: root directory for the per-clip frame folders.
+    """
+    video_file = os.path.join(video_path, f"{row['video_id']}.mp4")
+    start_frame = int(row['Start_frame'])
+    end_frame = int(row['End_frame'])
+    clip_name = row['ClipName']
+    output_folder = os.path.join(video_frame_path, clip_name)
+
+    # Downloading is currently disabled; the video must already be on disk.
+    # if not os.path.exists(video_file):
+    #     download_video(row['URL'], video_file)
+
+    extract_frames(video_file, start_frame, end_frame, output_folder)
+
+    for filename in os.listdir(output_folder):
+        stem, extension = os.path.splitext(filename)
+        # Strip the padding from the stem only and keep one digit, so an
+        # all-zero name such as "00000000.png" becomes "0.png", not ".png".
+        new_name = (stem.lstrip('0') or '0') + extension
+        if new_name != filename:
+            os.rename(os.path.join(output_folder, filename), os.path.join(output_folder, new_name))
+
+def process_video_frames_multi_threaded(descriptor: pd.DataFrame, video_path, video_frame_path):
+    """Run ``process_video_frames`` for every descriptor row on 16 threads.
+
+    A failure in one row is printed and does not interrupt the other rows.
+    """
+    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as pool:
+        pending = []
+        for _index, clip_row in descriptor.iterrows():
+            pending.append(pool.submit(process_video_frames, clip_row, video_path, video_frame_path))
+
+        for finished in concurrent.futures.as_completed(pending):
+            try:
+                finished.result()
+            except Exception as e:
+                print(f"Error processing row: {e}")
+
+
+def cal_lerp_avg_keypoints(keypoints: list[skel.Keypoint]):
+    """Fill undetected keypoints of a track in place and return the list.
+
+    A keypoint counts as detected when its confidence is greater than 0.0.
+    Entries before the first detected keypoint copy that keypoint, entries
+    after the last detected one copy the last, and every gap between two
+    detected keypoints is filled by linear interpolation of x, y and
+    confidence. When nothing is detected the list is returned untouched.
+    """
+    detected = [idx for idx, kp in enumerate(keypoints) if kp.confidence > 0.0]
+    if not detected:
+        return keypoints  # nothing to anchor the interpolation on
+
+    head_idx, tail_idx = detected[0], detected[-1]
+
+    # Leading gap: repeat the first detected keypoint.
+    head = keypoints[head_idx]
+    for kp in keypoints[:head_idx]:
+        kp.x = head.x
+        kp.y = head.y
+        kp.confidence = head.confidence
+
+    # Trailing gap: repeat the last detected keypoint.
+    tail = keypoints[tail_idx]
+    for kp in keypoints[tail_idx + 1:]:
+        kp.x = tail.x
+        kp.y = tail.y
+        kp.confidence = tail.confidence
+
+    # Interior gaps: blend between each pair of neighbouring detections.
+    for left_idx, right_idx in zip(detected, detected[1:]):
+        left, right = keypoints[left_idx], keypoints[right_idx]
+        for j in range(left_idx + 1, right_idx):
+            t = (j - left_idx) / (right_idx - left_idx)
+            keypoints[j].x = left.x * (1 - t) + right.x * t
+            keypoints[j].y = left.y * (1 - t) + right.y * t
+            keypoints[j].confidence = left.confidence * (1 - t) + right.confidence * t
+
+    return keypoints
+    
+def process_avg_keypoints_folder(avg_keypoints_folder, output_folder):
+    """Interpolate every averaged-keypoint file of a folder into another folder.
+
+    Each file in ``avg_keypoints_folder`` is read as JSON: a list of frames,
+    each frame holding two ``{'x', 'y', 'confidence'}`` entries. The two
+    tracks are gap-filled independently with ``cal_lerp_avg_keypoints`` and
+    written, frame by frame, to a file of the same name in ``output_folder``.
+    """
+    os.makedirs(output_folder, exist_ok=True)
+
+    for file in os.listdir(avg_keypoints_folder):
+        with open(os.path.join(avg_keypoints_folder, file), 'r') as f:
+            data = json.load(f)
+
+        # One track per skeleton slot, built in frame order.
+        tracks = ([], [])
+        for frame in data:
+            for track, entry in zip(tracks, (frame[0], frame[1])):
+                track.append(skel.Keypoint(entry['x'], entry['y'], entry['confidence']))
+
+        first_track = cal_lerp_avg_keypoints(tracks[0])
+        second_track = cal_lerp_avg_keypoints(tracks[1])
+
+        lerped = [[first, second] for first, second in zip(first_track, second_track)]
+
+        with open(os.path.join(output_folder, file), 'w') as f:
+            json.dump(lerped, f, cls=skel.KeypointEncoder, indent=4)
+    
+
+def main():
    descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')
+    os.makedirs('./avg_keypoints', exist_ok=True)
    
-    frames = get_frames_from_fixed_json('./fixed/0050_001_08_08_1.json')
-    # print(frames[0][0].keypoints[0])
+    video_path = './video'
+    video_frame_path = './video_frames'
    
-    canvas = np.zeros((360, 640, 3), dtype=np.uint8)
-    canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)
+
+
+
+    
+    print("Done processing all rows.")
+    # canvas = np.zeros((360, 640, 3), dtype=np.uint8)
+    # canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)
    
    #save the image
-    cv2.imwrite('test.png', canvas)
+    # cv2.imwrite('test.png', canvas)
    
 if __name__ == '__main__':
    main()
--- a/prompts/fencerAPI.json
+++ b/prompts/fencerAPI.json
--- a/prompts/group_pic.json
+++ b/prompts/group_pic.json
--- a/ref.png
+++ b/ref.png
--- a/ref_tran.png
+++ b/ref_tran.png
--- a/skeleton_lib.py
+++ b/skeleton_lib.py
@ -1,3 +1,4 @@
+import json
 from typing import List
 import numpy as np
 import math
@ -62,9 +63,30 @@ class Keypoint:
        self.y = y
        self.confidence = confidence

+
    def __repr__(self):
        return f"Keypoint(x={self.x}, y={self.y}, confidence={self.confidence})"

+class KeypointEncoder(json.JSONEncoder):
+    """JSON encoder that writes a ``Keypoint`` as an x/y/confidence object."""
+
+    def default(self, obj):
+        # Anything that is not a Keypoint goes to the stock encoder, which
+        # raises TypeError for unsupported types.
+        if not isinstance(obj, Keypoint):
+            return super().default(obj)
+        return dict(x=obj.x, y=obj.y, confidence=obj.confidence)
+    
+class SkeletonEncoder(KeypointEncoder):
+    """JSON encoder for ``Skeleton`` objects and the keypoints they contain.
+
+    A skeleton is written as ``{'keypoints': [...]}``. The list holds
+    ``Keypoint`` instances, which the json machinery passes back to
+    ``default``; inheriting from ``KeypointEncoder`` lets this class
+    serialise them itself instead of raising ``TypeError``.
+    """
+
+    def default(self, obj):
+        # Skeleton is defined further down the module; the name is only
+        # looked up when default() runs.
+        if isinstance(obj, Skeleton):
+            return {'keypoints': obj.keypoints}
+        return super().default(obj)
+
+class Encoder(json.JSONEncoder):
+    """JSON encoder that accepts both ``Skeleton`` and ``Keypoint`` objects.
+
+    Each type is handed to a throwaway instance of its dedicated encoder.
+    """
+
+    def default(self, obj):
+        if isinstance(obj, Skeleton):
+            delegate = SkeletonEncoder()
+        elif isinstance(obj, Keypoint):
+            delegate = KeypointEncoder()
+        else:
+            return super().default(obj)
+        return delegate.default(obj)
+
 class Skeleton:
    def __init__(self, keypoints: List[Keypoint]):
        self.keypoints = keypoints
--- a/test.json
+++ b/test.json
--- a/test.png
+++ b/test.png
--- a/yolo11n-pose.pt
+++ b/yolo11n-pose.pt
--- a/yolo11x-pose.pt
+++ b/yolo11x-pose.pt
--- a/yolo_main.py
+++ b/yolo_main.py
@ -0,0 +1,111 @@
+import json
+import concurrent
+import pandas as pd
+from ultralytics import YOLO
+import os
+import skeleton_lib as skel
+import torch
+
+def point_in_box(point, box):
+    """Return True when ``point`` (x, y) lies inside ``box`` (x1, y1, x2, y2).
+
+    Points on the border count as inside.
+    """
+    px, py = point
+    left, top, right, bottom = box
+    if not left <= px <= right:
+        return False
+    return top <= py <= bottom
+
+def load_lerped_keypoints(lerped_keypoints_path):
+    """Parse and return the JSON content of ``lerped_keypoints_path``."""
+    with open(lerped_keypoints_path, 'r') as handle:
+        parsed = json.load(handle)
+    return parsed
+
+def get_valid_skeletons(data, data_i, boxes, keypoints):
+    """Pick, for each reference point of a frame, the detection that contains it.
+
+    Starts from two empty skeletons. For each ``{'x', 'y'}`` entry of
+    ``data[data_i]``, the first box in ``boxes.xyxy`` containing that point
+    selects a detection, and a skeleton built from its ``keypoints.xy`` /
+    ``keypoints.conf`` values replaces the slot with the entry's index.
+    Slots without a matching box keep their empty skeleton.
+
+    NOTE(review): ``boxes`` and ``keypoints`` are presumably the per-result
+    ultralytics ``Boxes`` / ``Keypoints`` objects -- confirm against caller.
+    """
+    valid_skeletons = [skel.Skeleton([]) for _ in range(2)]
+
+    for slot, reference in enumerate(data[data_i]):
+        for det_idx, box in enumerate(boxes.xyxy.tolist()):
+            if not point_in_box((reference['x'], reference['y']), box):
+                continue
+
+            matched = skel.Skeleton([])
+            for joint_idx, joint_xy in enumerate(keypoints.xy[det_idx]):
+                values = joint_xy.tolist() + [keypoints.conf[det_idx][joint_idx].item()]
+                matched.keypoints.append(skel.Keypoint(values[0], values[1], values[2]))
+
+            valid_skeletons[slot] = matched
+            break  # the first containing box wins
+
+    return valid_skeletons
+
+def get_yoloed_frames(results, lerped_keypoints_path):
+    """Return the matched skeletons of every result, in result order.
+
+    The reference points are loaded once from ``lerped_keypoints_path``; the
+    n-th result is matched against the n-th entry of that file.
+    """
+    data = load_lerped_keypoints(lerped_keypoints_path)
+    return [
+        get_valid_skeletons(data, frame_idx, result.boxes, result.keypoints)
+        for frame_idx, result in enumerate(results)
+    ]
+
+def process_clip(row, model):
+    """Run ``model`` on one clip's frame folder and save the matched skeletons.
+
+    Input frames come from ``video_frames/<ClipName>``, reference points from
+    ``./lerped_keypoints/<ClipName>.json``, and the result is written to
+    ``./new_yolo_keypoints/<ClipName>.json``. An existing output file is
+    always recomputed and overwritten.
+    """
+    clip_name = row['ClipName']
+    input_video_path = f"video_frames/{clip_name}"
+    lerped_keypoints_path = f"./lerped_keypoints/{clip_name}.json"
+    output_keypoints_path = f"./new_yolo_keypoints/{clip_name}.json"
+
+    # Make sure both the reference folder and the output folder exist.
+    for json_path in (lerped_keypoints_path, output_keypoints_path):
+        os.makedirs(os.path.dirname(json_path), exist_ok=True)
+
+    frames = get_yoloed_frames(model(input_video_path), lerped_keypoints_path)
+
+    with open(output_keypoints_path, 'w') as f:
+        json.dump(frames, f, cls=skel.Encoder, indent=4)
+
+def process_rows_on_gpu(rows, model, device):
+    """Process every clip row with ``model``, retrying each clip up to 5 times.
+
+    When a clip fails, the error is printed, the model is dropped and a
+    fresh ``yolo11x-pose.pt`` model is loaded onto ``device`` before the
+    next attempt. A clip that fails all five attempts is skipped.
+    """
+    for _index, row in rows.iterrows():
+        attempts_left = 5
+        while attempts_left > 0:
+            attempts_left -= 1
+            try:
+                process_clip(row, model)
+            except Exception as e:
+                print(f"Error processing clip: {e}")
+                del model
+                model = YOLO("yolo11x-pose.pt").to(device)
+            else:
+                break
+            
+def gen_yolo_skeletons(descriptor):
+    """Process all clips of ``descriptor``, one worker thread per CUDA device.
+
+    The descriptor is split into contiguous slices, one per device; the last
+    slice also takes the remainder rows. Each slice is handled by
+    ``process_rows_on_gpu`` with its own ``yolo11x-pose.pt`` model. When no
+    CUDA device is available a single CPU worker is used instead.
+
+    A failing worker is reported on stdout and does not stop the others.
+    """
+    # A bare `import concurrent` does not load the `futures` submodule.
+    import concurrent.futures
+
+    num_gpus = torch.cuda.device_count()
+    # With zero GPUs the original divided by zero and asked for a pool of
+    # zero workers; fall back to one CPU worker.
+    devices = [torch.device(f'cuda:{i}') for i in range(num_gpus)] or [torch.device('cpu')]
+    num_workers = len(devices)
+
+    rows_per_worker = len(descriptor) // num_workers
+
+    models = [YOLO("yolo11x-pose.pt").to(device) for device in devices]
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
+        futures = []
+        for i, device in enumerate(devices):
+            start_idx = i * rows_per_worker
+            # The last worker also takes the rows left over by the division.
+            end_idx = (i + 1) * rows_per_worker if i != num_workers - 1 else len(descriptor)
+            worker_rows = descriptor.iloc[start_idx:end_idx]
+            futures.append(executor.submit(process_rows_on_gpu, worker_rows, models[i], device))
+
+        for future in concurrent.futures.as_completed(futures):
+            try:
+                future.result()
+            except Exception as e:
+                print(f"Error processing rows on GPU: {e}")
+
+def gen_yolo_skeletons_single(descriptor):
+    """Process all clips sequentially on ``cuda:0``, or on CPU without CUDA."""
+    device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+    device = torch.device(device_name)
+    single_model = YOLO("yolo11x-pose.pt").to(device)
+
+    process_rows_on_gpu(descriptor, single_model, device)
+
+def main():
+    """Generate YOLO skeletons for every clip listed in the descriptor CSV.
+
+    No model is loaded here: ``gen_yolo_skeletons`` loads its own
+    ``yolo11x-pose.pt`` model for each worker.
+    """
+    descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')
+
+    gen_yolo_skeletons(descriptor)
+
+    # count number of files in the "./new_yolo_keypoints"
+    # print(f"Number of files in ./new_yolo_keypoints: {len(os.listdir('./new_yolo_keypoints'))}")
+
+if __name__ == "__main__":
+    main()