Compare commits


14 Commits
main ... yolo

Author SHA1 Message Date
Kenny Cheng 624eba00b0 bump 2024-10-27 22:05:36 +08:00
Kenny Cheng 5268543b7a add keypoint prediction model 2024-10-26 02:07:17 +08:00
zaqxs123456 0b98303582 on_testing = False 2024-10-25 22:32:55 +08:00
zaqxs123456 4836fb3d6e more fix 2024-10-25 15:53:59 +08:00
zaqxs123456 56d930a854 fix edge cases 2024-10-25 11:43:28 +08:00
zaqxs123456 f5f947f6c2 expo_postprocess wait for sd finish 2024-10-25 11:32:59 +08:00
zaqxs123456 8a4ced5435 gited output for test 2024-10-25 11:18:25 +08:00
zaqxs123456 e52ba46512 add deviation 2024-10-25 10:03:23 +08:00
zaqxs123456 74ac2f0dba fix clear_images 2024-10-25 09:59:25 +08:00
yatman d5a2c6f2e2 Turn off on_testing in app.py 2024-10-24 18:13:57 +00:00
zaqxs123456 9282cbcc74 patch logic error from frontend, right fencer, TODO get_predicted_coordinates 2024-10-25 01:56:14 +08:00
zaqxs123456 fef7bbbc8e postprocess 2024-10-24 23:40:15 +08:00
zaqxs123456 8fb4f8753d gened yolo dataset 2024-10-21 12:38:29 +08:00
zaqxs123456 de2b354196 yolo branch 2024-10-18 15:06:09 +08:00
86 changed files with 19318 additions and 18786 deletions

1
.gitignore vendored
View File

@@ -164,7 +164,6 @@ cython_debug/
FencersKeyPoints/*
# output folder
- output/*
fixed/*
FencersKeyPoints/*
lerped_keypoints/*

File diff suppressed because it is too large.

File diff suppressed because it is too large.

Binary image not shown (before: 180 KiB); filename not recorded.

BIN
Flow.pdf Normal file

Binary file not shown.

216
app.py
View File

@@ -1,16 +1,205 @@
import asyncio
import base64
import hashlib
import json
import random
import uuid
import cv2
from flask import Flask, request, jsonify
import sys
import os
from PIL import Image
import io
import numpy as np
import websocket
import openpose_gen as opg
from comfy_socket import get_images
from postprocessing import expo_shuffle_image_steps, expo_add_to_background_image, expo_postprocess_main
import skeleton_lib as skel
import predict as pred
sys.path.append('./')
app = Flask(__name__)
info = json.load(open('info.json'))
comfyui_address = info['comfyui_address']
expo_raw_sd_dir = info['expo_raw_sd_dir']
expo_openpose_dir = info['expo_openpose_dir']
on_postprocessing = False
on_testing = False
@app.route('/expo_fencing_pose', methods=['POST'])
def expo_fencing_pose():
if on_postprocessing:
return jsonify({"status": "error", "message": "Postprocessing in progress"}), 503
if request.is_json:
data = request.get_json()
coordinates = data['coordinates']
canvas_size = data['canvas_size']
batch = data['batch']
step = data['step']
if coordinates is None or canvas_size is None or 'batch' not in data or 'step' not in data:
return jsonify({"status": "error", "message": "Missing data"}), 422
right_fencer_coordinates = get_predicted_coordinates(coordinates, canvas_size[0], canvas_size[1])
left_fencer_dir = os.path.join(expo_openpose_dir, 'left_fencer')
os.makedirs(left_fencer_dir, exist_ok=True)
right_fencer_dir = os.path.join(expo_openpose_dir, 'right_fencer')
os.makedirs(right_fencer_dir, exist_ok=True)
left_openpose_image_path = opg.expo_save_bodypose(canvas_size[0], canvas_size[1], coordinates, batch, step, left_fencer_dir, skel.coco_limbSeq, skel.coco_colors)
right_openpose_image_path = opg.expo_save_bodypose(canvas_size[0], canvas_size[1], right_fencer_coordinates, batch, step, right_fencer_dir, skel.coco_limbSeq, skel.coco_colors)
left_fencer_raw_image_dir = os.path.join(expo_raw_sd_dir, 'left_fencer')
os.makedirs(left_fencer_raw_image_dir, exist_ok=True)
right_fencer_raw_image_dir = os.path.join(expo_raw_sd_dir, 'right_fencer')
os.makedirs(right_fencer_raw_image_dir, exist_ok=True)
expo_fencer_prompt(left_openpose_image_path, left_fencer_raw_image_dir, batch, step)
expo_fencer_prompt(right_openpose_image_path, right_fencer_raw_image_dir, batch, step)
return jsonify({"status": "success", "message": "Data received"}), 201
else:
return jsonify({"status": "error", "message": "Request must be JSON"}), 415
def get_predicted_coordinates(coordinates: list, width: int, height: int) -> list:
# TODO implement the model to predict the right fencer's coordinates
# coordinates = [x1, y1, c1, x2, y2, c2, ...],
# where x, y are the coordinates and c is the confidence score
# there should be 18 keypoints from 0 to 17
# they are not normalized, they are by the size of the width and height
# the the limbSeq and colors of points need to convert from and to skel.coco_limbSeq, skel.coco_colors
# those are in skeleton_lib.py
# when testing, can visualize with the method expo_save_bodypose in openpose_gen.py
#try:
# predicted = pred.predict_pose_keypoints(np.array(coordinates, dtype=np.float32).copy().reshape(1, 18, 3))
# # add confidence score
# predicted = np.concatenate((predicted, np.ones((1, 18, 1))), axis=2)
# print(predicted.flatten().tolist())
# print('done')
# return predicted.flatten().tolist()
#except Exception as e:
# print(e)
# pass
#predicted[:, :, 3] = 1
#return predicted.flatten().tolist()
# for now, just mirror the coordinates and add some random deviation
predicted_coordinates = mirror_coordinates(coordinates, width)
for i in range(0, len(predicted_coordinates), 3):
deviation = random.randint(-10, 10)
predicted_coordinates[i] += deviation
deviation = random.randint(-10, 10)
predicted_coordinates[i + 1] += deviation
return predicted_coordinates
def mirror_coordinates(coordinates: list, width: int) -> list:
mirrored = coordinates.copy()
for i in range(0, len(coordinates), 3):
mirrored[i] = width - coordinates[i]
return mirrored
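A quick worked example of the mirroring step (values are illustrative): with a canvas width of 640, a keypoint at x=100 maps to x=540 while y and confidence are untouched.
print(mirror_coordinates([100, 50, 1.0], 640))   # -> [540, 50, 1.0]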
def expo_fencer_prompt(openpose_image_path, save_dir, batch, step):
prompt = json.loads(open("./prompts/fencer_03.json", "r", encoding="utf-8").read())
openpose_image_name = opg.upload_image(openpose_image_path)
opg.upload_image("./images/ref_black.png", "ref_black.png")
print(openpose_image_name)
prompt["3"]["inputs"]["seed"] = random.randint(0, 10000000000)
prompt["29"]["inputs"]['image'] = "ref_black.png"
prompt["17"]["inputs"]['image'] = openpose_image_name
client_id = hashlib.sha256(str(random.getrandbits(256)).encode('utf-8')).hexdigest()
ws = websocket.WebSocket()
ws.connect("ws://{}/ws?clientId={}".format(comfyui_address, client_id))
images = get_images(ws, prompt, client_id)
for node_id in images:
for idx, image_data in enumerate(images[node_id]):
image = Image.open(io.BytesIO(image_data))
image_path = os.path.join(save_dir, f"{batch}_{step}.png")
image.save(image_path)
def expo_clear_images():
if on_testing:
return
for root, dirs, files in os.walk(expo_openpose_dir):
for file in files:
os.remove(os.path.join(root, file))
for root, dirs, files in os.walk(expo_raw_sd_dir):
for file in files:
os.remove(os.path.join(root, file))
@app.route('/expo_postprocess', methods=['POST'])
async def expo_postprocess():
global on_postprocessing
try:
if on_postprocessing:
return jsonify({"status": "error", "message": "Postprocessing in progress"}), 503
on_postprocessing = True
print("pending postprocessing")
# Wait until the directories have the same files or timeout
if not await wait_for_files_to_match(expo_openpose_dir, expo_raw_sd_dir):
print("Timeout reached, proceeding with postprocessing")
# Check if directories exist and are not empty
if not os.path.exists(expo_openpose_dir) or not os.listdir(expo_openpose_dir):
on_postprocessing = False
return jsonify({"status": "error", "message": "No images to process in expo_openpose_dir"}), 404
if not os.path.exists(expo_raw_sd_dir) or not os.listdir(expo_raw_sd_dir):
on_postprocessing = False
return jsonify({"status": "error", "message": "No images to process in expo_raw_sd_dir"}), 404
print("Postprocessing")
await asyncio.to_thread(expo_postprocess_main)
return jsonify({"status": "success", "message": "Postprocessing completed"}), 200
except Exception as e:
on_postprocessing = False
print(e)
return jsonify({"status": "error", "message": "An error occurred during postprocessing"}), 500
finally:
on_postprocessing = False
await asyncio.to_thread(expo_clear_images)
print("Postprocessing completed")
async def wait_for_files_to_match(dir1: str, dir2: str, timeout: int = 180, interval: int = 1) -> bool:
start_time = asyncio.get_event_loop().time()
while asyncio.get_event_loop().time() - start_time < timeout:
files1 = get_all_files(dir1)
files2 = get_all_files(dir2)
if files1 == files2:
return True
await asyncio.sleep(interval)
return False
def get_all_files(directory):
all_files = set()
for root, _, files in os.walk(directory):
for file in files:
# Store the relative path of the file
relative_path = os.path.relpath(os.path.join(root, file), directory)
all_files.add(relative_path)
return all_files
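In other words, postprocessing waits until the relative paths under the openpose and raw-SD output folders line up; the contents below are illustrative.
# get_all_files(expo_openpose_dir) == get_all_files(expo_raw_sd_dir)
#   == {'left_fencer/0_0.png', 'right_fencer/0_0.png', 'left_fencer/0_1.png', ...}
# wait_for_files_to_match returns True as soon as the two sets are equal,
# and False if they still differ after the 180 s timeout.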
@app.route('/gen_image', methods=['POST'])
def gen_image():
if request.is_json:
@@ -23,7 +212,7 @@ def gen_image():
return jsonify({"status": "error", "message": "Missing data"}), 422
openpose_image_path = opg.save_bodypose(canvas_size[0], canvas_size[1], coordinates, pid)
- # gen_fencer_prompt(openpose_image_path, pid, opg.server_address)
+ # gen_fencer_prompt(openpose_image_path, pid, comfyui_address)
return jsonify({"status": "success", "message": "Data received"}), 201
else:
@@ -38,6 +227,10 @@ def gen_group_pic():
canvas_size = data['canvas_size']
pid = data['pid']
base_image = base64.b64decode(data['base_image'])
+ # resize base image to 1280x720
+ #base_image = cv2.imdecode(np.frombuffer(base_image, np.uint8), cv2.IMREAD_COLOR)
+ #base_image = cv2.resize(base_image, (1280, 720))
+ #base_image = cv2.imencode('.png', base_image)[1].tobytes()
if not coordinates_list or not canvas_size or not base_image or not pid:
return jsonify({"status": "error", "message": "Missing data"}), 422
@@ -46,30 +239,28 @@ def gen_group_pic():
coordinates_list[i] = coordinates_list[i]['coordinates']
openpose_image_path = opg.save_bodypose_mulit(canvas_size[0], canvas_size[1], coordinates_list, pid)
- gen_group_pic_prompt(openpose_image_path, base_image, pid, opg.server_address)
+ gen_group_pic_prompt(openpose_image_path, base_image, pid, comfyui_address)
return jsonify({"status": "success", "message": "Data received"}), 201
else:
return jsonify({"status": "error", "message": "Request must be JSON"}), 415
def gen_fencer_prompt(openpose_image_path, pid, comfyUI_address):
- with open("fencerAPI.json", "r") as f:
+ with open("./prompts/fencerAPI.json", "r") as f:
prompt_json = f.read()
prompt = json.loads(prompt_json)
openpose_image_name = opg.upload_image_circular_queue(openpose_image_path, 20, pid, comfyUI_address)
- opg.upload_image("ref_black.png", "ref_black.png")
+ opg.upload_image("./images/ref_black.png", "ref_black.png")
prompt["3"]["inputs"]["seed"] = random.randint(0, 10000000000)
- prompt["29"]["inputs"]['image'] = "ref_black.png"
+ prompt["29"]["inputs"]['image'] = "./images/ref_black.png"
prompt["17"]["inputs"]['image'] = openpose_image_name
opg.queue_prompt(prompt, comfyUI_address)
def gen_group_pic_prompt(openpose_image_path, base_image, pid, comfyUI_address):
- with open("group_pic.json", "r") as f:
+ with open("./prompts/group_pic.json", "r") as f:
prompt_json = f.read()
prompt = json.loads(prompt_json)
@@ -81,6 +272,9 @@ def gen_group_pic_prompt(openpose_image_path, base_image, pid, comfyUI_address):
prompt["14"]["inputs"]['image'] = base_image_name
opg.queue_prompt(prompt, comfyUI_address)
if __name__ == '__main__':
- app.run(debug=True)
+ expo_postprocess()
+ # app.run(debug=True)

Binary image not shown (before: 15 KiB); filename not recorded.

48
comfy_socket.py Normal file
View File

@ -0,0 +1,48 @@
import random
import websocket #NOTE: websocket-client (https://github.com/websocket-client/websocket-client)
import uuid
import json
import urllib.request
import urllib.parse
server_address = "127.0.0.1:8188"
def queue_prompt(prompt, client_id):
p = {"prompt": prompt, "client_id": client_id}
data = json.dumps(p).encode('utf-8')
req = urllib.request.Request("http://{}/prompt".format(server_address), data=data)
return json.loads(urllib.request.urlopen(req).read())
def get_image(filename, subfolder, folder_type):
data = {"filename": filename, "subfolder": subfolder, "type": folder_type}
url_values = urllib.parse.urlencode(data)
with urllib.request.urlopen("http://{}/view?{}".format(server_address, url_values)) as response:
return response.read()
def get_history(prompt_id):
with urllib.request.urlopen("http://{}/history/{}".format(server_address, prompt_id)) as response:
return json.loads(response.read())
def get_images(ws, prompt, client_id):
prompt_id = queue_prompt(prompt, client_id)['prompt_id']
output_images = {}
current_node = ""
while True:
out = ws.recv()
if isinstance(out, str):
message = json.loads(out)
if message['type'] == 'executing':
data = message['data']
if data['prompt_id'] == prompt_id:
if data['node'] is None:
break #Execution is done
else:
current_node = data['node']
else:
if current_node == 'save_image_websocket_node':
images_output = output_images.get(current_node, [])
images_output.append(out[8:])
output_images[current_node] = images_output
return output_images
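A minimal sketch of how these helpers are driven, assuming the module-level names above (server_address, get_images) are in scope; the same flow appears in expo_fencer_prompt in app.py, and the workflow file is one of the prompts added in this branch.
import json
import uuid
import websocket

client_id = str(uuid.uuid4())
prompt = json.loads(open("./prompts/fencer_03.json", "r", encoding="utf-8").read())
ws = websocket.WebSocket()
ws.connect("ws://{}/ws?clientId={}".format(server_address, client_id))
images = get_images(ws, prompt, client_id)   # {node_id: [png_bytes, ...]}
ws.close()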

View File

Binary image changed (before: 171 KiB, after: 171 KiB); filename not recorded.

7
info.json Normal file
View File

@ -0,0 +1,7 @@
{
"comfyui_address": "localhost:8188",
"expo_raw_sd_dir": "output/expo_raw_sd",
"expo_openpose_dir": "output/expo_openpose",
"expo_postprocessed_dir": "output/expo_postprocessed",
"expo_postprocess_temp_dir": "output/expo_postprocess_temp"
}

Binary image not shown (before: 39 KiB); filename not recorded.

Binary file changed (not shown); filename not recorded.

View File

@@ -13,12 +13,29 @@ import sys
import hashlib
sys.path.append('./')
- server_address = "localhost:8188"
+ # read json from info.json
+ info = json.load(open('info.json'))
+ server_address = info['comfyui_address']
+ expo_openpose_dir = info['expo_openpose_dir']
def coordinates_to_keypoints(coordinates: list) -> List[skel.Keypoint]:
keypoints = [skel.Keypoint(coordinates[i], coordinates[i + 1]) for i in range(0, len(coordinates), 3)]
return keypoints
def expo_save_bodypose(width: int, height: int, coordinates: list, batch: int, step: int, save_dir: str, limbSeq: list[int], colors: list[int]) -> str:
canvas = np.zeros((height, width, 3), dtype=np.uint8)
keypoints = coordinates_to_keypoints(coordinates)
canvas = skel.draw_bodypose(canvas, keypoints, limbSeq, colors)
# Save as {batch}_{step}.png, {batch}_{step}.png, ...
if not os.path.exists(save_dir):
os.makedirs(save_dir)
image_path = os.path.join(save_dir, '%d_%d.png' % (batch, step))
image_path = image_path.replace('\\', '/')
cv2.imwrite(image_path, canvas)
return image_path
def save_bodypose(width: int, height: int, coordinates: list, pid: str) -> None:
if not hasattr(save_bodypose, 'counter'):
save_bodypose.counter = 0 # Initialize the counter attribute
@@ -57,13 +74,17 @@ def save_bodypose_mulit(width: int, height: int, coordinates_list: list, pid: st
return image_path
- def queue_prompt(prompt, server_address):
+ def queue_prompt(prompt):
p = {"prompt": prompt}
data = json.dumps(p).encode('utf-8')
req = request.Request("http://{}/prompt".format(server_address), data=data)
request.urlopen(req)
- def upload_image(input_image, name, server_address, image_type="input", overwrite=True):
+ def upload_image(input_image, image_name="", image_type="input", overwrite=True) -> str:
+ if image_name == "":
+ # generate a random name here
+ image_name = hashlib.sha256(str(random.getrandbits(256)).encode('utf-8')).hexdigest() + ".png"
# Check if input_image is a valid file path
if isinstance(input_image, str) and os.path.isfile(input_image):
file = open(input_image, 'rb')
@@ -75,7 +96,7 @@ def upload_image(input_image, name, server_address, image_type="input", overwrit
try:
multipart_data = MultipartEncoder(
fields={
- 'image': (name, file, 'image/png'),
+ 'image': (image_name, file, 'image/png'),
'type': image_type,
'overwrite': str(overwrite).lower()
}
@@ -85,12 +106,12 @@ def upload_image(input_image, name, server_address, image_type="input", overwrit
headers = {'Content-Type': multipart_data.content_type}
request = urllib.request.Request("http://{}/upload/image".format(server_address), data=data, headers=headers)
with urllib.request.urlopen(request) as response:
- return response.read()
+ return json.loads(response.read().decode('utf-8'))["name"]
finally:
if close_file:
file.close()
- def upload_image_circular_queue(image_path, size, unqiue_id, server_address):
+ def upload_image_circular_queue(image_path, size, unqiue_id):
# create a dict in this function to store the counter for each unique_id, key is the unique_id, value is the counter
if not hasattr(upload_image_circular_queue, 'id_counter_dict'):
upload_image_circular_queue.id_counter_dict = {}
@@ -100,19 +121,22 @@ def upload_image_circular_queue(image_path, size, unqiue_id, server_address):
image_name = hashlib.sha256((unqiue_id + str(upload_image_circular_queue.id_counter_dict[unqiue_id])).encode('utf-8')).hexdigest() + ".png"
upload_image_circular_queue.id_counter_dict[unqiue_id] += 1 % size
- upload_image(image_path, image_name, server_address)
+ upload_image(image_path, image_name)
return image_name
- def main():
+ def visualize_for_fixed_dataset(json_file: str):
directory = './fixed'
json_files = [f for f in os.listdir(directory) if f.endswith('.json')]
if not json_files:
print("No JSON files found in the directory.")
return
- json_file = os.path.join(directory, random.choice(json_files))
+ # json_file = os.path.join(directory, random.choice(json_files))
+ json_file = './fixed/0001_002_00_01_1.json'
# json_file = './test_output.json'
+ # create ./output directory if it does not exist
+ os.makedirs('output', exist_ok=True)
image_path = './output/test'
print(json_file)
@@ -131,5 +155,30 @@ def main():
cv2.imwrite(image_path + '_' + str(i) + '.png', canvas)
def visualize_for_new_yolo_dataset():
directory = './new_yolo_keypoints'
json_files = [f for f in os.listdir(directory) if f.endswith('.json')]
if not json_files:
print("No JSON files found in the directory.")
return
json_file = os.path.join(directory, random.choice(json_files))
# create ./output directory if it does not exist
os.makedirs('output', exist_ok=True)
frames = pjf.read_new_yolo_keypoints(json_file)
for i, skeletons in enumerate(frames):
canvas = np.zeros((360, 640, 3), dtype=np.uint8)
for skeleton in skeletons:
keypoints = skeleton.keypoints
canvas = skel.draw_bodypose(canvas, keypoints, skel.yolo_coco_limbSeq, skel.yolo_coco_colors)
print(os.path.join('./output', os.path.basename(json_file) + '_' + str(i) + '.png'))
cv2.imwrite(os.path.join('./output', 'a_test' + '_' + str(i) + '.png'), canvas)
def main():
visualize_for_new_yolo_dataset()
if __name__ == '__main__':
main()

38 new binary image files (4.1 KiB to 518 KiB each); filenames and previews not shown.

BIN
output/expo_raw_sd/*.png (16 new binary files, previews not shown):
0_0.png (825 KiB), 0_1.png (808 KiB), 0_2.png (801 KiB), 0_3.png (790 KiB),
1_0.png (785 KiB), 1_1.png (808 KiB), 1_2.png (833 KiB), 1_3.png (780 KiB),
2_0.png (738 KiB), 2_1.png (744 KiB), 2_2.png (760 KiB), 2_3.png (781 KiB),
3_0.png (700 KiB), 3_1.png (749 KiB), 3_2.png (785 KiB), 3_3.png (794 KiB)

256
postprocessing.py Normal file
View File

@ -0,0 +1,256 @@
import json
import math
import os
import random
import shutil
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
import cv2
import numpy as np
info = json.load(open('info.json'))
expo_raw_sd_dir = info['expo_raw_sd_dir']
expo_postprocessed_dir = info['expo_postprocessed_dir']
expo_postprocess_temp_dir = info['expo_postprocess_temp_dir']
def expo_get_step_by_name(image_name: str) -> int:
return int(image_name.split('_')[1].split('.')[0])
def expo_get_batch_by_name(image_name: str) -> int:
return int(image_name.split('_')[0])
def expo_shuffle_image_steps(image_dir) -> list[list[str]]:
images = {}
# Read and categorize image paths by step
for image_name in os.listdir(image_dir):
step = expo_get_step_by_name(image_name)
image_path = os.path.join(image_dir, image_name)
if step in images:
images[step].append(image_path)
else:
images[step] = [image_path]
# Shuffle the image paths for each step
for step_images in images.values():
random.shuffle(step_images)
# Convert the dictionary to a 2D list and find the minimum length
shuffled_images = list(images.values())
min_length = min(len(step_images) for step_images in shuffled_images)
# Crop each list to the minimum length
shuffled_images = [step_images[:min_length] for step_images in shuffled_images]
# finally, get the first image of each step, put them in a list, then the second image of each step, basically transpose the list
shuffled_images = list(map(list, zip(*shuffled_images)))
return shuffled_images
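To illustrate the shuffle with hypothetical filenames: raw images are named {batch}_{step}.png, so four files from two batches and two steps regroup into per-candidate lists, each holding one image per step.
# e.g. with files 0_0.png, 0_1.png, 1_0.png, 1_1.png in image_dir:
#   step 0 -> ['.../0_0.png', '.../1_0.png'], step 1 -> ['.../0_1.png', '.../1_1.png']
# after shuffling within each step and transposing, each inner list holds one image per step:
#   [['.../1_0.png', '.../0_1.png'], ['.../0_0.png', '.../1_1.png']]   (one possible shuffle)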
def expo_add_to_background_image(background_path: str, image_path: str, output_path: str, x: int, y: int) -> str:
# Use ImageMagick to blend the image with the background using Linear Light blend mode
command = [
"magick",
background_path,
image_path,
"-geometry", f"+{x}+{y}",
"-compose", "LinearLight",
"-composite",
output_path
]
subprocess.run(command, check=True)
return output_path
def expo_add_logo(background_path: str, image_path: str, output_path: str, x: int, y: int) -> str:
# Use ImageMagick to blend the image with the background using normal blend mode
command = [
"magick",
background_path,
image_path,
"-geometry", f"+{x}+{y}",
"-compose", "Over",
"-composite",
output_path
]
subprocess.run(command, check=True)
return output_path
def expo_resize_fencer(image_path: str, output_path: str, width: int, height: int) -> str:
# Use ImageMagick to resize the image
command = [
"magick",
image_path,
"-resize", f"{width}x{height}",
output_path
]
subprocess.run(command, check=True)
return output_path
def expo_resize_fencers(path_list: list[str], is_left: bool, width: int, height: int) -> list[str]:
output_dir = os.path.join(expo_postprocess_temp_dir, f"{'left' if is_left else 'right'}_fencers")
os.makedirs(output_dir, exist_ok=True)
resized_paths = [os.path.join(output_dir, f"{i}.png") for i in range(len(path_list))]
futures_to_index = {}
with ThreadPoolExecutor() as executor:
for i, image_path in enumerate(path_list):
output_path = resized_paths[i]
future = executor.submit(expo_resize_fencer, image_path, output_path, width, height)
futures_to_index[future] = i
for future in as_completed(futures_to_index):
index = futures_to_index[future]
resized_paths[index] = future.result()
return resized_paths
def expo_motion_blur_fencer(image_path: str, output_path: str, sigma: float, direction: float) -> str:
# Use ImageMagick to apply motion blur to the image with the specified direction
command = [
"magick",
image_path,
"-motion-blur", f"0x{sigma}+{direction}",
output_path
]
subprocess.run(command, check=True)
return output_path
def expo_motion_blur_fencers(path_list: list[str]) -> list[str]:
futures = []
with ThreadPoolExecutor() as executor:
for i, image_path in enumerate(path_list):
sigma = 15 - 15 * i / (len(path_list) - 1)
direction = 0
future = executor.submit(expo_motion_blur_fencer, image_path, image_path, sigma, direction)
futures.append(future)
for future in as_completed(futures):
future.result()
def expo_overlay_bg_gradient(image_path: str, output_path: str, bg_gradient_path: str) -> str:
# Use ImageMagick to overlay the image with a background gradient
command = [
"magick",
image_path,
bg_gradient_path,
"-compose", "Overlay",
"-composite",
output_path
]
subprocess.run(command, check=True)
return output_path
def expo_decrese_opacity(image_path: str, output_path: str, opacity: int) -> str:
# Use ImageMagick to decrease the opacity of the image
command = [
"magick",
image_path,
"-channel", "A",
"-evaluate", "multiply", f"{opacity/100}",
output_path
]
subprocess.run(command, check=True)
return output_path
def expo_decrese_opacities(path_list: list[str]) -> list[str]:
futures = []
with ThreadPoolExecutor() as executor:
for i, image_path in enumerate(path_list):
opacity = 30 + 70 * i / (len(path_list) - 1)
future = executor.submit(expo_decrese_opacity, image_path, image_path, opacity)
futures.append(future)
for future in as_completed(futures):
future.result()
def output_to_display_folder(output_image_paths):
# copy the output images to the display folder (expo_postprocessed_dir)
# the format is {session}_{candidate}.png, this session should be the max session from expo_postprocess_dir, the candidate should be the index of the output_image_paths
session = str(current_session()).zfill(5)
for i, image_path in enumerate(output_image_paths):
candidate = str(i).zfill(5)
output_image_path = os.path.join(expo_postprocessed_dir, f"{session}_{candidate}.png")
# copy the image
shutil.copy(image_path, output_image_path)
def current_session():
max_session = 0
for file in os.listdir(expo_postprocessed_dir):
if file.endswith(".png"):
session = int(file.split("_")[0])
if session > max_session:
max_session = session
return max_session + 1
def expo_postprocess_main():
print("Postprocessing")
os.makedirs(expo_postprocessed_dir, exist_ok=True)
os.makedirs(expo_postprocess_temp_dir, exist_ok=True)
left_fencer_raw_image_dir = os.path.join(expo_raw_sd_dir, 'left_fencer')
right_fencer_raw_image_dir = os.path.join(expo_raw_sd_dir, 'right_fencer')
if not os.path.exists(left_fencer_raw_image_dir) or not os.path.exists(right_fencer_raw_image_dir):
print("Raw images not found")
return
left_shuffled_images_paths = expo_shuffle_image_steps(left_fencer_raw_image_dir)
right_shuffled_images_paths = expo_shuffle_image_steps(right_fencer_raw_image_dir)
background_path = os.path.join(expo_postprocess_temp_dir, 'background.png')
logo_path = os.path.join(expo_postprocess_temp_dir, 'logo.png')
if not os.path.exists(background_path):
background = np.zeros((720, 1080, 3), dtype=np.uint8)
cv2.imwrite(background_path, background)
bg_gradient_folder = os.path.join(expo_postprocess_temp_dir, 'bg_gradient')
bg_gradients = [os.path.join(bg_gradient_folder, f"{i:02d}.png") for i in range(4)]
output_files = []
for i, candidate_list in enumerate(left_shuffled_images_paths):
left_fencer_paths = expo_resize_fencers(candidate_list, True, 450, 450)
expo_motion_blur_fencers(left_fencer_paths)
expo_decrese_opacities(left_fencer_paths)
temp_output_path = os.path.join(expo_postprocess_temp_dir, f"temp_{i}.png")
output_files.append(temp_output_path)
temp_background_path = background_path
for j, left_fencer_path in enumerate(left_fencer_paths):
x_position = 34 * math.pow(j, 1.3) - 100
y_position = 170
expo_add_to_background_image(temp_background_path, left_fencer_path, temp_output_path, x_position, y_position)
temp_background_path = temp_output_path
for i, candidate_list in enumerate(right_shuffled_images_paths):
if i > len(left_shuffled_images_paths) - 1:
break
right_fencer_paths = expo_resize_fencers(candidate_list, False, 450, 450)
expo_motion_blur_fencers(right_fencer_paths)
expo_decrese_opacities(right_fencer_paths)
temp_output_path = os.path.join(expo_postprocess_temp_dir, f"temp_{i}.png")
if not os.path.exists(temp_output_path):
break
for j, right_fencer_path in enumerate(right_fencer_paths):
x_position = 540 - (34 * math.pow(j, 1.3) - 170)
y_position = 170
expo_add_to_background_image(temp_output_path, right_fencer_path, temp_output_path, x_position, y_position)
temp_background_path = temp_output_path
expo_overlay_bg_gradient(temp_output_path, temp_output_path, bg_gradients[i % len(bg_gradients)])
expo_add_logo(temp_output_path, logo_path, temp_output_path, 650, 630)
output_to_display_folder(output_files)
if __name__ == '__main__':
expo_postprocess_main()

140
predict.py Normal file
View File

@ -0,0 +1,140 @@
# import cv2
from glob import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import torch
device = 'cuda' if torch.cuda.is_available() else 'cpu'
use_amp = True
model_path = './models/loss8.517782751325285.pt'
# define helper functions
def load_dataset_from_npy(dir_path):
npy_files = glob(os.path.join(os.path.realpath(dir_path), '*.npy'))
return npy_files
def find_bbox(keypoints):
keypoints_copy = keypoints
to_delete = []
cnt = 0
for kp in keypoints_copy:
# print(kp.shape)x
pos = (kp[0], kp[1])
if (pos == (0, 0)) or (pos == (1, 0)):
to_delete.append(cnt)
cnt += 1
keypoints_copy = np.delete(keypoints_copy, to_delete, 0)
return [min(keypoints_copy[:, 0]), max(keypoints_copy[:, 0]), min(keypoints_copy[:, 1]), max(keypoints_copy[:, 1])]
def get_dist_grid(dist_ref, grid_dim=[30, 1], offsets=[0, 0]):
dist_grid = torch.zeros([grid_dim[0], grid_dim[1], 2]).to(device)
offsetX = torch.tensor([offsets[0], 0.0]).float().to(device)
offsetY = torch.tensor([0.0, offsets[1]]).float().to(device)
for i in range(grid_dim[0]):
for j in range(grid_dim[1]):
dist_grid[i, j, :] = dist_ref + \
offsetX * (i - int((grid_dim[0]) / 2)) + \
offsetY * (j - int((grid_dim[1]) / 2))
return dist_grid
def bbox_center(bbox):
return ((bbox[0] + bbox[1])/2, (bbox[2] + bbox[3])/2)
def bbox_dists(bbox1, bbox2):
return (np.array(bbox_center(bbox1)) - np.array(bbox_center(bbox2)))
def openpose17_colors():
return ['#ff0000', '#ff5500', '#ffaa00', '#ffff00', '#aaff00', '#55ff00', '#00ff00', '#00ff55', '#00ffaa', '#00ffff', '#00aaff', '#0055ff', '#0000ff', '#5500ff', '#aa00ff', '#ff00ff', '#ff00aa', '#ff0055']
def keypoints25to18(keypoints):
return np.delete(keypoints, [8, 19, 20, 21, 22, 23, 24], 0)
def get_keypoints_linkage():
return np.array([[0, 1], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [0, 14], [14, 16], [0, 15], [15, 17]])
@torch.compile
def batch_pose_confidence_mat(batch_pose_keypoints_with_confidence):
keypoints_conf = batch_pose_keypoints_with_confidence[:, :, 2]
confidence_mat = torch.zeros([keypoints_conf.shape[0], get_keypoints_linkage().shape[0]]).to(device)
for i in range(get_keypoints_linkage().shape[0]):
for j in range(keypoints_conf.shape[0]):
if keypoints_conf[j, get_keypoints_linkage()[i, 0]] == 0 or keypoints_conf[j, get_keypoints_linkage()[i, 1]] == 0:
confidence_mat[j, i] = 0
else:
confidence_mat[j, i] = (keypoints_conf[j, get_keypoints_linkage()[i, 0]] + keypoints_conf[j, get_keypoints_linkage()[i, 1]]) / 2
return confidence_mat
@torch.compile
def pose_diff(output_pose_keypoints, target_pose_keypoints, confidence_mat):
link_open = get_keypoints_linkage()[:, 0]
link_end = get_keypoints_linkage()[:, 1]
p1 = (output_pose_keypoints[:, link_open, :2] - target_pose_keypoints[:, link_open, :2]).reshape(output_pose_keypoints.shape[0], get_keypoints_linkage().shape[0], 2)
p2 = (output_pose_keypoints[:, link_end, :2] - target_pose_keypoints[:, link_end, :2]).reshape(output_pose_keypoints.shape[0], get_keypoints_linkage().shape[0], 2)
return torch.sum(torch.sum(torch.pow(p1, 2) + torch.pow(p2, 2), axis=2) * confidence_mat) / get_keypoints_linkage().shape[0] / output_pose_keypoints.shape[0]
@torch.compile
def pose_loss(outputs, batch_target_pose_keypoints_with_confidence):
err = pose_diff(outputs, batch_target_pose_keypoints_with_confidence, batch_pose_confidence_mat(batch_target_pose_keypoints_with_confidence))
return torch.abs(torch.sum(err))
@torch.compile
def zscore_normalization(data):
return torch.std(data), torch.mean(data), (data - torch.mean(data)) / torch.std(data)
model = torch.nn.Sequential(
torch.nn.Linear(36, 256),
torch.nn.Tanh(),
torch.nn.Dropout(0.1),
torch.nn.Linear(256, 512),
torch.nn.Tanh(),
torch.nn.Dropout(0.1),
torch.nn.Linear(512, 256),
torch.nn.Tanh(),
torch.nn.Dropout(0.1),
torch.nn.Linear(256, 36)
).to(device)
loss_fn = pose_loss #torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
scaler.load_state_dict(checkpoint['scaler'])
sample_data = torch.Tensor([[[-0.9695, -1.6531, 2.2570],
[-0.9758, -1.5557, 2.5996],
[-1.0910, -1.5669, 2.2916],
[-1.1820, -1.3080, 2.4095],
[-1.0606, -1.2970, 2.6237],
[-0.8728, -1.5446, 2.4116],
[-0.7996, -1.2856, 2.2992],
[-0.6417, -1.3074, 2.2848],
[-1.1578, -1.0483, 2.3292],
[-1.2732, -0.6165, 2.3635],
[-1.3583, -0.2720, 2.3981],
[-1.0120, -1.0378, 2.3269],
[-0.8237, -0.7680, 2.5688],
[-0.7751, -0.3148, 2.4276],
[-0.9878, -1.7177, 2.1040],
[-0.9453, -1.7068, 1.8512],
[-1.0184, -1.7280, 1.7790],
[-0.9146, -1.6959, 0.9578]]]).to(device)
model.eval()
def predict_pose_keypoints(data):
std, mean, data = zscore_normalization(data)
data = data[:,:,:2].reshape(1, 36).to(device)
with torch.cuda.amp.autocast(enabled=use_amp):
outputs = model(data)
outputs = (outputs * std + mean).reshape(18, 2).cpu().detach().numpy()
return outputs
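A small usage sketch, mirroring the commented-out call in app.py's get_predicted_coordinates; re-attaching a confidence column of 1.0 is an assumption taken from that snippet.
import numpy as np

out_xy = predict_pose_keypoints(sample_data)                  # (18, 2) array of x, y
out_xyc = np.concatenate((out_xy, np.ones((18, 1))), axis=1)  # re-attach a confidence column
flat = out_xyc.flatten().tolist()                             # [x1, y1, c1, x2, y2, c2, ...]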

View File

@@ -1,5 +1,6 @@
import os
import json
+ import subprocess
import sys
import cv2
import numpy as np
@@ -30,6 +31,34 @@ def array_json_to_Skeleton_Seqences(json_file: str) -> List[skel.Skeleton_Seqenc
skeleton_sequences[i].add_frame(skeleton)
return skeleton_sequences
def read_new_yolo_keypoints(json_file: str) -> List[List[skel.Skeleton]]:
with open(json_file, 'r') as file:
data = json.load(file)
frames = []
for frame in data:
skeletons = []
for skeleton in frame:
keypoints = [skel.Keypoint(point['x'], point['y'], point['confidence']) for point in skeleton['keypoints']]
skeletons.append(skel.Skeleton(keypoints))
frames.append(skeletons)
return frames
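The JSON layout this expects, inferred from the loop above, is a list of frames, each a list of skeletons, each carrying a 'keypoints' list of {x, y, confidence} dicts; the values below are illustrative.
# [                                   # one entry per frame
#   [                                 # one entry per skeleton in that frame
#     {"keypoints": [{"x": 101.0, "y": 52.0, "confidence": 0.93}, ...]},
#     {"keypoints": [{"x": 455.0, "y": 60.0, "confidence": 0.88}, ...]}
#   ],
#   ...
# ]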
def add_neck_to_og_coco(folder: str):
for file in os.listdir(folder):
with open(os.path.join(folder, file), 'r') as f:
data = json.load(f)
print(f"Processing {file}")
for frame in data:
for skeleton in frame:
_skeleton = skeleton['keypoints']
# Add neck keypoint to the original COCO keypoints
if len(_skeleton) > 6:
neck = {'x': (_skeleton[6]['x'] + _skeleton[7]['x']) / 2, 'y': (_skeleton[6]['y'] + _skeleton[7]['y']) / 2, 'confidence': (_skeleton[6]['confidence'] + _skeleton[7]['confidence']) / 2}
_skeleton.insert(6, neck)
with open(os.path.join(folder, file), 'w') as f:
json.dump(data, f, indent=4)
def Skeleton_Seqences_save_to_array_json(skeleton_sequences: List[skel.Skeleton_Seqence], json_file: str):
# Ensure the directory exists
os.makedirs(os.path.dirname(json_file), exist_ok=True)
@@ -103,30 +132,199 @@ def get_frames_from_fixed_json(json_file):
frames = []
with open(json_file, 'r') as file:
data = json.load(file)
- for frame in data:
+ for index, frame in enumerate(data):
- skeletons = []
+ two_skeletons = []
for i in range(2): # Assuming there are always 2 skeletons
keypoints = []
for point in frame[i]:
keypoint = skel.Keypoint(point[0], point[1], point[2])
keypoints.append(keypoint)
- skeletons.append(skel.Skeleton(keypoints))
+ two_skeletons.append(skel.Skeleton(keypoints))
- frames.append(skeletons)
+ frames.append(two_skeletons)
return frames
- def main():
+ def get_avg_keypoints(keypoints):
x = [point.x for point in keypoints]
y = [point.y for point in keypoints]
confidence = [point.confidence for point in keypoints]
avg_x = sum(x) / len(x) if len(x) > 0 else 0
avg_y = sum(y) / len(y) if len(y) > 0 else 0
avg_confidence = sum(confidence) / len(confidence) if len(confidence) > 0 else 0
return skel.Keypoint(avg_x, avg_y, avg_confidence)
def get_avg_keypoints_in_frames(frames):
avg_keypoints_in_frames = []
for frame in frames:
avg_keypoints = []
for skeleton in frame:
avg_keypoints.append(get_avg_keypoints(skeleton.keypoints))
avg_keypoints_in_frames.append(avg_keypoints)
return avg_keypoints_in_frames
def process_avg_keypoints_row(row, output_dir):
json_file = './fixed/' + row['ClipName'] + '.json'
print(f"Processing {json_file}")
frames = get_frames_from_fixed_json(json_file)
avg_keypoints = get_avg_keypoints_in_frames(frames)
with open(os.path.join(output_dir, row['ClipName'] + '.json'), 'w') as file:
json.dump(avg_keypoints, file, indent=4, cls=skel.KeypointEncoder)
def process_batch_avg_keypoints_row(batch, output_dir):
print(f"Processing batch of {len(batch)} rows.")
for _, row in batch.iterrows():
process_avg_keypoints_row(row, output_dir)
def process_descriptor_save_avg_keypoints(descriptor: pd.DataFrame):
num_threads = 64
batch_size = max(1, len(descriptor) // num_threads)
output_dir = './avg_keypoints'
if not os.path.exists(output_dir):
os.makedirs(output_dir)
batches = [descriptor.iloc[i:i + batch_size] for i in range(0, len(descriptor), batch_size)]
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
executor.map(lambda batch: process_batch_avg_keypoints_row(batch, output_dir), batches)
def download_video(youtube_url, video_file):
if os.path.exists(video_file):
return
command = [
'yt-dlp', '-f', 'best[height<=360]', '-o', video_file, youtube_url
]
subprocess.run(command, check=True)
def extract_frames(video_file, start_frame, end_frame, output_folder):
if not os.path.exists(output_folder):
os.makedirs(output_folder)
if not os.path.exists(video_file):
return
if len(os.listdir(output_folder)) == end_frame - start_frame:
return
command = [
'ffmpeg', '-i', video_file, '-vf', f"select='between(n\\,{start_frame}\\,{end_frame - 1})'",
'-vsync', 'vfr', '-frame_pts', 'true', os.path.join(output_folder, '%08d.png')
]
subprocess.run(command, check=True)
def process_video_frames(row, video_path, video_frame_path):
video_file = os.path.join(video_path, f"{row['video_id']}.mp4")
start_frame = int(row['Start_frame'])
end_frame = int(row['End_frame'])
clip_name = row['ClipName']
output_folder = os.path.join(video_frame_path, clip_name)
# if not os.path.exists(video_file):
# download_video(row['URL'], video_file)
extract_frames(video_file, start_frame, end_frame, output_folder)
# remove the leading zeros from the frame names
for filename in os.listdir(output_folder):
os.rename(os.path.join(output_folder, filename), os.path.join(output_folder, filename.lstrip('0')))
def process_video_frames_multi_threaded(descriptor: pd.DataFrame, video_path, video_frame_path):
with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
futures = [
executor.submit(process_video_frames, row, video_path, video_frame_path)
for _, row in descriptor.iterrows()
]
for future in concurrent.futures.as_completed(futures):
try:
future.result()
except Exception as e:
print(f"Error processing row: {e}")
def cal_lerp_avg_keypoints(keypoints: list[skel.Keypoint]):
# in these keypoints, the confidence is 0.0 if the keypoint is not detected
# lerp linearly from last valid keypoint to next valid keypoint, fill in the missing keypoint(s)
# Find the first valid keypoint
first_valid_idx = next((i for i, kp in enumerate(keypoints) if kp.confidence > 0.0), None)
if first_valid_idx is None:
return keypoints # No valid keypoints found
# Find the last valid keypoint
last_valid_idx = next((i for i, kp in reversed(list(enumerate(keypoints))) if kp.confidence > 0.0), None)
# Copy the first valid keypoint's values to all preceding invalid keypoints
for i in range(first_valid_idx):
keypoints[i].x = keypoints[first_valid_idx].x
keypoints[i].y = keypoints[first_valid_idx].y
keypoints[i].confidence = keypoints[first_valid_idx].confidence
# Copy the last valid keypoint's values to all succeeding invalid keypoints
for i in range(last_valid_idx + 1, len(keypoints)):
keypoints[i].x = keypoints[last_valid_idx].x
keypoints[i].y = keypoints[last_valid_idx].y
keypoints[i].confidence = keypoints[last_valid_idx].confidence
# Interpolate between valid keypoints
last_valid_idx = first_valid_idx
for i in range(first_valid_idx + 1, len(keypoints)):
if keypoints[i].confidence > 0.0:
next_valid_idx = i
# Linearly interpolate between last_valid_idx and next_valid_idx
for j in range(last_valid_idx + 1, next_valid_idx):
t = (j - last_valid_idx) / (next_valid_idx - last_valid_idx)
keypoints[j].x = keypoints[last_valid_idx].x * (1 - t) + keypoints[next_valid_idx].x * t
keypoints[j].y = keypoints[last_valid_idx].y * (1 - t) + keypoints[next_valid_idx].y * t
keypoints[j].confidence = keypoints[last_valid_idx].confidence * (1 - t) + keypoints[next_valid_idx].confidence * t
last_valid_idx = next_valid_idx
return keypoints
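A quick worked example of the interpolation (illustrative values): a keypoint missing at frame 1 between valid frames 0 and 2 is filled halfway between its neighbours.
# frames:   0               1               2
# input:    (10, 10, 0.9)   (0, 0, 0.0)     (20, 30, 0.7)
# output:   (10, 10, 0.9)   (15, 20, 0.8)   (20, 30, 0.7)   # t = 0.5 between the neighbours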
def process_avg_keypoints_folder(avg_keypoints_folder, output_folder):
os.makedirs(output_folder, exist_ok=True)
for file in os.listdir(avg_keypoints_folder):
json_path = os.path.join(avg_keypoints_folder, file)
with open(json_path, 'r') as f:
data = json.load(f)
skeleton1_keypoints = []
skeleton2_keypoints = []
for frame in data:
skeleton1_keypoints.append(skel.Keypoint(frame[0]['x'], frame[0]['y'], frame[0]['confidence']))
skeleton2_keypoints.append(skel.Keypoint(frame[1]['x'], frame[1]['y'], frame[1]['confidence']))
lerped_keypoints1 = cal_lerp_avg_keypoints(skeleton1_keypoints)
lerped_keypoints2 = cal_lerp_avg_keypoints(skeleton2_keypoints)
lerped = []
for i in range(len(lerped_keypoints1)):
lerped.append([lerped_keypoints1[i], lerped_keypoints2[i]])
with open(os.path.join(output_folder, file), 'w') as f:
json.dump(lerped, f, cls=skel.KeypointEncoder, indent=4)
def main():
descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')
+ os.makedirs('./avg_keypoints', exist_ok=True)
- frames = get_frames_from_fixed_json('./fixed/0050_001_08_08_1.json')
+ video_path = './video'
- # print(frames[0][0].keypoints[0])
+ video_frame_path = './video_frames'
- canvas = np.zeros((360, 640, 3), dtype=np.uint8)
+ add_neck_to_og_coco('./new_yolo_keypoints')
- canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)
+ # print("Done processing all rows.")
+ # canvas = np.zeros((360, 640, 3), dtype=np.uint8)
+ # canvas = skel.draw_bodypose(canvas, frames[0][0].keypoints, skel.body_25_limbSeq, skel.body_25_colors)
#save the image
- cv2.imwrite('test.png', canvas)
+ # cv2.imwrite('test.png', canvas)
if __name__ == '__main__':
main()

218
prompts/fencer_02.json Normal file
View File

@ -0,0 +1,218 @@
{
"3": {
"inputs": {
"seed": 695262830308132,
"steps": 3,
"cfg": 2,
"sampler_name": "dpmpp_sde",
"scheduler": "karras",
"denoise": 1,
"model": [
"32",
0
],
"positive": [
"22",
0
],
"negative": [
"22",
1
],
"latent_image": [
"5",
0
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"4": {
"inputs": {
"ckpt_name": "dreamshaperXL_sfwLightningDPMSDE.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"5": {
"inputs": {
"width": 1024,
"height": 1024,
"batch_size": 1
},
"class_type": "EmptyLatentImage",
"_meta": {
"title": "Empty Latent Image"
}
},
"6": {
"inputs": {
"text": "A fencer in full gear, fencing sword, 1 human, empty background, dark background, dark, empty, 1 sword, sword in hand",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Positive)"
}
},
"8": {
"inputs": {
"samples": [
"3",
0
],
"vae": [
"4",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"17": {
"inputs": {
"image": "3bdafb967cede879cabdc2f1277ce5ae8fde8f4a1ff1f0c821fb9b7890bfa252.png",
"upload": "image"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"22": {
"inputs": {
"strength": 0.98,
"start_percent": 0,
"end_percent": 1,
"positive": [
"6",
0
],
"negative": [
"40",
0
],
"control_net": [
"43",
0
],
"image": [
"17",
0
],
"vae": [
"4",
2
]
},
"class_type": "ControlNetApplyAdvanced",
"_meta": {
"title": "Apply ControlNet"
}
},
"28": {
"inputs": {
"ipadapter_file": "ip-adapter-plus_sdxl_vit-h.safetensors"
},
"class_type": "IPAdapterModelLoader",
"_meta": {
"title": "IPAdapter Model Loader"
}
},
"29": {
"inputs": {
"image": "ref_black.png",
"upload": "image"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"31": {
"inputs": {
"clip_name": "CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors"
},
"class_type": "CLIPVisionLoader",
"_meta": {
"title": "Load CLIP Vision"
}
},
"32": {
"inputs": {
"weight": 1.3,
"weight_type": "style and composition",
"combine_embeds": "norm average",
"start_at": 0,
"end_at": 1,
"embeds_scaling": "K+V w/ C penalty",
"model": [
"4",
0
],
"ipadapter": [
"28",
0
],
"image": [
"29",
0
],
"clip_vision": [
"31",
0
]
},
"class_type": "IPAdapterAdvanced",
"_meta": {
"title": "IPAdapter Advanced"
}
},
"40": {
"inputs": {
"text": "blurry, drawing, horror, distorted, malformed, naked, cartoon, anime, out of focus, dull, muted colors, boring pose, no action, distracting background, colorful, (face:5.0), bad hand, (bad anatomy:5.0), worst quality, ai generated images, low quality, average quality, smoke, background, three arms, three hands, white light, (light:5.0), (shadow:5.0), (floor:5.0), 2 sword, multiple sword\n\nembedding:ac_neg1, embedding:ac_neg2, embedding:badhandv4, embedding:DeepNegative_xl_v1, embedding:NEGATIVE_HANDS, embedding:negativeXL_D, embedding:'unaestheticXL_cbp62 -neg.safetensors', embedding:verybadimagenegative_v1.3, embedding:ziprealism_neg, ",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Negative)"
}
},
"43": {
"inputs": {
"control_net_name": "diffusion_pytorch_model.safetensors",
"model": [
"4",
0
]
},
"class_type": "DiffControlNetLoader",
"_meta": {
"title": "Load ControlNet Model (diff)"
}
},
"save_image_websocket_node": {
"inputs": {
"images": [
"8",
0
]
},
"class_type": "SaveImageWebsocket",
"_meta": {
"title": "SaveImageWebsocket"
}
}
}

236
prompts/fencer_03.json Normal file
View File

@ -0,0 +1,236 @@
{
"3": {
"inputs": {
"seed": 695262830308132,
"steps": 3,
"cfg": 2,
"sampler_name": "dpmpp_sde",
"scheduler": "karras",
"denoise": 1,
"model": [
"32",
0
],
"positive": [
"22",
0
],
"negative": [
"22",
1
],
"latent_image": [
"5",
0
]
},
"class_type": "KSampler",
"_meta": {
"title": "KSampler"
}
},
"4": {
"inputs": {
"ckpt_name": "dreamshaperXL_sfwLightningDPMSDE.safetensors"
},
"class_type": "CheckpointLoaderSimple",
"_meta": {
"title": "Load Checkpoint"
}
},
"5": {
"inputs": {
"width": 1024,
"height": 1024,
"batch_size": 1
},
"class_type": "EmptyLatentImage",
"_meta": {
"title": "Empty Latent Image"
}
},
"6": {
"inputs": {
"text": "A fencer in full gear, fencing sword, 1 human, empty background, dark background, dark, empty, 1 sword, sword in hand",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Positive)"
}
},
"8": {
"inputs": {
"samples": [
"3",
0
],
"vae": [
"4",
2
]
},
"class_type": "VAEDecode",
"_meta": {
"title": "VAE Decode"
}
},
"17": {
"inputs": {
"image": "ef2d127de37b942baad06145e54b0c619a1f22327b2ebbcfbec78f5564afe39d.png",
"upload": "image"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"22": {
"inputs": {
"strength": 0.98,
"start_percent": 0,
"end_percent": 1,
"positive": [
"6",
0
],
"negative": [
"40",
0
],
"control_net": [
"43",
0
],
"image": [
"17",
0
],
"vae": [
"4",
2
]
},
"class_type": "ControlNetApplyAdvanced",
"_meta": {
"title": "Apply ControlNet"
}
},
"28": {
"inputs": {
"ipadapter_file": "ip-adapter-plus_sdxl_vit-h.safetensors"
},
"class_type": "IPAdapterModelLoader",
"_meta": {
"title": "IPAdapter Model Loader"
}
},
"29": {
"inputs": {
"image": "ref_black.png",
"upload": "image"
},
"class_type": "LoadImage",
"_meta": {
"title": "Load Image"
}
},
"31": {
"inputs": {
"clip_name": "CLIP-ViT-H-14-laion2B-s32B-b79K.safetensors"
},
"class_type": "CLIPVisionLoader",
"_meta": {
"title": "Load CLIP Vision"
}
},
"32": {
"inputs": {
"weight": 1.3,
"weight_type": "style and composition",
"combine_embeds": "norm average",
"start_at": 0,
"end_at": 1,
"embeds_scaling": "K+V w/ C penalty",
"model": [
"4",
0
],
"ipadapter": [
"28",
0
],
"image": [
"29",
0
],
"clip_vision": [
"31",
0
]
},
"class_type": "IPAdapterAdvanced",
"_meta": {
"title": "IPAdapter Advanced"
}
},
"40": {
"inputs": {
"text": "blurry, drawing, horror, distorted, malformed, naked, cartoon, anime, out of focus, dull, muted colors, boring pose, no action, distracting background, colorful, (face:5.0), bad hand, (bad anatomy:5.0), worst quality, ai generated images, low quality, average quality, smoke, background, three arms, three hands, white light, (light:5.0), (shadow:5.0), (floor:5.0), 2 sword, multiple sword\n\nembedding:ac_neg1, embedding:ac_neg2, embedding:badhandv4, embedding:DeepNegative_xl_v1, embedding:NEGATIVE_HANDS, embedding:negativeXL_D, embedding:'unaestheticXL_cbp62 -neg.safetensors', embedding:verybadimagenegative_v1.3, embedding:ziprealism_neg, ",
"clip": [
"4",
1
]
},
"class_type": "CLIPTextEncode",
"_meta": {
"title": "CLIP Text Encode (Negative)"
}
},
"43": {
"inputs": {
"control_net_name": "diffusion_pytorch_model.safetensors",
"model": [
"4",
0
]
},
"class_type": "DiffControlNetLoader",
"_meta": {
"title": "Load ControlNet Model (diff)"
}
},
"save_image_websocket_node": {
"inputs": {
"images": [
"51",
0
]
},
"class_type": "SaveImageWebsocket",
"_meta": {
"title": "SaveImageWebsocket"
}
},
"51": {
"inputs": {
"model": "silueta",
"alpha_matting": "true",
"alpha_matting_foreground_threshold": 240,
"alpha_matting_background_threshold": 20,
"alpha_matting_erode_size": 10,
"post_process_mask": "false",
"images": [
"8",
0
]
},
"class_type": "ImageSegmentation",
"_meta": {
"title": "ImageSegmentation"
}
}
}
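
The graph above is the ControlNet + IPAdapter pipeline that app.py appears to queue for each generated OpenPose frame: node references are [node_id, output_slot] pairs, node "17" loads the pose image that drives the ControlNet, node "3" carries the sampler seed, node "51" appears to strip the background, and "save_image_websocket_node" streams the finished image back over the websocket instead of writing it to disk. Below is a minimal sketch of patching and queueing such a graph through ComfyUI's standard /prompt endpoint; the node ids match this file, but the prompt path, pose file name, and server address are illustrative assumptions rather than this repo's exact code.

import json
import random
import urllib.request
import uuid

comfyui_address = "127.0.0.1:8188"   # assumption: the repo normally reads this from info.json
client_id = str(uuid.uuid4())

with open("./prompts/fencer_02.json", "r", encoding="utf-8") as f:
    graph = json.load(f)

graph["3"]["inputs"]["seed"] = random.randint(0, 10000000000)   # fresh noise per request
graph["17"]["inputs"]["image"] = "pose_0001.png"                # hypothetical uploaded OpenPose frame

payload = json.dumps({"prompt": graph, "client_id": client_id}).encode("utf-8")
req = urllib.request.Request(f"http://{comfyui_address}/prompt", data=payload)
print(json.loads(urllib.request.urlopen(req).read()))           # ComfyUI replies with a prompt_id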

85
prompts/test.json Normal file
View File

@ -0,0 +1,85 @@
{
"3": {
"class_type": "KSampler",
"inputs": {
"cfg": 8,
"denoise": 1,
"latent_image": [
"5",
0
],
"model": [
"4",
0
],
"negative": [
"7",
0
],
"positive": [
"6",
0
],
"sampler_name": "euler",
"scheduler": "normal",
"seed": 8566257,
"steps": 20
}
},
"4": {
"class_type": "CheckpointLoaderSimple",
"inputs": {
"ckpt_name": "counterfeitxl_v25.safetensors"
}
},
"5": {
"class_type": "EmptyLatentImage",
"inputs": {
"batch_size": 1,
"height": 1024,
"width": 1024
}
},
"6": {
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "masterpiece best quality girl"
}
},
"7": {
"class_type": "CLIPTextEncode",
"inputs": {
"clip": [
"4",
1
],
"text": "bad hands"
}
},
"8": {
"class_type": "VAEDecode",
"inputs": {
"samples": [
"3",
0
],
"vae": [
"4",
2
]
}
},
"save_image_websocket_node": {
"class_type": "SaveImageWebsocket",
"inputs": {
"images": [
"8",
0
]
}
}
}

BIN
ref.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

View File

@ -0,0 +1,8 @@
ws = websocket.WebSocket()
ws.connect("ws://{}/ws?clientId={}".format(server_address, client_id))
images = get_images(ws, prompt)
for node_id in images:
    for idx, image_data in enumerate(images[node_id]):
        image = Image.open(io.BytesIO(image_data))
        image_path = os.path.join(info['expo_raw_sd_dir'], f"{node_id}_{idx}.png")
        image.save(image_path)
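
The fragment above leans on get_images from comfy_socket. For orientation, here is a rough sketch of what that helper typically does, modeled on ComfyUI's upstream websocket API example rather than this repo's exact implementation; it assumes module-level server_address and client_id plus the usual json/urllib imports, as in that example.

def get_images(ws, prompt):
    # Queue the graph, then read websocket frames until the prompt finishes.
    payload = json.dumps({"prompt": prompt, "client_id": client_id}).encode("utf-8")
    req = urllib.request.Request(f"http://{server_address}/prompt", data=payload)
    prompt_id = json.loads(urllib.request.urlopen(req).read())["prompt_id"]

    output_images = {}
    current_node = ""
    while True:
        out = ws.recv()
        if isinstance(out, str):
            msg = json.loads(out)
            # An "executing" event with node == None marks the end of the prompt.
            if msg["type"] == "executing" and msg["data"]["prompt_id"] == prompt_id:
                if msg["data"]["node"] is None:
                    break
                current_node = msg["data"]["node"]
        elif current_node == "save_image_websocket_node":
            # Binary frames carry the image bytes after an 8-byte header.
            output_images.setdefault(current_node, []).append(out[8:])
    return output_images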

View File

@ -1,13 +1,15 @@
 #This is an example that uses the websockets api and the SaveImageWebsocket node to get images directly without
 #them being saved to disk
+import random
 import websocket #NOTE: websocket-client (https://github.com/websocket-client/websocket-client)
 import uuid
 import json
 import urllib.request
 import urllib.parse
-server_address = "127.0.0.1:8188"
+info = json.load(open('info.json'))
+server_address = info['comfyui_address']
 client_id = str(uuid.uuid4())

 def queue_prompt(prompt):
@ -81,15 +83,15 @@ prompt_text = """
"4": { "4": {
"class_type": "CheckpointLoaderSimple", "class_type": "CheckpointLoaderSimple",
"inputs": { "inputs": {
"ckpt_name": "v1-5-pruned-emaonly.safetensors" "ckpt_name": "counterfeitxl_v25.safetensors"
} }
}, },
"5": { "5": {
"class_type": "EmptyLatentImage", "class_type": "EmptyLatentImage",
"inputs": { "inputs": {
"batch_size": 1, "batch_size": 1,
"height": 512, "height": 1024,
"width": 512 "width": 1024
} }
}, },
"6": { "6": {
@ -137,23 +139,33 @@ prompt_text = """
     }
 }
 """

-prompt = json.loads(prompt_text)
 #set the text prompt for our positive CLIPTextEncode
-prompt["6"]["inputs"]["text"] = "masterpiece best quality man"
 #set the seed for our KSampler node
-prompt["3"]["inputs"]["seed"] = 5
+import os
+from PIL import Image
+import io
+
+prompt = json.loads(open("./prompts/fencer_02.json", "r", encoding="utf-8").read())
+# prompt["6"]["inputs"]["text"] = "masterpiece best quality man"
+prompt["3"]["inputs"]["seed"] = random.randint(0, 10000000000)

 ws = websocket.WebSocket()
 ws.connect("ws://{}/ws?clientId={}".format(server_address, client_id))
 images = get_images(ws, prompt)
+for node_id in images:
+    for idx, image_data in enumerate(images[node_id]):
+        image = Image.open(io.BytesIO(image_data))
+        image_path = os.path.join(info['expo_raw_sd_dir'], f"{node_id}_{idx}.png")
+        image.save(image_path)

-#Commented out code to display the output images:
+# #Commented out code to display the output images:
 # for node_id in images:
-#     for image_data in images[node_id]:
+#     for idx, image_data in enumerate(images[node_id]):
 #     from PIL import Image
 #     import io
 #     image = Image.open(io.BytesIO(image_data))
-#     image.show()
+#     image_path = os.path.join(output_dir, f"{node_id}_{idx}.png")
+#     image.save(image_path)

View File

@ -1,3 +1,4 @@
+import json
 from typing import List
 import numpy as np
 import math
@ -62,9 +63,30 @@ class Keypoint:
         self.y = y
         self.confidence = confidence

     def __repr__(self):
         return f"Keypoint(x={self.x}, y={self.y}, confidence={self.confidence})"

+class KeypointEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, Keypoint):
+            return {'x': obj.x, 'y': obj.y, 'confidence': obj.confidence}
+        return super().default(obj)
+
+class SkeletonEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, Skeleton):
+            return {'keypoints': obj.keypoints}
+        return super().default(obj)
+
+class Encoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, Skeleton):
+            return SkeletonEncoder().default(obj)
+        elif isinstance(obj, Keypoint):
+            return KeypointEncoder().default(obj)
+        return super().default(obj)
+
 class Skeleton:
     def __init__(self, keypoints: List[Keypoint]):
         self.keypoints = keypoints
@ -158,7 +180,7 @@ def draw_bodypose(canvas: ndarray, keypoints: List[Keypoint], limbSeq, colors, x
         keypoint1 = keypoints[k1_index]
         keypoint2 = keypoints[k2_index]

-        if keypoint1 is None or keypoint2 is None or keypoint1.confidence == 0 or keypoint2.confidence == 0:
+        if keypoint1 is None or keypoint2 is None or keypoint1.confidence == 0 or keypoint2.confidence == 0 or keypoint1.x <= 0 or keypoint1.y <= 0 or keypoint2.x <= 0 or keypoint2.y <= 0:
             # if keypoint1 is None or keypoint1.confidence == 0:
             #     print(f"keypoint failed: {k1_index}")
             # if keypoint2 is None or keypoint2.confidence == 0:
@ -175,7 +197,7 @@ def draw_bodypose(canvas: ndarray, keypoints: List[Keypoint], limbSeq, colors, x
         cv2.fillConvexPoly(canvas, polygon, [int(float(c) * 0.6) for c in color])

     for keypoint, color in zip(keypoints, colors):
-        if keypoint is None or keypoint.confidence == 0:
+        if keypoint is None or keypoint.confidence == 0 or keypoint.x <= 0 or keypoint.y <= 0:
             continue

         x, y = keypoint.x, keypoint.y
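
The encoders added above let nested Skeleton and Keypoint objects go straight to json.dump (yolo_main.py below writes whole frame lists with cls=skel.Encoder). A small usage sketch with made-up coordinates:

import json
import skeleton_lib as skel

frame = [
    skel.Skeleton([skel.Keypoint(412.0, 310.5, 0.93)]),   # one detected joint
    skel.Skeleton([]),                                     # second fencer not detected
]

# Encoder dispatches to SkeletonEncoder / KeypointEncoder, so the nested
# objects serialize without any manual dict conversion.
print(json.dumps(frame, cls=skel.Encoder, indent=4))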

1
tempCodeRunnerFile.py Normal file
View File

@ -0,0 +1 @@
750

8321
test.json Normal file

File diff suppressed because it is too large Load Diff

BIN
test.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 MiB

BIN
yolo11n-pose.pt Normal file

Binary file not shown.

BIN
yolo11x-pose.pt Normal file

Binary file not shown.

109
yolo_main.py Normal file
View File

@ -0,0 +1,109 @@
import json
import concurrent.futures
import pandas as pd
from ultralytics import YOLO
import os
import skeleton_lib as skel
import torch

def point_in_box(point, box):
    x, y = point
    x1, y1, x2, y2 = box
    return x1 <= x <= x2 and y1 <= y <= y2

def load_lerped_keypoints(lerped_keypoints_path):
    with open(lerped_keypoints_path, 'r') as f:
        return json.load(f)

def get_valid_skeletons(data, data_i, boxes, keypoints):
    # Match each lerped reference point (one per fencer) for this frame to a
    # YOLO person box; on a hit, copy that detection's pose keypoints into
    # the corresponding fencer slot.
    valid_skeletons = [skel.Skeleton([]) for _ in range(2)]
    for avg_i, avg in enumerate(data[data_i]):
        for i, box in enumerate(boxes.xyxy.tolist()):
            if point_in_box((avg['x'], avg['y']), box):
                skeleton = skel.Skeleton([])
                for j, keypoint in enumerate(keypoints.xy[i]):
                    keypoint = keypoint.tolist() + [keypoints.conf[i][j].item()]
                    skeleton.keypoints.append(skel.Keypoint(keypoint[0], keypoint[1], keypoint[2]))
                valid_skeletons[avg_i] = skeleton
                break
    return valid_skeletons

def get_yoloed_frames(results, lerped_keypoints_path):
    frames = []
    data = load_lerped_keypoints(lerped_keypoints_path)
    for data_i, result in enumerate(results):
        boxes = result.boxes  # Boxes object for bounding box outputs
        keypoints = result.keypoints  # Keypoints object for pose outputs
        frames.append(get_valid_skeletons(data, data_i, boxes, keypoints))
    return frames

def process_clip(row, model):
    clip_name = row
    input_video_path = f"video_frames/{clip_name}"
    lerped_keypoints_path = f"./lerped_keypoints/{clip_name}.json"
    output_keypoints_path = f"./new_yolo_keypoints/{clip_name}.json"

    # Ensure the folders exist
    os.makedirs(os.path.dirname(lerped_keypoints_path), exist_ok=True)
    os.makedirs(os.path.dirname(output_keypoints_path), exist_ok=True)

    # return if the file already exists
    if os.path.exists(output_keypoints_path):
        return

    results = model(input_video_path)
    frames = get_yoloed_frames(results, lerped_keypoints_path)

    # Write to JSON file
    with open(output_keypoints_path, 'w') as f:
        json.dump(frames, f, cls=skel.Encoder, indent=4)

def process_rows_on_gpu(rows, model, device):
    for row in rows:
        # Try each clip up to 5 times; on failure, reload the model on the
        # same device and retry.
        for _ in range(5):
            try:
                process_clip(row, model)
            except Exception as e:
                print(f"Error processing clip: {e}")
                del model
                model = YOLO("yolo11x-pose.pt").to(device)
                continue
            break

def gen_yolo_skeletons(descriptor):
    # Shard the unfinished clips evenly across the available GPUs, with one
    # YOLO pose model and one worker thread per GPU.
    num_gpus = torch.cuda.device_count()
    ClipName_list = descriptor['ClipName'].tolist()
    finished_list = os.listdir('./new_yolo_keypoints')
    ClipName_list = [clip for clip in ClipName_list if clip + '.json' not in finished_list]
    rows_per_gpu = len(ClipName_list) // num_gpus
    models = [YOLO("yolo11x-pose.pt").to(torch.device(f'cuda:{i}')) for i in range(num_gpus)]

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_gpus) as executor:
        futures = []
        for i in range(num_gpus):
            start_idx = i * rows_per_gpu
            end_idx = (i + 1) * rows_per_gpu if i != num_gpus - 1 else len(ClipName_list)
            gpu_rows = ClipName_list[start_idx:end_idx]
            futures.append(executor.submit(process_rows_on_gpu, gpu_rows, models[i], torch.device(f'cuda:{i}')))

        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing rows on GPU: {e}")

def main():
    model = YOLO("yolo11x-pose.pt")  # pretrained YOLO11 pose model (unused here; the GPU workers load their own)
    descriptor = pd.read_csv('./ClipDescriptorKaggle_processed.csv')
    avg_keypoints_folder = './avg_keypoints'

    gen_yolo_skeletons(descriptor)

    # count number of files in "./new_yolo_keypoints"
    # print(f"Number of files in ./new_yolo_keypoints: {len(os.listdir('./new_yolo_keypoints'))}")

if __name__ == "__main__":
    main()
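
Each file written by process_clip under ./new_yolo_keypoints/ is therefore a list of frames, each frame being the two-skeleton list built by get_valid_skeletons and serialized through skel.Encoder. A quick way to inspect one such file (the clip name is hypothetical):

import json

with open("./new_yolo_keypoints/example_clip.json") as f:
    frames = json.load(f)

for i, frame in enumerate(frames[:5]):
    # Each frame holds two skeletons, one per fencer, possibly empty.
    counts = [len(skeleton["keypoints"]) for skeleton in frame]
    print(f"frame {i}: keypoints per fencer = {counts}")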