import mmpose
from mmpose.apis import MMPoseInferencer

from ultralytics import YOLO
import torch

import gradio as gr

import os
import glob
import uuid

import numpy as np
import cv2

print("[INFO]: Imported modules!")
human = MMPoseInferencer("human")
hand = MMPoseInferencer("hand")
human3d = MMPoseInferencer(pose3d="human3d")
track_model = YOLO("yolov8n.pt")
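
# Note: constructing these models is what triggers the pretrained-checkpoint
# downloads (MMPose resolves the "human", "hand" and "human3d" aliases to
# config/weight pairs, and ultralytics fetches yolov8n.pt on first use),
# hence the "[INFO]: Downloaded models!" log below.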

# Map each checkbox label in the UI to the model that serves it; infer()
# looks the selected methods up here.
inferencers = {
    "Estimate human 2d poses": human,
    "Estimate human 2d hand poses": hand,
    "Estimate human 3d poses": human3d,
    "Detect and track": track_model,
}

print("[INFO]: Downloaded models!")

def tracking(video, model, boxes=True):
    print("[INFO] Starting tracking!")

    results = model(video, boxes=boxes)

    return results
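
# tracking() is invoked with a bound method (model.track in show_tracking
# below), so its model(video, boxes=boxes) call is effectively
# track_model.track(video, boxes=True): ultralytics runs detection plus ID
# association over the clip and returns one Results object per frame.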

def show_tracking(video_content, vis_out_dir, model):
    video = cv2.VideoCapture(video_content)

    # Run the tracker over the whole file; video_track holds one ultralytics
    # Results object per frame.
    video_track = tracking(video_content, model.track)

    out_file = "track.mp4"
    print("[INFO]: TRACK", out_file)

    # Mirror the source video's fps and frame size for the annotated output.
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    fps = video.get(cv2.CAP_PROP_FPS)
    height, width, _ = video_track[0].orig_img.shape
    size = (width, height)  # cv2.VideoWriter expects (width, height)

    out_track = cv2.VideoWriter(out_file, fourcc, fps, size)

    # Draw the tracked boxes onto each frame and write the annotated video.
    for frame_track in video_track:
        result_track = frame_track.plot()
        out_track.write(result_track)

    print("[INFO] Done with frames")

    out_track.release()
    video.release()
    cv2.destroyAllWindows()

    return out_file
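
# MMPoseInferencer saves its rendered video into vis_out_dir under the input
# file's basename, so poses() recovers the produced .mp4 by globbing that
# directory rather than by constructing the name itself.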

def poses(inferencer, video, vis_out_dir, kpt_thr):
    print("[INFO] VIDEO INPUT: ", video)
    result_generator = inferencer(video,
                                  vis_out_dir=vis_out_dir,
                                  return_vis=True,
                                  thickness=2,
                                  rebase_keypoint_height=True,
                                  kpt_thr=kpt_thr,  # forward the UI threshold to the visualizer
                                  device="cuda"  # assumes a CUDA-capable GPU
                                  )
    # Drain the generator; the visualization video is written as a side effect.
    results = [result for result in result_generator]

    out_file = glob.glob(os.path.join(vis_out_dir, "*.mp4"))

    return out_file
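
# kpt_thr is MMPose's visualization confidence threshold: keypoints scoring
# below it are left out of the rendered overlay.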

def infer(video, check, kpt_thr, webcam=True):
    print("[INFO] VIDEO INPUT: ", video)

    out_files = []
    track_out = None

    for i in check:
        # A fresh uuid-named directory per method keeps concurrent runs from
        # overwriting each other's output.
        vis_out_dir = str(uuid.uuid4())
        inferencer = inferencers[i]

        if i == "Detect and track":
            track_out = show_tracking(video, vis_out_dir, inferencer)
        else:
            if webcam:
                print("WEBCAM")
                add_dir = str(uuid.uuid4())
                vidname = video.split("/")[-1]
                # Write the visualization into a fresh subdirectory next to
                # the input video so Gradio can serve it back.
                vis_out_dir_web = "/".join(["/".join(video.split("/")[:-1]), add_dir])
                poses(inferencer, video, vis_out_dir_web, kpt_thr)
                fullname = os.path.join(vis_out_dir_web, vidname)
                out_files.append(fullname)
            else:
                out_file = poses(inferencer, video, vis_out_dir, kpt_thr)
                out_files.extend(out_file)

        print(out_files)

    # Pad with None so the return arity always matches the four Gradio outputs.
    out_files += [None] * (3 - len(out_files))

    return track_out, out_files[0], out_files[1], out_files[2]
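
# Both tabs below call infer() with three inputs, so webcam keeps its default
# of True and uploaded files take the same output path (a fresh directory next
# to the input video) as webcam recordings.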

def run():
    methods = ["Detect and track", "Estimate human 2d poses", "Estimate human 2d hand poses", "Estimate human 3d poses"]
    check_web = gr.CheckboxGroup(choices=methods, label="Methods", type="value", info="Select the model(s) you want")
    check_file = gr.CheckboxGroup(choices=methods, label="Methods", type="value", info="Select the model(s) you want")

    description = """Here you can upload a video or record one with your webcam, then track objects or estimate human poses in 2d and 3d."""

    web_kpthr = gr.Slider(0, 1, value=0.3, label="Keypoint threshold")
    file_kpthr = gr.Slider(0, 1, value=0.3, label="Keypoint threshold")

    def output_components():
        # Each gr.Interface needs its own component instances, so build a
        # fresh set per tab instead of sharing one list.
        return [
            gr.Video(format="mp4", height=512, label="Detect and track", show_label=True),
            gr.PlayableVideo(height=512, label="Estimate human 2d poses", show_label=True),
            gr.PlayableVideo(height=512, label="Estimate human 2d hand poses", show_label=True),
            gr.PlayableVideo(height=512, label="Estimate human 3d poses", show_label=True),
        ]

    webcam = gr.Interface(
        fn=infer,
        inputs=[gr.Video(source="webcam", height=512), check_web, web_kpthr],
        outputs=output_components(),
        title="Tracking and pose estimation",
        description=description,
        allow_flagging="never",
    )

    file = gr.Interface(
        fn=infer,
        inputs=[gr.Video(source="upload", height=512), check_file, file_kpthr],
        outputs=output_components(),
        title="Tracking and pose estimation",
        description=description,
        allow_flagging="never",
    )

    demo = gr.TabbedInterface(
        interface_list=[file, webcam],
        tab_names=["From a File", "From your Webcam"],
    )

    # Bind to all interfaces so the app is reachable when run in a container.
    demo.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    run()