import gradio as gr
import cv2
import mediapipe as mp
import numpy as np
import os
# Initialize MediaPipe Pose.
mp_pose = mp.solutions.pose
# Single shared Pose tracker; static_image_mode=False enables frame-to-frame
# tracking for video. NOTE(review): this one global instance is fed frames
# from BOTH videos alternately in process_videos, so its tracking state spans
# the interleaved streams — confirm this is intended.
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
# Subset of MediaPipe pose landmarks used for comparison (arms and legs only).
landmark_indices = [
    11, 13, 15,  # Left shoulder, elbow, wrist
    12, 14, 16,  # Right shoulder, elbow, wrist
    23, 25, 27,  # Left hip, knee, ankle
    24, 26, 28,  # Right hip, knee, ankle
]


def calculate_frame_similarity(ref_landmarks, inp_landmarks):
    """Return a pose similarity score in [0, 1] for the selected joints.

    Args:
        ref_landmarks: indexable sequence of reference landmarks, each with
            .x and .y (e.g. MediaPipe's ``pose_landmarks.landmark``).
        inp_landmarks: landmarks of the pose being evaluated, same format.

    Returns:
        ``max(0, 1 - d)`` where ``d`` is the Euclidean distance between the
        two stacked joint sets after each pose is scale-normalized by its
        own torso length (left shoulder to left hip), so subjects at
        different distances from the camera remain comparable. Returns 0
        when either torso length is zero (degenerate detection).
    """
    ref = np.array([(ref_landmarks[idx].x, ref_landmarks[idx].y) for idx in landmark_indices])
    inp = np.array([(inp_landmarks[idx].x, inp_landmarks[idx].y) for idx in landmark_indices])
    # Row 0 is the left shoulder (11) and row 6 the left hip (23).
    ref_torso = np.linalg.norm(ref[0] - ref[6])
    inp_torso = np.linalg.norm(inp[0] - inp[6])
    # A coincident shoulder/hip would divide by zero; report no similarity
    # instead of propagating inf/nan through the score.
    if ref_torso == 0 or inp_torso == 0:
        return 0
    distance = np.linalg.norm(ref / ref_torso - inp / inp_torso)
    return max(0, 1 - distance)
# Function to draw selected landmarks
def draw_selected_landmarks(frame, landmarks):
    """Overlay the tracked joints and limb segments on ``frame`` in place.

    Joints listed in ``landmark_indices`` are drawn as green filled circles
    and the limb segments connecting them as blue lines. Anything with a
    detection visibility of 0.5 or below is skipped. Landmark coordinates
    are MediaPipe's normalized [0, 1] values, scaled here to pixel space.
    """
    height, width = frame.shape[:2]

    def to_pixels(point):
        # Normalized landmark -> integer pixel coordinates.
        return int(point.x * width), int(point.y * height)

    # Joints.
    for idx in landmark_indices:
        joint = landmarks[idx]
        if joint.visibility > 0.5:
            cv2.circle(frame, to_pixels(joint), 5, (0, 255, 0), -1)

    # Limb segments between adjacent tracked joints.
    limb_segments = [
        (11, 13), (13, 15),
        (12, 14), (14, 16),
        (23, 25), (25, 27),
        (24, 26), (26, 28),
    ]
    for a_idx, b_idx in limb_segments:
        a, b = landmarks[a_idx], landmarks[b_idx]
        if a.visibility > 0.5 and b.visibility > 0.5:
            cv2.line(frame, to_pixels(a), to_pixels(b), (255, 0, 0), 2)
# Function to get color based on similarity score
def get_similarity_color(similarity_score):
    """Map a similarity score to a BGR color for on-frame text.

    Green for scores of 0.75 and above, yellow for [0.25, 0.75),
    red below 0.25.
    """
    if similarity_score >= 0.75:
        return (0, 255, 0)
    if similarity_score >= 0.25:
        return (0, 255, 255)
    return (0, 0, 255)
def generate_feedback(ref_landmarks, inp_landmarks, diff_threshold=0.05):
    """Build a short coaching sentence for the worst-matching joint.

    Finds the tracked joint with the largest positional deviation between
    the reference and input poses. If that deviation exceeds
    ``diff_threshold``, returns advice on how to move it; otherwise
    returns an empty string. Directions are in image coordinates, where
    a larger y means lower on screen.
    """
    # Joint names for every landmark index this tool tracks.
    landmark_names = {
        11: "left shoulder", 13: "left elbow", 15: "left wrist",
        12: "right shoulder", 14: "right elbow", 16: "right wrist",
        23: "left hip", 25: "left knee", 27: "left ankle",
        24: "right hip", 26: "right knee", 28: "right ankle"
    }

    # Locate the joint with the greatest reference-vs-input distance.
    worst_idx, worst_diff = None, 0
    for idx in landmark_names:
        delta = np.array([ref_landmarks[idx].x - inp_landmarks[idx].x,
                          ref_landmarks[idx].y - inp_landmarks[idx].y])
        dist = np.linalg.norm(delta)
        if dist > worst_diff:
            worst_diff, worst_idx = dist, idx

    if worst_idx is None or worst_diff <= diff_threshold:
        return ""

    joint = landmark_names[worst_idx]
    ref_x, ref_y = ref_landmarks[worst_idx].x, ref_landmarks[worst_idx].y
    inp_x, inp_y = inp_landmarks[worst_idx].x, inp_landmarks[worst_idx].y

    advice = f"Your {joint} seems misaligned. "
    # Vertical correction (image coordinates: larger y means lower).
    if inp_y > ref_y + diff_threshold:
        advice += f"Try raising your {joint}."
    elif inp_y < ref_y - diff_threshold:
        advice += f"Try lowering your {joint}."
    # Horizontal correction.
    if inp_x < ref_x - diff_threshold:
        advice += " Also, move it to the right."
    elif inp_x > ref_x + diff_threshold:
        advice += " Also, move it to the left."
    return advice
# Function to process videos and generate comparison
def process_videos(reference_video_path, input_video_path):
    """Render a side-by-side pose comparison of two dance videos.

    Reads both videos in lockstep, runs MediaPipe Pose on every frame,
    overlays the tracked joints, and writes an annotated side-by-side
    video with a per-frame similarity percentage and coaching feedback.

    Args:
        reference_video_path: path to the reference performance.
        input_video_path: path to the performance being evaluated.

    Returns:
        (output_video_path, low_similarity_frames): the path of the written
        comparison video, and a list of RGB snapshot frames captured where
        similarity fell below 0.5 (throttled to at most one per second).
    """
    output_video_path = "output_comparison.mp4"
    low_similarity_frames = []
    cap_ref = cv2.VideoCapture(reference_video_path)
    cap_inp = cv2.VideoCapture(input_video_path)
    # The reference video's frame rate paces both the writer and snapshots.
    fps = int(cap_ref.get(cv2.CAP_PROP_FPS))
    min_frame_gap = fps  # at least one second gap; adjust this as needed
    frame_index = 0
    # Start negative so the very first low-similarity frame can be saved.
    last_saved_frame_index = -min_frame_gap
    ref_width = int(cap_ref.get(cv2.CAP_PROP_FRAME_WIDTH))
    ref_height = int(cap_ref.get(cv2.CAP_PROP_FRAME_HEIGHT))
    inp_width = int(cap_inp.get(cv2.CAP_PROP_FRAME_WIDTH))
    inp_height = int(cap_inp.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Both streams are resized to the larger of the two resolutions; the
    # output frame holds them side by side, hence twice the width.
    output_width = max(ref_width, inp_width)
    output_height = max(ref_height, inp_height)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (2 * output_width, output_height))
    # Process until either video runs out of frames.
    while cap_ref.isOpened() and cap_inp.isOpened():
        ret_ref, frame_ref = cap_ref.read()
        ret_inp, frame_inp = cap_inp.read()
        if not ret_ref or not ret_inp:
            break
        frame_ref_resized = cv2.resize(frame_ref, (output_width, output_height))
        frame_inp_resized = cv2.resize(frame_inp, (output_width, output_height))
        # MediaPipe expects RGB; OpenCV delivers BGR.
        frame_ref_rgb = cv2.cvtColor(frame_ref_resized, cv2.COLOR_BGR2RGB)
        frame_inp_rgb = cv2.cvtColor(frame_inp_resized, cv2.COLOR_BGR2RGB)
        # NOTE(review): the single global `pose` tracker is fed both streams
        # alternately; with static_image_mode=False its tracking state spans
        # the interleaved videos — confirm this is intended.
        results_ref = pose.process(frame_ref_rgb)
        results_inp = pose.process(frame_inp_rgb)
        similarity_score = 0
        feedback = "";
        if results_ref.pose_landmarks and results_inp.pose_landmarks:
            draw_selected_landmarks(frame_ref_resized, results_ref.pose_landmarks.landmark)
            draw_selected_landmarks(frame_inp_resized, results_inp.pose_landmarks.landmark)
            # Scoring uses the metric world landmarks; drawing and feedback
            # use the normalized image landmarks.
            similarity_score = calculate_frame_similarity(
                results_ref.pose_world_landmarks.landmark,
                results_inp.pose_world_landmarks.landmark
            )
            if (similarity_score<0.75) :
                feedback = generate_feedback(results_ref.pose_landmarks.landmark, results_inp.pose_landmarks.landmark)
        similarity_color = get_similarity_color(similarity_score)
        combined_frame = cv2.hconcat([frame_ref_resized, frame_inp_resized])
        # Similarity percentage, colored red/yellow/green by score.
        cv2.putText(
            combined_frame,
            f"Similarity: {similarity_score*100:.2f}",
            (50, 50),
            cv2.FONT_HERSHEY_SIMPLEX,
            2,
            similarity_color,
            2
        )
        cv2.putText(
            combined_frame,
            "Feedback: " + feedback,
            (50,100),
            cv2.FONT_HERSHEY_SIMPLEX,
            1,
            (255,255,255),
            2
        )
        out.write(combined_frame)
        # Snapshot poorly matched moments, throttled to one per second.
        if similarity_score < 0.5 and (frame_index - last_saved_frame_index) >= min_frame_gap:
            low_similarity_frames.append(cv2.cvtColor(combined_frame.copy(), cv2.COLOR_BGR2RGB))
            last_saved_frame_index = frame_index
        frame_index += 1
    cap_ref.release()
    cap_inp.release()
    out.release()
    return output_video_path, low_similarity_frames
# Gradio interface
def compare_dance_videos(reference_video, input_video):
    """Gradio callback: run the full comparison pipeline on two uploads.

    Returns the path of the annotated side-by-side video and the list of
    RGB snapshot frames where similarity dropped below 0.5.
    """
    return process_videos(reference_video, input_video)
# Gradio app setup
# Build and launch the Gradio UI: two video inputs; outputs are the annotated
# comparison video plus a gallery of the worst-matching moments.
gr.Interface(
    fn=compare_dance_videos,
    inputs=[
        gr.Video(label="Reference Video"),
        gr.Video(label="Input Video")
    ],
    outputs=[
        gr.Video(label="Output Comparison Video"),
        gr.Gallery(label="Low Similarity Frames")
    ],
    title="Dance Comparison Tool",
    description="Upload two videos to compare the dance sequences and generate a similarity score visualization."
).launch()