import cv2
import mediapipe as mp
import numpy as np
import gradio as gr
import tempfile

# Path to the MediaPipe hand landmark model bundle, loaded by
# HandLandmarkerOptions below (make sure it's in your repo!).
MODEL_PATH = "hand_landmarker.task"

# MediaPipe setup: short aliases for the Tasks-API classes used below.
BaseOptions = mp.tasks.BaseOptions
HandLandmarker = mp.tasks.vision.HandLandmarker
HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
mp_image = mp.Image
mp_format = mp.ImageFormat

# Define hand connections and colors for visualization.
# Each pair is (start, end) landmark indices of the 21-point MediaPipe hand
# model: index 0 is the wrist; each row below is one finger chain
# (thumb, index, middle, ring, pinky), anchored at the wrist.
HAND_CONNECTIONS = [
    (0, 1), (1, 2), (2, 3), (3, 4),
    (0, 5), (5, 6), (6, 7), (7, 8),
    (0, 9), (9,10), (10,11), (11,12),
    (0,13), (13,14), (14,15), (15,16),
    (0,17), (17,18), (18,19), (19,20)
]

# Per-finger line colors as BGR tuples (frames come from cv2, which is BGR).
# 'palm' is the fallback color in get_finger_color for out-of-range indices.
FINGER_COLORS = {
    'thumb': (245, 245, 245),
    'index': (128, 0, 128),
    'middle': (0, 255, 0),
    'ring': (0, 165, 255),
    'pinky': (255, 0, 0),
    'palm': (100, 100, 100)
}

def get_finger_color(start_idx):
    """Return the BGR color for the connection whose start landmark is *start_idx*.

    Landmark indices 0-20 map to fingers in contiguous runs; anything
    outside those runs falls back to the neutral 'palm' color.
    """
    finger_ranges = (
        (range(0, 5), 'thumb'),
        (range(5, 9), 'index'),
        (range(9, 13), 'middle'),
        (range(13, 17), 'ring'),
        (range(17, 21), 'pinky'),
    )
    for idx_range, finger in finger_ranges:
        if start_idx in idx_range:
            return FINGER_COLORS[finger]
    return FINGER_COLORS['palm']

def process_video(video_file):
    """Annotate hand landmarks on every frame of a video.

    Runs the MediaPipe HandLandmarker over each frame, draws colored
    skeleton lines (per finger) and yellow keypoint dots, and writes the
    result to a temporary mp4.

    Parameters
    ----------
    video_file : str | dict
        Path to the input video, or a Gradio file dict with a "name" key
        (Gradio may send either form depending on how input is passed).

    Returns
    -------
    str
        Path to the annotated mp4 file.

    Raises
    ------
    ValueError
        If the input video cannot be opened.
    """
    if isinstance(video_file, dict):
        video_path = video_file["name"]
    else:
        video_path = video_file

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError("Could not open video.")

    # CAP_PROP_FPS can come back 0.0 for some containers; fall back to 24.
    fps = cap.get(cv2.CAP_PROP_FPS) or 24
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Prepare output video path. Close the temp-file handle immediately:
    # cv2.VideoWriter opens the path itself, and keeping the handle open
    # leaks a descriptor (and prevents reopening the file on Windows).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_out:
        out_path = tmp_out.name
    out = cv2.VideoWriter(out_path, fourcc, fps, (w, h))

    # Load hand detection model (IMAGE mode: each frame detected independently).
    options = HandLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=MODEL_PATH),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=2,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5
    )

    try:
        with HandLandmarker.create_from_options(options) as landmarker:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # MediaPipe expects RGB; cv2 decodes frames as BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                mp_img = mp_image(image_format=mp_format.SRGB, data=rgb_frame)
                results = landmarker.detect(mp_img)

                if results.hand_landmarks:
                    for hand_landmarks in results.hand_landmarks:
                        # Landmarks are normalized [0, 1]; scale to pixels.
                        points = [(int(lm.x * w), int(lm.y * h)) for lm in hand_landmarks]
                        for start, end in HAND_CONNECTIONS:
                            color = get_finger_color(start)
                            cv2.line(frame, points[start], points[end], color, 2)
                        for x, y in points:
                            cv2.circle(frame, (x, y), 4, (0, 255, 255), -1)

                # Draw on the original BGR frame so colors stay correct on disk.
                out.write(frame)
    finally:
        # Release capture and writer even if detection raises mid-stream,
        # so the partially written mp4 is finalized and handles are freed.
        cap.release()
        out.release()
    return out_path

# Gradio app interface: one video in (upload or webcam recording),
# one annotated video out, handled by process_video above.
demo = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video or Record via Webcam"),
    outputs=gr.Video(label="Hand Landmark Annotated Video"),
    title="🖐️ Hand Detection using MediaPipe",
    description="Upload a video or record from webcam. The system will detect hands and annotate keypoints using MediaPipe HandLandmarker."
)

# Launch the local Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()