prakasa1234 commited on
Commit
0a0b7f1
·
verified ·
1 Parent(s): a1f4c0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -90
app.py CHANGED
@@ -1,109 +1,101 @@
1
- from ultralytics import YOLO
2
- import gradio as gr
3
- import cv2
4
  import os
 
5
  import numpy as np
6
- from mediapipe import Image
 
 
 
 
7
  from mediapipe.tasks import python
8
  from mediapipe.tasks.python import vision
9
- import os
10
- import requests
11
 
12
- # Path to save the model locally inside the Space
 
 
13
  HAND_MODEL_PATH = "hand_landmarker.task"
14
-
15
- # URL to download from (official MediaPipe mirror)
16
  HAND_MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
17
-
18
- # ---------------------
19
- # Load YOLO model
20
- # ---------------------
21
- MODEL_PATH = "yolov11n_finetuned_ASL.pt"
22
- HAND_MODEL_PATH = "hand_landmarker.task"
23
- OUTPUT_DIR = "annotated_uploads"
24
- os.makedirs(OUTPUT_DIR, exist_ok=True)
25
-
26
- model = YOLO(MODEL_PATH)
27
-
28
- # ---------------------
29
- # Load MediaPipe hand landmark detector
30
- # ---------------------
 
 
 
 
 
 
31
  base_options = python.BaseOptions(model_asset_path=HAND_MODEL_PATH)
32
- options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
33
- detector = vision.HandLandmarker.create_from_options(options)
34
-
35
- # ---------------------
36
- # Helper: Annotate image with landmarks
37
- # ---------------------
38
- def annotate_with_mediapipe(image_path):
39
- img = cv2.imread(image_path)
40
- if img is None:
41
- return image_path # fallback
42
-
43
- mp_image = Image.create_from_file(image_path)
 
 
 
 
44
  detection_result = detector.detect(mp_image)
45
-
46
  if detection_result.hand_landmarks:
47
  for hand_landmarks in detection_result.hand_landmarks:
48
  for landmark in hand_landmarks:
49
- h, w, _ = img.shape
50
  x, y = int(landmark.x * w), int(landmark.y * h)
51
- cv2.circle(img, (x, y), 3, (0, 255, 0), -1) # green points
52
-
53
- annotated_path = os.path.join(OUTPUT_DIR, os.path.basename(image_path))
54
- cv2.imwrite(annotated_path, img)
55
- return annotated_path
56
-
57
- def ensure_hand_model():
58
- """Download the MediaPipe hand landmark model if not already present."""
59
- if not os.path.exists(HAND_MODEL_PATH):
60
- print("📥 Downloading MediaPipe hand landmark model...")
61
- r = requests.get(HAND_MODEL_URL)
62
- with open(HAND_MODEL_PATH, "wb") as f:
63
- f.write(r.content)
64
- print("✅ Download complete.")
65
- else:
66
- print("✅ MediaPipe hand landmark model already exists.")
67
-
68
- # ---------------------
69
- # Prediction function
70
- # ---------------------
71
- def predict(image):
72
- # Save uploaded image temporarily
73
- temp_path = "temp_upload.jpg"
74
- image.save(temp_path)
75
-
76
- # Step 1: Annotate
77
- annotated_path = annotate_with_mediapipe(temp_path)
78
-
79
- # Step 2: Run YOLO prediction
80
- results = model.predict(annotated_path, imgsz=300, verbose=False)[0]
81
- probs = results.probs
82
- top_idx = probs.top1
83
- top_label = results.names[top_idx]
84
- confidence = probs.data[top_idx].item()
85
-
86
- # Step 3: Load annotated image for display
87
- annotated_img = cv2.imread(annotated_path)
88
- annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
89
-
90
- return annotated_img, {top_label: confidence}
91
-
92
- # ---------------------
93
- # Build Gradio Interface
94
- # ---------------------
95
- demo = gr.Interface(
96
- fn=predict,
97
- inputs=gr.Image(type="pil", label="Upload a Hand Image"),
98
  outputs=[
99
- gr.Image(label="Annotated Image"),
100
- gr.Label(label="Predicted Letter (Confidence)")
 
101
  ],
102
- title="🖐️ ASL Letter Classifier",
103
- description="Upload an image of a hand gesture. The app will detect landmarks using MediaPipe and classify the ASL letter using YOLOv11.",
104
  allow_flagging="never"
105
  )
106
 
107
  if __name__ == "__main__":
108
- ensure_hand_model()
109
- demo.launch()
 
 
 
 
1
import os
import cv2
import numpy as np
import requests
import torch
from ultralytics import YOLO
import gradio as gr
from mediapipe import Image as MPImage
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# -----------------------------
# 1. Paths & URLs
# -----------------------------
HAND_MODEL_PATH = "hand_landmarker.task"
HAND_MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
YOLO_MODEL_PATH = "yolov11n_finetuned_ASL.pt"  # Already in repo via Git LFS or small enough

# -----------------------------
# 2. Download MediaPipe model if missing
# -----------------------------
if not os.path.exists(HAND_MODEL_PATH):
    print("📥 Downloading MediaPipe hand landmark model...")
    # Fail loudly on HTTP errors so an HTML error page is never written to
    # disk as the "model" file, and bound the wait with a timeout so the
    # Space doesn't hang forever on startup.
    r = requests.get(HAND_MODEL_URL, timeout=60)
    r.raise_for_status()
    with open(HAND_MODEL_PATH, "wb") as f:
        f.write(r.content)
    print("✅ Download complete.")

# -----------------------------
# 3. Load models
# -----------------------------
# YOLO ASL classifier. `Model` subclasses nn.Module, so .eval() is valid;
# predict() manages eval mode itself, so this is belt-and-braces only.
yolo_model = YOLO(YOLO_MODEL_PATH)
yolo_model.eval()

# MediaPipe hand landmark detector (single hand is enough for one-letter ASL).
base_options = python.BaseOptions(model_asset_path=HAND_MODEL_PATH)
hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
detector = vision.HandLandmarker.create_from_options(hand_options)
41
+ # -----------------------------
42
+ # 4. Inference function
43
+ # -----------------------------
44
def predict_asl(image):
    """Annotate hand landmarks and classify the ASL letter in an image.

    Parameters
    ----------
    image : np.ndarray
        H x W x 3 uint8 RGB array, as delivered by ``gr.Image(type="numpy")``.

    Returns
    -------
    tuple
        (annotated RGB image, predicted label, confidence rounded to 2 dp).
    """
    # Local import keeps this fix self-contained; mediapipe is already a
    # file-level dependency.
    from mediapipe import ImageFormat

    img = image.copy()
    h, w, _ = img.shape

    # --- Annotate hand landmarks ---
    # mp.Image has no `create_from_array`; it must be built via the
    # constructor with an explicit format. Gradio hands us RGB, which is
    # what SRGB expects; the buffer must be C-contiguous.
    mp_image = MPImage(image_format=ImageFormat.SRGB, data=np.ascontiguousarray(img))
    detection_result = detector.detect(mp_image)
    if detection_result.hand_landmarks:
        for hand_landmarks in detection_result.hand_landmarks:
            for landmark in hand_landmarks:
                # Landmarks are normalized to [0, 1]; scale to pixel coords.
                x, y = int(landmark.x * w), int(landmark.y * h)
                cv2.circle(img, (x, y), 3, (0, 255, 0), -1)  # green in RGB too

    # --- YOLO prediction ---
    # NOTE(review): Ultralytics assumes BGR for raw numpy inputs, but `img`
    # here is RGB (matching the previous behavior) — confirm the classifier
    # was fine-tuned on images fed the same way.
    results = yolo_model.predict(img, imgsz=300, verbose=False)[0]
    pred_idx = results.probs.top1
    pred_label = results.names[pred_idx]
    confidence = results.probs.data[pred_idx].item()

    # Overlay prediction text; color tuple is RGB because `img` is RGB
    # (the original (0, 0, 255) rendered blue, not the intended red).
    cv2.putText(
        img,
        f"{pred_label} ({confidence:.2f})",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (255, 0, 0),
        2,
        cv2.LINE_AA
    )

    # `img` is already RGB (Gradio input); the previous BGR2RGB conversion
    # here wrongly swapped the red/blue channels of the displayed output.
    return img, pred_label, round(confidence, 2)
80
+
81
# -----------------------------
# 5. Gradio Interface
# -----------------------------
title = "🖐️ ASL Letter Classifier"
description = "Upload a hand sign image and see the predicted letter and confidence.\nMediaPipe landmarks will be annotated automatically."

# Output widgets: the annotated frame plus the predicted class and its score.
_output_components = [
    gr.Image(type="numpy", label="Annotated Image"),
    gr.Textbox(label="Predicted Letter"),
    gr.Textbox(label="Confidence"),
]

# Wire the classifier into a simple single-image demo.
iface = gr.Interface(
    fn=predict_asl,
    inputs=gr.Image(type="numpy"),
    outputs=_output_components,
    title=title,
    description=description,
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()