prakasa1234 committed on
Commit
ff714e1
·
verified ·
1 Parent(s): b9c1b8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -52
app.py CHANGED
@@ -1,46 +1,24 @@
1
  import os
2
- import io
3
  import cv2
4
  import numpy as np
5
  import torch
6
- import requests
7
- from PIL import Image
8
  from ultralytics import YOLO
9
  import gradio as gr
10
- from mediapipe.tasks import python
11
- from mediapipe.tasks.python import vision
12
- from mediapipe.tasks.python.vision import Image as MPImage
13
  import traceback
14
 
15
  # -----------------------------
16
- # 1. Paths & URLs
17
  # -----------------------------
18
- HAND_MODEL_PATH = "hand_landmarker.task"
19
- HAND_MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
20
  YOLO_MODEL_PATH = "yolov11n_finetuned_ASL.pt" # Push this small model to HF repo
21
 
22
  # -----------------------------
23
- # 2. Download MediaPipe model if missing
24
- # -----------------------------
25
- if not os.path.exists(HAND_MODEL_PATH):
26
- print("📥 Downloading MediaPipe hand landmark model...")
27
- r = requests.get(HAND_MODEL_URL)
28
- with open(HAND_MODEL_PATH, "wb") as f:
29
- f.write(r.content)
30
- print("✅ Download complete.")
31
-
32
- # -----------------------------
33
- # 3. Load models
34
  # -----------------------------
35
  yolo_model = YOLO(YOLO_MODEL_PATH)
36
  yolo_model.eval()
37
 
38
- base_options = python.BaseOptions(model_asset_path=HAND_MODEL_PATH)
39
- hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
40
- detector = vision.HandLandmarker.create_from_options(hand_options)
41
-
42
  # -----------------------------
43
- # 4. Inference function
44
  # -----------------------------
45
  def predict_asl(image):
46
  try:
@@ -51,36 +29,13 @@ def predict_asl(image):
51
  h, w, _ = img.shape
52
  print(f"🔹 Uploaded image shape: {img.shape}, dtype: {img.dtype}")
53
 
54
- # --- MediaPipe annotation ---
55
- try:
56
- # Convert OpenCV BGR -> RGB
57
- img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
58
-
59
- # PIL + BytesIO to create MediaPipe image
60
- pil_img = Image.fromarray(img_rgb)
61
- buf = io.BytesIO()
62
- pil_img.save(buf, format="PNG")
63
- buf.seek(0)
64
-
65
- mp_img = MPImage.create_from_file(buf)
66
-
67
- detection_result = detector.detect(mp_img)
68
- if detection_result.hand_landmarks:
69
- for hand_landmarks in detection_result.hand_landmarks:
70
- for landmark in hand_landmarks:
71
- x, y = int(landmark.x * w), int(landmark.y * h)
72
- cv2.circle(img, (x, y), 3, (0, 255, 0), -1)
73
- except Exception as e:
74
- print("❌ MediaPipe annotation error:", e)
75
- traceback.print_exc()
76
-
77
  # --- YOLO prediction directly on NumPy array ---
78
  results = yolo_model.predict(img, imgsz=300, verbose=False)[0]
79
  pred_idx = results.probs.top1
80
  pred_label = results.names[pred_idx]
81
  confidence = results.probs.data[pred_idx].item()
82
 
83
- # Overlay prediction text
84
  cv2.putText(
85
  img,
86
  f"{pred_label} ({confidence:.2f})",
@@ -100,16 +55,16 @@ def predict_asl(image):
100
  return image, "Error", 0.0
101
 
102
  # -----------------------------
103
- # 5. Gradio Interface
104
  # -----------------------------
105
  title = "🖐️ ASL Letter Classifier"
106
- description = "Upload a hand sign image and see the predicted letter and confidence.\nMediaPipe landmarks will be annotated automatically."
107
 
108
  iface = gr.Interface(
109
  fn=predict_asl,
110
  inputs=gr.Image(type="numpy"),
111
  outputs=[
112
- gr.Image(type="numpy", label="Annotated Image"),
113
  gr.Textbox(label="Predicted Letter"),
114
  gr.Textbox(label="Confidence")
115
  ],
 
1
  import os
 
2
  import cv2
3
  import numpy as np
4
  import torch
 
 
5
  from ultralytics import YOLO
6
  import gradio as gr
 
 
 
7
  import traceback
8
 
9
  # -----------------------------
10
+ # 1. YOLO model path
11
  # -----------------------------
 
 
12
  YOLO_MODEL_PATH = "yolov11n_finetuned_ASL.pt" # Push this small model to HF repo
13
 
14
  # -----------------------------
15
+ # 2. Load YOLO model
 
 
 
 
 
 
 
 
 
 
16
  # -----------------------------
17
  yolo_model = YOLO(YOLO_MODEL_PATH)
18
  yolo_model.eval()
19
 
 
 
 
 
20
  # -----------------------------
21
+ # 3. Inference function
22
  # -----------------------------
23
  def predict_asl(image):
24
  try:
 
29
  h, w, _ = img.shape
30
  print(f"🔹 Uploaded image shape: {img.shape}, dtype: {img.dtype}")
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # --- YOLO prediction directly on NumPy array ---
33
  results = yolo_model.predict(img, imgsz=300, verbose=False)[0]
34
  pred_idx = results.probs.top1
35
  pred_label = results.names[pred_idx]
36
  confidence = results.probs.data[pred_idx].item()
37
 
38
+ # Overlay prediction text on original image
39
  cv2.putText(
40
  img,
41
  f"{pred_label} ({confidence:.2f})",
 
55
  return image, "Error", 0.0
56
 
57
  # -----------------------------
58
+ # 4. Gradio Interface
59
  # -----------------------------
60
  title = "🖐️ ASL Letter Classifier"
61
+ description = "Upload a hand sign image and see the predicted letter and confidence."
62
 
63
  iface = gr.Interface(
64
  fn=predict_asl,
65
  inputs=gr.Image(type="numpy"),
66
  outputs=[
67
+ gr.Image(type="numpy", label="Original Image with Prediction"),
68
  gr.Textbox(label="Predicted Letter"),
69
  gr.Textbox(label="Confidence")
70
  ],