Spaces:

prakasa1234
/

ASL_Classifier

Sleeping

App Files Files Community

prakasa1234 committed on Nov 6, 2025

Commit

0a0b7f1

verified ·

1 Parent(s): a1f4c0c

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -90

app.py CHANGED Viewed

@@ -1,109 +1,101 @@
-from ultralytics import YOLO
-import gradio as gr
-import cv2
 import os
 import numpy as np
-from mediapipe import Image
 from mediapipe.tasks import python
 from mediapipe.tasks.python import vision
-import os
-import requests
-# Path to save the model locally inside the Space
 HAND_MODEL_PATH = "hand_landmarker.task"
-# URL to download from (official MediaPipe mirror)
 HAND_MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
-# ---------------------
-# Load YOLO model
-# ---------------------
-MODEL_PATH = "yolov11n_finetuned_ASL.pt"
-HAND_MODEL_PATH = "hand_landmarker.task"
-OUTPUT_DIR = "annotated_uploads"
-os.makedirs(OUTPUT_DIR, exist_ok=True)
-model = YOLO(MODEL_PATH)
-# ---------------------
-# Load MediaPipe hand landmark detector
-# ---------------------
 base_options = python.BaseOptions(model_asset_path=HAND_MODEL_PATH)
-options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
-detector = vision.HandLandmarker.create_from_options(options)
-# ---------------------
-# Helper: Annotate image with landmarks
-# ---------------------
-def annotate_with_mediapipe(image_path):
-    img = cv2.imread(image_path)
-    if img is None:
-        return image_path  # fallback
-    mp_image = Image.create_from_file(image_path)
     detection_result = detector.detect(mp_image)
     if detection_result.hand_landmarks:
         for hand_landmarks in detection_result.hand_landmarks:
             for landmark in hand_landmarks:
-                h, w, _ = img.shape
                 x, y = int(landmark.x * w), int(landmark.y * h)
-                cv2.circle(img, (x, y), 3, (0, 255, 0), -1)  # green points
-    annotated_path = os.path.join(OUTPUT_DIR, os.path.basename(image_path))
-    cv2.imwrite(annotated_path, img)
-    return annotated_path
-def ensure_hand_model():
-    """Download the MediaPipe hand landmark model if not already present."""
-    if not os.path.exists(HAND_MODEL_PATH):
-        print("📥 Downloading MediaPipe hand landmark model...")
-        r = requests.get(HAND_MODEL_URL)
-        with open(HAND_MODEL_PATH, "wb") as f:
-            f.write(r.content)
-        print("✅ Download complete.")
-    else:
-        print("✅ MediaPipe hand landmark model already exists.")
-# ---------------------
-# Prediction function
-# ---------------------
-def predict(image):
-    # Save uploaded image temporarily
-    temp_path = "temp_upload.jpg"
-    image.save(temp_path)
-    # Step 1: Annotate
-    annotated_path = annotate_with_mediapipe(temp_path)
-    # Step 2: Run YOLO prediction
-    results = model.predict(annotated_path, imgsz=300, verbose=False)[0]
-    probs = results.probs
-    top_idx = probs.top1
-    top_label = results.names[top_idx]
-    confidence = probs.data[top_idx].item()
-    # Step 3: Load annotated image for display
-    annotated_img = cv2.imread(annotated_path)
-    annotated_img = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
-    return annotated_img, {top_label: confidence}
-# ---------------------
-# Build Gradio Interface
-# ---------------------
-demo = gr.Interface(
-    fn=predict,
-    inputs=gr.Image(type="pil", label="Upload a Hand Image"),
     outputs=[
-        gr.Image(label="Annotated Image"),
-        gr.Label(label="Predicted Letter (Confidence)")
     ],
-    title="🖐️ ASL Letter Classifier",
-    description="Upload an image of a hand gesture. The app will detect landmarks using MediaPipe and classify the ASL letter using YOLOv11.",
     allow_flagging="never"
 )
 if __name__ == "__main__":
-    ensure_hand_model()
-    demo.launch()

 import os
+import cv2
 import numpy as np
+import requests
+import torch
+from ultralytics import YOLO
+import gradio as gr
+from mediapipe import Image as MPImage
 from mediapipe.tasks import python
 from mediapipe.tasks.python import vision
+# -----------------------------
+# 1. Paths & URLs
+# -----------------------------
 HAND_MODEL_PATH = "hand_landmarker.task"
 HAND_MODEL_URL = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
+YOLO_MODEL_PATH = "yolov11n_finetuned_ASL.pt"  # Already in repo via Git LFS or small enough
+# -----------------------------
+# 2. Download MediaPipe model if missing
+# -----------------------------
+if not os.path.exists(HAND_MODEL_PATH):
+    print("📥 Downloading MediaPipe hand landmark model...")
+    r = requests.get(HAND_MODEL_URL)
+    with open(HAND_MODEL_PATH, "wb") as f:
+        f.write(r.content)
+    print("✅ Download complete.")
+# -----------------------------
+# 3. Load models
+# -----------------------------
+# YOLO ASL classifier
+yolo_model = YOLO(YOLO_MODEL_PATH)
+yolo_model.eval()
+# MediaPipe hand landmark detector
 base_options = python.BaseOptions(model_asset_path=HAND_MODEL_PATH)
+hand_options = vision.HandLandmarkerOptions(base_options=base_options, num_hands=1)
+detector = vision.HandLandmarker.create_from_options(hand_options)
+# -----------------------------
+# 4. Inference function
+# -----------------------------
+def predict_asl(image):
+    """
+    Input: numpy array (H x W x 3) from Gradio
+    Output: annotated image, predicted class, confidence
+    """
+    img = image.copy()
+    h, w, _ = img.shape
+    # --- Annotate hand landmarks ---
+    mp_image = MPImage.create_from_array(img)
     detection_result = detector.detect(mp_image)
     if detection_result.hand_landmarks:
         for hand_landmarks in detection_result.hand_landmarks:
             for landmark in hand_landmarks:
                 x, y = int(landmark.x * w), int(landmark.y * h)
+                cv2.circle(img, (x, y), 3, (0, 255, 0), -1)
+    # --- YOLO prediction ---
+    results = yolo_model.predict(img, imgsz=300, verbose=False)[0]
+    pred_idx = results.probs.top1
+    pred_label = results.names[pred_idx]
+    confidence = results.probs.data[pred_idx].item()
+    # Overlay prediction text
+    cv2.putText(
+        img,
+        f"{pred_label} ({confidence:.2f})",
+        (10, 30),
+        cv2.FONT_HERSHEY_SIMPLEX,
+        1,
+        (0, 0, 255),
+        2,
+        cv2.LINE_AA
+    )
+    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB), pred_label, round(confidence, 2)
+# -----------------------------
+# 5. Gradio Interface
+# -----------------------------
+title = "🖐️ ASL Letter Classifier"
+description = "Upload a hand sign image and see the predicted letter and confidence.\nMediaPipe landmarks will be annotated automatically."
+iface = gr.Interface(
+    fn=predict_asl,
+    inputs=gr.Image(type="numpy"),
     outputs=[
+        gr.Image(type="numpy", label="Annotated Image"),
+        gr.Textbox(label="Predicted Letter"),
+        gr.Textbox(label="Confidence")
     ],
+    title=title,
+    description=description,
     allow_flagging="never"
 )
 if __name__ == "__main__":
+    iface.launch()