AamirMalik commited on
Commit
a5dbef3
·
verified ·
1 Parent(s): 3e8895b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -11
app.py CHANGED
@@ -4,10 +4,11 @@ import mediapipe as mp
4
  import numpy as np
5
  import tensorflow as tf
6
  import tempfile
7
- from transformers import AutoModelForImageClassification
8
 
9
- # Load gesture classification model from Hugging Face Hub
10
- model = AutoModelForImageClassification.from_pretrained("nateraw/gesture-classification")
 
11
 
12
  # Mediapipe initialization
13
  mp_hands = mp.solutions.hands
@@ -15,10 +16,11 @@ hands = mp_hands.Hands()
15
  mp_draw = mp.solutions.drawing_utils
16
 
17
  # Function for gesture classification
18
- def classify_gesture(landmarks):
19
- landmarks = np.array(landmarks).reshape(1, -1)
20
- prediction = model(landmarks)
21
- return np.argmax(prediction.logits.detach().numpy())
 
22
 
23
  # Streamlit UI
24
  def main():
@@ -48,10 +50,8 @@ def main():
48
  for hand_landmarks in results.multi_hand_landmarks:
49
  mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
50
 
51
- # Extract landmarks
52
- landmarks = [landmark.x for landmark in hand_landmarks.landmark]
53
- landmarks += [landmark.y for landmark in hand_landmarks.landmark]
54
- gesture = classify_gesture(landmarks)
55
  st.write(f"Gesture: {gesture}")
56
 
57
  frame_placeholder.image(frame, channels="RGB")
 
4
  import numpy as np
5
  import tensorflow as tf
6
  import tempfile
7
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
8
 
9
+ # Load a public ViT image-classification checkpoint from the Hugging Face Hub
+ # (NOTE: in21k is a pre-training-only model — its classifier head is not fine-tuned for gestures)
10
+ processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
11
+ model = AutoModelForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k")
12
 
13
  # Mediapipe initialization
14
  mp_hands = mp.solutions.hands
 
16
  mp_draw = mp.solutions.drawing_utils
17
 
18
# Function for gesture classification
def classify_gesture(image):
    """Run the Hub image-classification model on *image*.

    Returns the index of the highest-scoring class as a plain int.
    NOTE(review): the loaded checkpoint is a pre-training-only ViT, so the
    predicted index is not a meaningful gesture label — confirm model choice.
    """
    model_inputs = processor(images=image, return_tensors="pt")
    logits = model(**model_inputs).logits
    return logits.argmax(-1).item()
24
 
25
  # Streamlit UI
26
  def main():
 
50
  for hand_landmarks in results.multi_hand_landmarks:
51
  mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
52
 
53
+ # Gesture classification
54
+ gesture = classify_gesture(frame)
 
 
55
  st.write(f"Gesture: {gesture}")
56
 
57
  frame_placeholder.image(frame, channels="RGB")