Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import cv2 | |
| from transformers import AutoImageProcessor, SiglipForImageClassification | |
| from collections import Counter | |
# Checkpoint identifier shared by the classifier and its image processor, so
# both are loaded from the same pretrained source.
model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
model = SiglipForImageClassification.from_pretrained(model_name)
processor = AutoImageProcessor.from_pretrained(model_name)
def predict_from_video(video_path):
    """Predict the signed letter in a video by majority vote over its frames.

    Each frame that OpenCV manages to read is classified on its own with the
    module-level ``processor`` and ``model``; the label predicted for the
    largest number of frames is reported.

    Args:
        video_path: Path of the video file to analyse. A falsy value
            (``None`` or an empty string) is treated like a video from which
            no frame could be read.

    Returns:
        ``"Predicted Letter: <label> (appeared <n> times)"`` for the most
        frequent per-frame label, or ``"No frames processed."`` when no frame
        was classified.
    """
    if not video_path:
        # Nothing to open: report it the same way as an unreadable video
        # instead of handing a non-path to cv2.VideoCapture.
        return "No frames processed."

    # Imported here because the file's import block does not bring torch into
    # scope; the model is already run with PyTorch tensors
    # (return_tensors="pt").
    import torch

    cap = cv2.VideoCapture(video_path)
    predictions = []
    try:
        # Inference only: disable autograd so no graph is recorded per frame.
        with torch.no_grad():
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                # The frame is converted from BGR to RGB before it is handed
                # to the image processor.
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                inputs = processor(images=img, return_tensors="pt")
                logits = model(**inputs).logits
                # Softmax is monotonic, so the argmax of the raw logits picks
                # the same class as the argmax of the probabilities.
                idx = int(logits[0].argmax())
                predictions.append(model.config.id2label[idx])
    finally:
        # Release the capture even if preprocessing or the model raised.
        cap.release()

    if not predictions:
        return "No frames processed."

    # Majority vote across all classified frames.
    label, count = Counter(predictions).most_common(1)[0]
    return f"Predicted Letter: {label} (appeared {count} times)"
# Gradio UI: one video input whose path is passed to predict_from_video, with
# the returned string shown as text.
iface = gr.Interface(
    fn=predict_from_video,
    # gr.Video is built without a 'type' argument (per the original note, that
    # argument is not accepted in Gradio 5.x).
    inputs=gr.Video(),
    outputs="text",
    title="ASL Alphabet Recognition from Video",
    description="Upload a short video of your ASL sign (A–Z). The system will analyze frames and predict the most likely letter.",
)

# Start the server only when this file is executed as a script, so importing
# the module (e.g. to reuse predict_from_video) does not launch the UI.
if __name__ == "__main__":
    iface.launch()