Spaces:

Haryiank
/

ASL_Sign_Lang

Sleeping

ASL_Sign_Lang / app.py

Create app.py

4acc044 verified 6 months ago

1.5 kB

	import gradio as gr
	import numpy as np
	import cv2
	from transformers import AutoImageProcessor, SiglipForImageClassification
	from collections import Counter

	# Load the pretrained checkpoint once, at module import, so every request
	# handled by predict_from_video reuses the same processor and model objects.
	model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
	# Preprocessor: called below with RGB frames to build the model's input tensors.
	processor = AutoImageProcessor.from_pretrained(model_name)
	# Classifier: its config.id2label mapping is used below to turn the winning
	# class index into a label string.
	model = SiglipForImageClassification.from_pretrained(model_name)

	def predict_from_video(video_path):
	    """Predict the ASL letter shown in a video by majority vote over its frames.

	    Every frame that OpenCV can decode is converted from BGR to RGB, run
	    through the module-level ``processor`` and ``model``, and the label of
	    the highest-scoring class is recorded. The label seen most often wins;
	    ties go to the label that appeared first in the video.

	    Args:
	        video_path: Path of the video file to analyse. ``None`` or an empty
	            string (e.g. the form was submitted without an upload) is
	            treated as a video with no frames.

	    Returns:
	        ``"Predicted Letter: <label> (appeared <n> times)"`` when at least
	        one frame was classified, otherwise ``"No frames processed."``.
	    """
	    # Local import: torch is not imported at module level in this file, but
	    # it is the backend the "pt" tensors and the model already run on.
	    import torch

	    # Nothing to open: report it the same way as an unreadable/empty video
	    # instead of handing None to cv2.VideoCapture.
	    if not video_path:
	        return "No frames processed."

	    cap = cv2.VideoCapture(video_path)
	    votes = Counter()
	    try:
	        # Pure inference: disable autograd so no graph is built per frame.
	        with torch.no_grad():
	            while cap.isOpened():
	                ret, frame = cap.read()
	                if not ret:
	                    break

	                # OpenCV decodes to BGR; the processor is fed RGB.
	                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	                inputs = processor(images=img, return_tensors="pt")
	                logits = model(**inputs).logits

	                # argmax over raw logits picks the same class as argmax over
	                # softmax probabilities, so the softmax is skipped.
	                idx = int(logits.argmax(dim=-1)[0])
	                votes[model.config.id2label[idx]] += 1
	    finally:
	        # Always free the capture handle, even if inference raised.
	        cap.release()

	    if not votes:
	        return "No frames processed."

	    letter, count = votes.most_common(1)[0]
	    return f"Predicted Letter: {letter} (appeared {count} times)"

	# Web UI: one video input wired to predict_from_video, plain-text output.
	iface = gr.Interface(
	    predict_from_video,
	    gr.Video(),  # ✅ no 'type' argument in Gradio 5.x
	    "text",
	    title="ASL Alphabet Recognition from Video",
	    description="Upload a short video of your ASL sign (A–Z). The system will analyze frames and predict the most likely letter.",
	)

	# Start serving the interface.
	iface.launch()