Upload folder using huggingface_hub
- app.py +103 -3
- requirements.txt +4 -0
app.py
CHANGED
@@ -1,9 +1,109 @@
 import streamlit as st
 import os
+from collections import Counter
+import time
+import traceback
+from transformers import AutoImageProcessor, SiglipForImageClassification
+from PIL import Image
+import torch
+import cv2
 
 os.environ["HF_HOME"] = "/tmp/huggingface"
 os.makedirs("/tmp/huggingface", exist_ok=True)
 
-
-
-st.
+# Load model and processor
+model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
+@st.cache_resource
+def load_model_and_processor():
+    print(f"INFO: Loading model '{model_name}'...")
+    model = SiglipForImageClassification.from_pretrained(model_name)
+    processor = AutoImageProcessor.from_pretrained(model_name)
+    print("INFO: Model and processor loaded successfully.")
+    return model, processor
+
+model, processor = load_model_and_processor()
+
+# Number of consecutive identical predictions a sign must be held for before it is registered
+MAX_CONSECUTIVE_REPETITIONS = 3
+
+# Map class indices to ASL alphabet letters
+labels = {
+    "0": "A", "1": "B", "2": "C", "3": "D", "4": "E", "5": "F", "6": "G", "7": "H", "8": "I", "9": "J",
+    "10": "K", "11": "L", "12": "M", "13": "N", "14": "O", "15": "P", "16": "Q", "17": "R", "18": "S", "19": "T",
+    "20": "U", "21": "V", "22": "W", "23": "X", "24": "Y", "25": "Z"
+}
+
+def sign_language_classification_streamlit(video_path):
+    print("sign_language_classification_streamlit function called.")
+    predicted_letters = []
+    last_predicted_label = None
+    consecutive_repetitions = 0
+
+    try:
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            return "Error: Could not open video file.", ""
+
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+            # OpenCV decodes frames as BGR; convert to RGB before handing to PIL
+            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+            inputs = processor(images=image, return_tensors="pt")
+            with torch.no_grad():
+                outputs = model(**inputs)
+                logits = outputs.logits
+                predicted_label_index = torch.argmax(logits, dim=1).item()
+            current_predicted_label = labels[str(predicted_label_index)]
+
+            # Apply repetition logic
+            if current_predicted_label == last_predicted_label:
+                consecutive_repetitions += 1
+            else:
+                consecutive_repetitions = 1
+
+            if consecutive_repetitions > MAX_CONSECUTIVE_REPETITIONS or last_predicted_label is None:
+                predicted_letters.append(current_predicted_label)
+            last_predicted_label = current_predicted_label
+
+        cap.release()
+        unique_predicted_letters = list(dict.fromkeys(predicted_letters))
+        final_output_str = ", ".join(unique_predicted_letters)
+        # As the 'Real-time Prediction' equivalent, use the last unique prediction
+        realtime_equivalent_prediction = unique_predicted_letters[-1] if unique_predicted_letters else ""
+
+        return realtime_equivalent_prediction, final_output_str
+
+    except Exception as e:
+        print(f"Error caught: {e}")
+        return (f"Error processing video: {e}",
+                f"Error processing video: {e}\n{traceback.format_exc()}")
+
+
+st.set_page_config(page_title="ASL Translator", layout="centered")
+st.title("ASL Translator")
+st.markdown("Upload a video to translate ASL into one of the 26 sign language alphabet categories and see predictions. ASL Words Translator coming soon!")
+
+uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov", "webm"])
+
+if uploaded_file is not None:
+    # Save the uploaded file temporarily
+    video_path = os.path.join("/tmp", uploaded_file.name)
+    with open(video_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    st.video(video_path)
+
+    if st.button("Translate ASL"):
+        with st.spinner("Translating video... This might take a while depending on video length."):
+            realtime_pred, unique_letters = sign_language_classification_streamlit(video_path)
+        st.success("Translation Complete!")
+
+        st.subheader("Last Predicted Sign (Real-time Equivalent)")
+        st.write(realtime_pred)
+
+        st.subheader("Unique Predicted Letters")
+        st.write(unique_letters)
+
+        os.remove(video_path)  # Clean up temporary file
+else:
+    st.info("Please upload a video file to start the translation.")
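To make the repetition filter easier to follow, here is a minimal standalone rendering of the same logic, fed a synthetic per-frame prediction stream (the stream and the helper name filter_predictions are illustrative inventions; the threshold and filter body mirror the diff above):

MAX_CONSECUTIVE_REPETITIONS = 3

def filter_predictions(frame_predictions):
    predicted_letters = []
    last_predicted_label = None
    consecutive_repetitions = 0
    for current in frame_predictions:
        if current == last_predicted_label:
            consecutive_repetitions += 1
        else:
            consecutive_repetitions = 1
        # A letter registers on the very first frame, or once it has been held
        # for more than MAX_CONSECUTIVE_REPETITIONS consecutive frames.
        if consecutive_repetitions > MAX_CONSECUTIVE_REPETITIONS or last_predicted_label is None:
            predicted_letters.append(current)
        last_predicted_label = current
    # Deduplicate while preserving first-seen order, as app.py does.
    return list(dict.fromkeys(predicted_letters))

# Held letters register; the single-frame "X" blip is filtered out.
print(filter_predictions(["H", "H", "H", "H", "H", "X", "I", "I", "I", "I"]))  # ['H', 'I']

Note that a letter held past the threshold re-registers on every subsequent frame; the dict.fromkeys pass collapses those repeats, though as a side effect it also drops any later recurrence of the same letter.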
requirements.txt
CHANGED
@@ -1,2 +1,6 @@
 
 streamlit
+opencv-python-headless
+transformers
+torch
+Pillow
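With these dependencies installed, a single-image smoke test is a quick way to exercise the model outside Streamlit. A minimal sketch, assuming hand.jpg is any local test image of a sign (the path is a placeholder; the model name and the processor/model calls are the ones app.py uses):

from PIL import Image
from transformers import AutoImageProcessor, SiglipForImageClassification
import torch

model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
model = SiglipForImageClassification.from_pretrained(model_name)
processor = AutoImageProcessor.from_pretrained(model_name)

# "hand.jpg" is a placeholder path to a single-sign test image.
image = Image.open("hand.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print("Predicted class index:", torch.argmax(logits, dim=1).item())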