kalpniks committed on
Commit
48bf46d
·
verified ·
1 Parent(s): 4e521c2

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +103 -3
  2. requirements.txt +4 -0
app.py CHANGED
@@ -1,9 +1,109 @@
1
  import streamlit as st
2
  import os
 
 
 
 
 
 
 
3
 
4
  os.environ["HF_HOME"] = "/tmp/huggingface"
5
  os.makedirs("/tmp/huggingface", exist_ok=True)
6
 
7
- st.title("Minimal Streamlit App")
8
- st.write("INFO: Minimal Streamlit App Started.")
9
- st.write("Hello from Streamlit!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import os
3
+ from collections import Counter
4
+ import time
5
+ import traceback
6
+ from transformers import AutoImageProcessor, SiglipForImageClassification
7
+ from PIL import Image
8
+ import torch
9
+ import cv2
10
 
11
  os.environ["HF_HOME"] = "/tmp/huggingface"
12
  os.makedirs("/tmp/huggingface", exist_ok=True)
13
 
14
+ # Load model and processor
15
+ model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
16
@st.cache_resource
def load_model_and_processor():
    """Load the sign-language classifier and its image processor.

    Decorated with ``st.cache_resource`` so Streamlit reruns reuse the
    already-loaded objects instead of reloading them.

    Returns:
        A ``(model, processor)`` tuple.
    """
    # HF_HOME is assigned in this file only after `transformers` has been
    # imported; the Hugging Face libraries resolve their default cache
    # location at import time, so the late assignment may be ignored.
    # Passing the directory explicitly makes the download land where the
    # file intends. `None` (variable unset) keeps the library default.
    cache_dir = os.environ.get("HF_HOME")

    print(f"INFO: Loading model '{model_name}'...")
    model = SiglipForImageClassification.from_pretrained(
        model_name, cache_dir=cache_dir
    )
    processor = AutoImageProcessor.from_pretrained(
        model_name, cache_dir=cache_dir
    )
    print("INFO: Model and processor loaded successfully.")
    return model, processor
23
+
24
+ model, processor = load_model_and_processor()
25
+
26
+ # Define the maximum number of consecutive repetitions allowed for predictions
27
+ MAX_CONSECUTIVE_REPETITIONS = 3
28
+
29
+ # Define labels
30
# Class index (as a string key) -> letter: "0" -> "A", ..., "25" -> "Z".
labels = {str(index): chr(ord("A") + index) for index in range(26)}
35
+
36
def sign_language_classification_streamlit(video_path):
    """Classify every frame of a video and summarise the predicted letters.

    A letter is recorded for the very first frame, and afterwards whenever
    the same letter has been predicted on more than
    ``MAX_CONSECUTIVE_REPETITIONS`` consecutive frames. Recorded letters are
    then de-duplicated, keeping first-seen order.

    Args:
        video_path: Path of the video file to open with OpenCV.

    Returns:
        A pair of strings. On success: ``(last_unique_letter, letters_csv)``
        where ``letters_csv`` is the de-duplicated letters joined by ", "
        (both empty when nothing was recorded). On failure both elements are
        messages starting with "Error"; the second carries the traceback
        when an exception was caught.
    """
    print("sign_language_classification_streamlit function called.")
    predicted_letters = []
    last_predicted_label = None
    consecutive_repetitions = 0
    cap = None

    try:
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return "Error: Could not open video file.", ""

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes frames in BGR channel order; PIL's
            # convert("RGB") does not reorder channels, so swap explicitly
            # before handing the frame to the image processor.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(rgb_frame)

            inputs = processor(images=image, return_tensors="pt")
            with torch.no_grad():
                outputs = model(**inputs)
            logits = outputs.logits
            predicted_label_index = torch.argmax(logits, dim=1).item()
            current_predicted_label = labels[str(predicted_label_index)]

            # Apply repetition logic
            if current_predicted_label == last_predicted_label:
                consecutive_repetitions += 1
            else:
                consecutive_repetitions = 1

            if (
                consecutive_repetitions > MAX_CONSECUTIVE_REPETITIONS
                or last_predicted_label is None
            ):
                predicted_letters.append(current_predicted_label)
            last_predicted_label = current_predicted_label

        # dict.fromkeys drops duplicates while preserving first-seen order.
        unique_predicted_letters = list(dict.fromkeys(predicted_letters))
        final_output_str = ", ".join(unique_predicted_letters)
        # "Real-time" equivalent: the last entry of the de-duplicated list.
        realtime_equivalent_prediction = (
            unique_predicted_letters[-1] if unique_predicted_letters else ""
        )

        return realtime_equivalent_prediction, final_output_str

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text instead
        # of letting the exception propagate to the caller.
        print(f"Error caught: {e}")
        return (
            f"Error processing video: {e}",
            f"Error processing video: {e}\n{traceback.format_exc()}",
        )
    finally:
        # Release the capture on every path, including the early return
        # and the exception path.
        if cap is not None:
            cap.release()
81
+
82
+
83
import tempfile

st.set_page_config(page_title="ASL Translator", layout="centered")
st.title("ASL Translator")
st.markdown("Upload a video to translate ASL into one of the 26 sign language alphabet categories and see predictions. ASL Words Translator coming soon!")

uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov", "webm"])

if uploaded_file is not None:
    # The uploaded name is client-controlled: keep only its extension (so
    # OpenCV can pick a decoder) and let mkstemp choose a unique, safe path.
    suffix = os.path.splitext(uploaded_file.name)[1]
    fd, video_path = tempfile.mkstemp(suffix=suffix, dir="/tmp")
    try:
        # Save the uploaded file temporarily
        with os.fdopen(fd, "wb") as f:
            f.write(uploaded_file.getbuffer())
        st.video(video_path)

        if st.button("Translate ASL"):
            with st.spinner("Translating video... This might take a while depending on video length."):
                realtime_pred, unique_letters = sign_language_classification_streamlit(video_path)

            # The classifier reports failures as strings starting with
            # "Error"; successful results are single letters or "".
            if realtime_pred.startswith("Error"):
                st.error(realtime_pred)
                if unique_letters:
                    st.code(unique_letters)
            else:
                st.success("Translation Complete!")

                st.subheader("Last Predicted Sign (Real-time Equivalent)")
                st.write(realtime_pred)

                st.subheader("Unique Predicted Letters")
                st.write(unique_letters)
    finally:
        # Clean up temporary file, even if saving or translating failed.
        if os.path.exists(video_path):
            os.remove(video_path)
else:
    st.info("Please upload a video file to start the translation.")
requirements.txt CHANGED
@@ -1,2 +1,6 @@
1
 
2
  streamlit
 
 
 
 
 
1
 
2
  streamlit
3
+ opencv-python-headless
4
+ transformers
5
+ torch
6
+ Pillow