kalpniks committed on
Commit
ad4360e
·
verified ·
1 Parent(s): 24eef17

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +10 -169
app.py CHANGED
@@ -1,178 +1,19 @@
 
 
1
  import os
 
2
  os.environ["HF_HOME"] = "/tmp/huggingface"
3
  os.makedirs("/tmp/huggingface", exist_ok=True)
4
 
5
- print("INFO: app.py started.")
6
-
7
- # Import necessary libraries
8
- print("INFO: Importing collections...")
9
- from collections import Counter
10
- print("INFO: Importing time...")
11
- import time
12
- print("INFO: Importing traceback...")
13
- import traceback
14
- print("INFO: Importing gradio...")
15
- import gradio as gr
16
- print("INFO: Importing transformers.AutoImageProcessor...")
17
- from transformers import AutoImageProcessor
18
- print("INFO: Importing transformers.SiglipForImageClassification...")
19
- from transformers import SiglipForImageClassification
20
- print("INFO: Importing transformers.image_utils.load_image...")
21
- from transformers.image_utils import load_image
22
- print("INFO: Importing PIL.Image...")
23
- from PIL import Image
24
- print("INFO: Importing torch...")
25
- import torch
26
- print("INFO: Importing cv2...")
27
- import cv2 # Import cv2 for video frame processing
28
-
29
- print("INFO: All libraries imported successfully.")
30
-
31
- print("INFO: Loading model and processor.")
32
- # Load model and processor for Alphabet Sign Language Detection
33
- model_name = "prithivMLmods/Alphabet-Sign-Language-Detection"
34
- print(f"INFO: Loading model '{model_name}'...")
35
- model = SiglipForImageClassification.from_pretrained(model_name)
36
- processor = AutoImageProcessor.from_pretrained(model_name)
37
- print("INFO: Model and processor loaded successfully.")
38
-
39
- # Define the maximum number of consecutive repetitions allowed for predictions
40
- MAX_CONSECUTIVE_REPETITIONS = 3
41
-
42
-
43
- def sign_language_classification(video):
44
- """
45
- Predicts sign language alphabet category for each frame in a video,
46
- yields predictions in real-time with repetition handling, and returns a list of unique predicted letters.
47
- """
48
- print("sign_language_classification function called.") # Debug print to indicate function call
49
- if video is None:
50
- print("No video provided.") # Debug print if no video input
51
- print(f"DEBUG: Yielding 'No video provided.' and '', types: {type('No video provided.')}, {type('')}")
52
- yield "No video provided.", "" # Yield empty string for the second output if no video
53
- return
54
-
55
- print(f"Video input type: {type(video)}") # Debug print to show video input type
56
- print(f"Video value: {video}") # Debug print to show video input value
57
-
58
- predicted_letters = [] # List to store all predicted letters from each frame
59
- last_predicted_label = None # Initialize variable to store the last predicted label to handle repetitions
60
- consecutive_repetitions = 0 # Initialize counter for consecutive repetitions of the same prediction
61
-
62
- try:
63
- print("Starting frame processing loop.") # Debug print to indicate start of frame processing
64
- frames = []
65
- if isinstance(video, str):
66
- # If video is a filepath (e.g., uploaded file), load the video frames using OpenCV
67
- cap = cv2.VideoCapture(video)
68
- if not cap.isOpened():
69
- print(f"DEBUG: Yielding 'Error: Could not open video file.' and '', types: {type('Error: Could not open video file.')}, {type('')}")
70
- yield "Error: Could not open video file.", "" # Yield error if video file cannot be opened
71
- return
72
- while True:
73
- ret, frame = cap.read()
74
- if not ret: # Break the loop if no more frames are returned
75
- break
76
- frames.append(frame) # Append the read frame to the frames list
77
- cap.release() # Release the video capture object
78
- elif isinstance(video, list):
79
- # If video is already a list of frames (e.g., from webcam in some Gradio versions)
80
- frames = video
81
- else:
82
- print(f"DEBUG: Yielding 'Error: Unsupported video input type.' and '', types: {type('Error: Unsupported video input type.')}, {type('')}")
83
- yield "Error: Unsupported video input type.", "" # Yield error for unsupported video input types
84
- return
85
-
86
-
87
- for i, frame in enumerate(frames):
88
- # print(f"Processing frame {i}") # Debug print - Removed for cleaner output
89
-
90
- # Convert the numpy frame (BGR format from OpenCV) to a PIL Image in RGB format for the model
91
- image = Image.fromarray(frame).convert("RGB")
92
- # print(f"Frame {i} converted to PIL Image.") # Debug print - Removed for cleaner output
93
-
94
-
95
- # Process the image frame using the pre-trained processor and model
96
- inputs = processor(images=image, return_tensors="pt") # Prepare image for model input
97
- # print(f"Frame {i} processed by processor.)" # Debug print - Removed for cleaner output
98
-
99
-
100
- # Perform inference with the model
101
- with torch.no_grad(): # Disable gradient calculation for inference
102
- outputs = model(**inputs)
103
- logits = outputs.logits # Get the raw output scores (logits)
104
- # probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist() # Apply softmax to get probabilities and convert to list
105
- # print(f"Frame {i} processed by model. Logits shape: {logits.shape}") # Debug print - Removed for cleaner output
106
-
107
-
108
- # Define the labels mapping model output indices to ASL alphabet letters
109
- labels = {
110
- "0": "A", "1": "B", "2": "C", "3": "D", "4": "E", "5": "F", "6": "G", "7": "H", "8": "I", "9": "J",
111
- "10": "K", "11": "L", "12": "M", "13": "N", "14": "O", "15": "P", "16": "Q", "17": "R", "18": "S", "19": "T",
112
- "20": "U", "21": "V", "22": "W", "23": "X", "24": "Y", "25": "Z"
113
- }
114
- # Get the index of the highest probability directly from logits
115
- predicted_label_index = torch.argmax(logits, dim=1).item()
116
- predicted_label = labels[str(predicted_label_index)]
117
- # print(f"Frame {i} prediction: {predicted_label}") # Debug print - Removed for cleaner output
118
-
119
- predicted_letters.append(predicted_label) # Append predicted letter to the list of all predictions
120
-
121
- # Check for consecutive repetitions and yield only if the rule is met
122
- if predicted_label == last_predicted_label:
123
- consecutive_repetitions += 1
124
- else:
125
- consecutive_repetitions = 1 # Reset consecutive count if prediction changes
126
-
127
- # Yield the prediction if it's not a consecutive repetition beyond the limit or if it's the first prediction
128
- if consecutive_repetitions > MAX_CONSECUTIVE_REPETITIONS or last_predicted_label is None:
129
- print(f"DEBUG: Yielding predicted_label: {predicted_label}, type: {type(predicted_label)}")
130
- yield predicted_label, "" # Yield real-time prediction and empty string for the second output
131
- last_predicted_label = predicted_label # Update the last predicted label
132
-
133
-
134
- print("Finished frame processing loop.") # Debug print to indicate end of frame processing
135
- # Get unique predicted letters while maintaining order of appearance
136
- unique_predicted_letters = list(dict.fromkeys(predicted_letters))
137
- final_output = ", ".join(unique_predicted_letters) # Join unique letters into a comma-separated string
138
- # Yield the last predicted label (or empty string if none) and the final list of unique letters
139
- output1 = last_predicted_label if last_predicted_label is not None else ""
140
- output2 = final_output
141
- print(f"DEBUG: Final yield outputs - output1: {output1}, type: {type(output1)}; output2: {output2}, type: {type(output2)}")
142
- yield output1, output2
143
-
144
- except Exception as e:
145
- print(f"Error caught: {e}") # Debug print if an error occurs
146
- # Yield error message and traceback information in case of an exception
147
- error_message = f"Error processing video: {e}"
148
- traceback_info = f"Error processing video: {e}\n{traceback.format_exc()}"
149
- print(f"DEBUG: Error yield outputs - error_message: {error_message}, type: {type(error_message)}; traceback_info: {traceback_info}, type: {type(traceback_info)}")
150
- yield error_message, traceback_info
151
 
152
- # Custom CSS for styling (commented out)
153
- # custom_css = """
154
- # body {
155
- # background-color: #add8e6;
156
- # }
157
- # """
158
 
159
- print("INFO: Setting up Gradio interface.")
160
- # Create Gradio interface with video input and multiple outputs
161
- iface = gr.Interface(
162
- fn=sign_language_classification, # The function to run when the user interacts with the interface
163
- inputs=gr.Video(), # Input component: Video, allowing upload or webcam
164
- outputs=[
165
- gr.Label(label="Real-time Prediction"), # Output component: Label to display the real-time prediction
166
- gr.Textbox(label="Unique Predicted Letters") # Output component: Textbox to display the final list of unique predicted letters
167
- ],
168
- title="ASL Translator", # Title of the Gradio interface
169
- description="Upload a video or use your webcam to translate ASL into one of the 26 sign language alphabet categories and see predictions in real-time and a summary list. ASL Words Translator coming soon!", # Description displayed below the title
170
- # css=custom_css # Apply custom CSS (commented out)
171
- )
172
- print("INFO: Gradio interface created.")
173
 
174
- # Launch the Gradio app
175
  if __name__ == "__main__":
176
- print("INFO: Attempting to launch Gradio app.")
177
  iface.queue().launch(share=True)
178
- print("INFO: Gradio app launch command executed (may not indicate immediate availability).")
 
1
import os

# Redirect the Hugging Face cache to a writable location *before* importing
# gradio: gradio imports huggingface_hub, which reads HF_HOME at import time,
# so setting it after the import would have no effect on the cache location.
os.environ["HF_HOME"] = "/tmp/huggingface"
os.makedirs("/tmp/huggingface", exist_ok=True)

import gradio as gr
import time

print("INFO: Simple Gradio app starting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def greet(name):
    """Return the greeting ``"Hello <name>!"`` after a short simulated delay.

    Args:
        name: Text to greet, taken from the Gradio text input.

    Returns:
        The greeting string.
    """
    # One-second pause simulates a slow operation so the queued Gradio
    # interface has visible processing time.
    time.sleep(1)
    return f"Hello {name}!"
 
 
 
13
 
14
# Wire the greet function to a one-text-in / one-text-out web UI.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
    title="Minimal Gradio App",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
 
16
# Launch the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    print("INFO: Attempting to launch simple Gradio app.")
    # queue() enables request queuing before launch; share=True asks Gradio
    # for a publicly shareable link (per Gradio docs — confirm desired here,
    # as it exposes the app beyond localhost).
    iface.queue().launch(share=True)
    print("INFO: Simple Gradio app launch command executed.")