ALYYAN committed on
Commit
ed6e9bc
·
verified ·
1 Parent(s): 55b3ce5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -19
app.py CHANGED
@@ -24,11 +24,13 @@ body {
24
  animation: gradient 15s ease infinite;
25
  }
26
  @keyframes gradient { 0% { background-position: 0% 50%; } 50% { background-position: 100% 50%; } 100% { background-position: 0% 50%; } }
 
27
  /* General Layout & Typography */
28
  .gradio-container { max-width: 1320px !important; margin: auto !important; }
29
  #title { text-align: center; font-size: 3rem !important; font-weight: 700; color: #FFF; margin-bottom: 0.5rem; }
30
  #subtitle { text-align: center; color: #bebebe; margin-top: 0; margin-bottom: 40px; font-size: 1.2rem; font-weight: 300; }
31
  .gr-button { font-weight: bold !important; }
 
32
  /* Main Content Card */
33
  #main-card {
34
  background: rgba(22, 22, 34, 0.65);
@@ -38,9 +40,10 @@ body {
38
  border: 1px solid rgba(255, 255, 255, 0.18);
39
  padding: 1rem;
40
  }
 
41
  /* Prediction Bar Styling */
42
  #predictions-column { background-color: transparent !important; padding: 1.5rem; }
43
- #predictions-column > .gr-label { display: none; }
44
  .prediction-list { list-style-type: none; padding: 0; margin-top: 1.5rem; }
45
  .prediction-list li { display: flex; align-items: center; margin-bottom: 12px; font-size: 1.1rem; }
46
  .prediction-list .label { width: 100px; text-transform: capitalize; color: #e0e0e0; }
@@ -52,16 +55,17 @@ footer { display: none !important; }
52
 
53
  ABOUT_MARKDOWN = """
54
  ### Model: Vision Transformer (ViT)
55
- This application uses a Vision Transformer model, fine-tuned for facial emotion recognition.
56
  ### Dataset
57
- The model was fine-tuned on the **Emotion Recognition Dataset** from Kaggle, a large, curated collection of labeled facial images. This diverse dataset allows the model to generalize to a wide variety of real-world faces and expressions.
58
- *Dataset Link:* [https://www.kaggle.com/datasets/sujaykapadnis/emotion-recognition-dataset](https://www.kaggle.com/datasets/sujaykapadnis/emotion-recognition-dataset)
59
  ### MLOps Pipeline
60
- This entire application, from data processing to training and deployment, was built using a reproducible MLOps pipeline, ensuring consistency and quality at every step.
61
  """
62
 
63
  # --- BACKEND LOGIC ---
 
64
  def create_prediction_html(probabilities):
 
65
  if not probabilities:
66
  return "<div style='padding: 2rem; text-align: center; color: #999;'>Waiting for prediction...</div>"
67
  html = "<ul class='prediction-list'>"
@@ -78,16 +82,22 @@ def create_prediction_html(probabilities):
78
  return html
79
 
80
  def unified_prediction_function(frame):
81
- """A single, unified function to process any frame (live or uploaded)."""
 
 
 
82
  if frame is None:
83
  return None, create_prediction_html({})
84
- # The predictor class handles all annotation and prediction logic
 
85
  annotated_frame, probabilities = predictor.process_frame(frame)
 
86
  return annotated_frame, create_prediction_html(probabilities)
87
 
88
  def process_video(video_path, progress=gr.Progress(track_tqdm=True)):
89
  """Processes an uploaded video file frame-by-frame."""
90
- if video_path is None: return None
 
91
  try:
92
  cap = cv2.VideoCapture(video_path)
93
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -121,13 +131,11 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base()) as demo:
121
  with gr.TabItem("Live Detection"):
122
  with gr.Row(equal_height=False):
123
  with gr.Column(scale=3):
124
- # --- THIS IS THE DEFINITIVE FIX ---
125
- # We use TWO components. One is an INVISIBLE input to capture the stream.
126
- # The other is a VISIBLE output to display the result.
127
- webcam_capture = gr.Image(source="webcam", streaming=True, type="numpy", visible=False, mirror_webcam=True)
128
- live_output = gr.Image(label="Live Feed", interactive=False, height=550)
129
- # --- END FIX ---
130
  with gr.Column(scale=2, elem_id="predictions-column"):
 
131
  live_predictions = gr.HTML()
132
 
133
  with gr.TabItem("Upload Image"):
@@ -148,20 +156,25 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base()) as demo:
148
  gr.Markdown(ABOUT_MARKDOWN)
149
 
150
  # --- EVENT LISTENERS ---
151
- # The .stream() event is attached to the INVISIBLE capture component.
152
- # Its outputs are the VISIBLE components.
153
- webcam_capture.stream(
 
 
 
154
  fn=unified_prediction_function,
155
- inputs=[webcam_capture],
156
- outputs=[live_output, live_predictions]
157
  )
158
 
 
159
  image_button.click(
160
  fn=unified_prediction_function,
161
  inputs=[image_input],
162
  outputs=[image_input, image_predictions]
163
  )
164
 
 
165
  video_button.click(
166
  fn=process_video,
167
  inputs=[video_input],
@@ -170,6 +183,7 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base()) as demo:
170
 
171
  # --- LAUNCH THE APP ---
172
  if predictor:
 
173
  demo.queue().launch(debug=True)
174
  else:
175
  print("\n[FATAL ERROR] Could not start the application.")
 
24
  animation: gradient 15s ease infinite;
25
  }
26
  @keyframes gradient { 0% { background-position: 0% 50%; } 50% { background-position: 100% 50%; } 100% { background-position: 0% 50%; } }
27
+
28
  /* General Layout & Typography */
29
  .gradio-container { max-width: 1320px !important; margin: auto !important; }
30
  #title { text-align: center; font-size: 3rem !important; font-weight: 700; color: #FFF; margin-bottom: 0.5rem; }
31
  #subtitle { text-align: center; color: #bebebe; margin-top: 0; margin-bottom: 40px; font-size: 1.2rem; font-weight: 300; }
32
  .gr-button { font-weight: bold !important; }
33
+
34
  /* Main Content Card */
35
  #main-card {
36
  background: rgba(22, 22, 34, 0.65);
 
40
  border: 1px solid rgba(255, 255, 255, 0.18);
41
  padding: 1rem;
42
  }
43
+
44
  /* Prediction Bar Styling */
45
  #predictions-column { background-color: transparent !important; padding: 1.5rem; }
46
+ #predictions-column > .gr-label { display: none; } /* Hide the default Gradio label */
47
  .prediction-list { list-style-type: none; padding: 0; margin-top: 1.5rem; }
48
  .prediction-list li { display: flex; align-items: center; margin-bottom: 12px; font-size: 1.1rem; }
49
  .prediction-list .label { width: 100px; text-transform: capitalize; color: #e0e0e0; }
 
55
 
56
  ABOUT_MARKDOWN = """
57
  ### Model: Vision Transformer (ViT)
58
+ This application uses a state-of-the-art Vision Transformer model to perform real-time facial emotion recognition.
59
  ### Dataset
60
+ The model was pre-trained on the **AffectNet** dataset, the largest database of "in the wild" facial expressions. This ensures robust performance on real-world, spontaneous emotions.
 
61
  ### MLOps Pipeline
62
+ This application is the deployment artifact of a complete MLOps pipeline, demonstrating skills in data management (DVC), model training (TensorFlow), and application development (Gradio).
63
  """
64
 
65
  # --- BACKEND LOGIC ---
66
+
67
  def create_prediction_html(probabilities):
68
+ """Generates clean HTML for the prediction bars."""
69
  if not probabilities:
70
  return "<div style='padding: 2rem; text-align: center; color: #999;'>Waiting for prediction...</div>"
71
  html = "<ul class='prediction-list'>"
 
82
  return html
83
 
84
  def unified_prediction_function(frame):
85
+ """
86
+ A single, robust function that takes any frame (from webcam or upload)
87
+ and returns the annotated frame and the prediction HTML.
88
+ """
89
  if frame is None:
90
  return None, create_prediction_html({})
91
+
92
+ # The predictor class handles all the complex ML logic
93
  annotated_frame, probabilities = predictor.process_frame(frame)
94
+
95
  return annotated_frame, create_prediction_html(probabilities)
96
 
97
  def process_video(video_path, progress=gr.Progress(track_tqdm=True)):
98
  """Processes an uploaded video file frame-by-frame."""
99
+ if video_path is None:
100
+ return None
101
  try:
102
  cap = cv2.VideoCapture(video_path)
103
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
131
  with gr.TabItem("Live Detection"):
132
  with gr.Row(equal_height=False):
133
  with gr.Column(scale=3):
134
+ # The single, correct component for a live webcam feed.
135
+ # It acts as both input (from webcam) and output (displaying the result).
136
+ live_feed = gr.Image(source="webcam", streaming=True, type="numpy", label="Live Feed", height=550, mirror_webcam=True)
 
 
 
137
  with gr.Column(scale=2, elem_id="predictions-column"):
138
+ gr.Markdown("### Emotion Probabilities")
139
  live_predictions = gr.HTML()
140
 
141
  with gr.TabItem("Upload Image"):
 
156
  gr.Markdown(ABOUT_MARKDOWN)
157
 
158
  # --- EVENT LISTENERS ---
159
+
160
+ # Live Feed Logic: This is the simple, direct, and correct way.
161
+ # The stream from the 'live_feed' component calls the prediction function.
162
+ # The outputs are sent back to the 'live_feed' component (to update the image)
163
+ # and the 'live_predictions' component.
164
+ live_feed.stream(
165
  fn=unified_prediction_function,
166
+ inputs=[live_feed],
167
+ outputs=[live_feed, live_predictions]
168
  )
169
 
170
+ # Image Upload Logic
171
  image_button.click(
172
  fn=unified_prediction_function,
173
  inputs=[image_input],
174
  outputs=[image_input, image_predictions]
175
  )
176
 
177
+ # Video Upload Logic
178
  video_button.click(
179
  fn=process_video,
180
  inputs=[video_input],
 
183
 
184
  # --- LAUNCH THE APP ---
185
  if predictor:
186
+ # Enabling the queue is essential for the video processing progress bar.
187
  demo.queue().launch(debug=True)
188
  else:
189
  print("\n[FATAL ERROR] Could not start the application.")