ALYYAN commited on
Commit
f0293fd
·
verified ·
1 Parent(s): 15ee904

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -40
app.py CHANGED
@@ -3,11 +3,10 @@ import os
3
  import cv2
4
  import time
5
 
6
- # Ensure the correct predictor class is imported from your MLOps pipeline
7
  from src.EmotionRecognition.pipeline.hf_predictor import HFPredictor
8
 
9
  # --- INITIALIZE THE MODEL ---
10
- # This happens only once when the application starts
11
  print("[INFO] Initializing predictor...")
12
  try:
13
  predictor = HFPredictor()
@@ -23,63 +22,45 @@ body {
23
  background: linear-gradient(-45deg, #0b0f19, #131a2d, #2a2a72, #522a72);
24
  background-size: 400% 400%;
25
  animation: gradient 15s ease infinite;
26
- color: #e0e0e0;
27
  }
28
  @keyframes gradient { 0% { background-position: 0% 50%; } 50% { background-position: 100% 50%; } 100% { background-position: 0% 50%; } }
29
-
30
  /* General Layout & Typography */
31
  .gradio-container { max-width: 1320px !important; margin: auto !important; }
32
  #title { text-align: center; font-size: 3rem !important; font-weight: 700; color: #FFF; margin-bottom: 0.5rem; }
33
  #subtitle { text-align: center; color: #bebebe; margin-top: 0; margin-bottom: 40px; font-size: 1.2rem; font-weight: 300; }
34
  .gr-button { font-weight: bold !important; }
35
-
36
- /* Main Content Card "Glassmorphism" effect */
37
  #main-card {
38
  background: rgba(22, 22, 34, 0.65);
39
  border-radius: 16px;
40
  box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
41
- backdrop-filter: blur(12px);
42
- -webkit-backdrop-filter: blur(12px);
43
  border: 1px solid rgba(255, 255, 255, 0.18);
44
  padding: 1rem;
45
  }
46
-
47
  /* Prediction Bar Styling */
48
  #predictions-column { background-color: transparent !important; padding: 1.5rem; }
49
- #predictions-column > .gr-label { display: none; } /* Hide the default Gradio label */
50
  .prediction-list { list-style-type: none; padding: 0; margin-top: 1.5rem; }
51
  .prediction-list li { display: flex; align-items: center; margin-bottom: 12px; font-size: 1.1rem; }
52
  .prediction-list .label { width: 100px; text-transform: capitalize; color: #e0e0e0; }
53
  .prediction-list .bar-container { flex-grow: 1; height: 24px; background-color: rgba(255,255,255,0.1); border-radius: 12px; margin: 0 15px; overflow: hidden; }
54
- .prediction-list .bar { height: 100%; background: linear-gradient(90deg, #8A2BE2, #C71585); border-radius: 12px; transition: width 0.1s linear; }
55
  .prediction-list .percent { width: 60px; text-align: right; font-weight: bold; color: #FFF; }
56
  footer { display: none !important; }
57
  """
58
 
59
  ABOUT_MARKDOWN = """
60
- ## 🚀 About This Project
61
-
62
- This application is a demonstration of a complete, end-to-end MLOps pipeline for facial emotion recognition. It showcases a full lifecycle from data research and model selection to deployment as a professional, interactive web application.
63
-
64
- ### Key Technical Features:
65
-
66
- * **State-of-the-Art AI Model:** Utilizes a **Swin Transformer**, a powerful Vision Transformer (ViT) architecture, pre-trained on the massive **AffectNet** dataset. This ensures high accuracy and robust generalization to real-world, "in the wild" facial expressions.
67
- * **Reproducible MLOps Pipeline:** The original model training and data processing workflows were built using **DVC (Data Version Control)**, ensuring that every experiment is versioned and reproducible.
68
- * **Full-Stack Architecture:** The initial project was built with a decoupled **FastAPI (Python) backend** and a **React (JavaScript) frontend**, demonstrating professional full-stack development practices. This Gradio app serves as the final, streamlined deployment.
69
- * **Containerized for Deployment:** The entire application is packaged with **Docker** and deployed via a **CI/CD pipeline using GitHub Actions**, enabling automated testing and deployment to cloud platforms like Hugging Face Spaces.
70
-
71
- ### Skills Demonstrated:
72
-
73
- * **Data Science:** Dataset research, analysis (CK+, FER+), and advanced data preparation techniques.
74
- * **Deep Learning:** Transfer learning, fine-tuning, and inference with modern architectures (MobileNetV2, Swin Transformer) using TensorFlow/Keras and Hugging Face `transformers`.
75
- * **MLOps:** Pipeline orchestration (DVC), experiment tracking (MLflow), and CI/CD automation (GitHub Actions).
76
- * **Software Engineering:** Python, UI/UX development (Gradio, React), API design (FastAPI), and containerization (Docker).
77
-
78
- This project represents a comprehensive understanding of building and productionizing modern AI systems.
79
  """
80
 
81
  # --- BACKEND LOGIC ---
82
-
83
  def create_prediction_html(probabilities):
84
  if not probabilities:
85
  return "<div style='padding: 2rem; text-align: center; color: #999;'>Waiting for prediction...</div>"
@@ -100,16 +81,13 @@ def unified_prediction_function(frame):
100
  """A single, unified function to process any frame (live or uploaded)."""
101
  if frame is None:
102
  return None, create_prediction_html({})
103
-
104
  # The predictor class handles all annotation and prediction logic
105
  annotated_frame, probabilities = predictor.process_frame(frame)
106
-
107
  return annotated_frame, create_prediction_html(probabilities)
108
 
109
  def process_video(video_path, progress=gr.Progress(track_tqdm=True)):
110
  """Processes an uploaded video file frame-by-frame."""
111
- if video_path is None:
112
- return None
113
  try:
114
  cap = cv2.VideoCapture(video_path)
115
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -143,7 +121,12 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base()) as demo:
143
  with gr.TabItem("Live Detection"):
144
  with gr.Row(equal_height=False):
145
  with gr.Column(scale=3):
146
- live_feed = gr.Image(source="webcam", streaming=True, type="numpy", label="Live Feed", height=550, mirror_webcam=True)
 
 
 
 
 
147
  with gr.Column(scale=2, elem_id="predictions-column"):
148
  live_predictions = gr.HTML()
149
 
@@ -165,13 +148,28 @@ with gr.Blocks(css=CSS, theme=gr.themes.Base()) as demo:
165
  gr.Markdown(ABOUT_MARKDOWN)
166
 
167
  # --- EVENT LISTENERS ---
168
- live_feed.stream(fn=unified_prediction_function, inputs=live_feed, outputs=[live_feed, live_predictions])
169
- image_button.click(fn=unified_prediction_function, inputs=[image_input], outputs=[image_input, image_predictions])
170
- video_button.click(fn=process_video, inputs=[video_input], outputs=[video_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  # --- LAUNCH THE APP ---
173
  if predictor:
174
- # Enabling the queue is essential for the video processing progress bar and smooth operation
175
  demo.queue().launch(debug=True)
176
  else:
177
  print("\n[FATAL ERROR] Could not start the application.")
 
3
  import cv2
4
  import time
5
 
6
+ # Ensure the correct predictor class is imported
7
  from src.EmotionRecognition.pipeline.hf_predictor import HFPredictor
8
 
9
  # --- INITIALIZE THE MODEL ---
 
10
  print("[INFO] Initializing predictor...")
11
  try:
12
  predictor = HFPredictor()
 
22
  background: linear-gradient(-45deg, #0b0f19, #131a2d, #2a2a72, #522a72);
23
  background-size: 400% 400%;
24
  animation: gradient 15s ease infinite;
 
25
  }
26
  @keyframes gradient { 0% { background-position: 0% 50%; } 50% { background-position: 100% 50%; } 100% { background-position: 0% 50%; } }
 
27
  /* General Layout & Typography */
28
  .gradio-container { max-width: 1320px !important; margin: auto !important; }
29
  #title { text-align: center; font-size: 3rem !important; font-weight: 700; color: #FFF; margin-bottom: 0.5rem; }
30
  #subtitle { text-align: center; color: #bebebe; margin-top: 0; margin-bottom: 40px; font-size: 1.2rem; font-weight: 300; }
31
  .gr-button { font-weight: bold !important; }
32
+ /* Main Content Card */
 
33
  #main-card {
34
  background: rgba(22, 22, 34, 0.65);
35
  border-radius: 16px;
36
  box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
37
+ backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px);
 
38
  border: 1px solid rgba(255, 255, 255, 0.18);
39
  padding: 1rem;
40
  }
 
41
  /* Prediction Bar Styling */
42
  #predictions-column { background-color: transparent !important; padding: 1.5rem; }
43
+ #predictions-column > .gr-label { display: none; }
44
  .prediction-list { list-style-type: none; padding: 0; margin-top: 1.5rem; }
45
  .prediction-list li { display: flex; align-items: center; margin-bottom: 12px; font-size: 1.1rem; }
46
  .prediction-list .label { width: 100px; text-transform: capitalize; color: #e0e0e0; }
47
  .prediction-list .bar-container { flex-grow: 1; height: 24px; background-color: rgba(255,255,255,0.1); border-radius: 12px; margin: 0 15px; overflow: hidden; }
48
+ .prediction-list .bar { height: 100%; background: linear-gradient(90deg, #8A2BE2, #C71585); border-radius: 12px; transition: width 0.1s linear; }
49
  .prediction-list .percent { width: 60px; text-align: right; font-weight: bold; color: #FFF; }
50
  footer { display: none !important; }
51
  """
52
 
53
  ABOUT_MARKDOWN = """
54
+ ### Model: Vision Transformer (ViT)
55
+ This application uses a Vision Transformer model, fine-tuned for facial emotion recognition.
56
+ ### Dataset
57
+ The model was fine-tuned on the **Emotion Recognition Dataset** from Kaggle, a large, curated collection of labeled facial images. This diverse dataset allows the model to generalize to a wide variety of real-world faces and expressions.
58
+ *Dataset Link:* [https://www.kaggle.com/datasets/sujaykapadnis/emotion-recognition-dataset](https://www.kaggle.com/datasets/sujaykapadnis/emotion-recognition-dataset)
59
+ ### MLOps Pipeline
60
+ This entire application, from data processing to training and deployment, was built using a reproducible MLOps pipeline, ensuring consistency and quality at every step.
 
 
 
 
 
 
 
 
 
 
 
 
61
  """
62
 
63
  # --- BACKEND LOGIC ---
 
64
  def create_prediction_html(probabilities):
65
  if not probabilities:
66
  return "<div style='padding: 2rem; text-align: center; color: #999;'>Waiting for prediction...</div>"
 
81
  """A single, unified function to process any frame (live or uploaded)."""
82
  if frame is None:
83
  return None, create_prediction_html({})
 
84
  # The predictor class handles all annotation and prediction logic
85
  annotated_frame, probabilities = predictor.process_frame(frame)
 
86
  return annotated_frame, create_prediction_html(probabilities)
87
 
88
  def process_video(video_path, progress=gr.Progress(track_tqdm=True)):
89
  """Processes an uploaded video file frame-by-frame."""
90
+ if video_path is None: return None
 
91
  try:
92
  cap = cv2.VideoCapture(video_path)
93
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
121
  with gr.TabItem("Live Detection"):
122
  with gr.Row(equal_height=False):
123
  with gr.Column(scale=3):
124
+ # --- THIS IS THE DEFINITIVE FIX ---
125
+ # We use TWO components. One is an INVISIBLE input to capture the stream.
126
+ # The other is a VISIBLE output to display the result.
127
+ webcam_capture = gr.Image(source="webcam", streaming=True, type="numpy", visible=False, mirror_webcam=True)
128
+ live_output = gr.Image(label="Live Feed", interactive=False, height=550)
129
+ # --- END FIX ---
130
  with gr.Column(scale=2, elem_id="predictions-column"):
131
  live_predictions = gr.HTML()
132
 
 
148
  gr.Markdown(ABOUT_MARKDOWN)
149
 
150
  # --- EVENT LISTENERS ---
151
+ # The .stream() event is attached to the INVISIBLE capture component.
152
+ # Its outputs are the VISIBLE components.
153
+ webcam_capture.stream(
154
+ fn=unified_prediction_function,
155
+ inputs=[webcam_capture],
156
+ outputs=[live_output, live_predictions]
157
+ )
158
+
159
+ image_button.click(
160
+ fn=unified_prediction_function,
161
+ inputs=[image_input],
162
+ outputs=[image_input, image_predictions]
163
+ )
164
+
165
+ video_button.click(
166
+ fn=process_video,
167
+ inputs=[video_input],
168
+ outputs=[video_output]
169
+ )
170
 
171
  # --- LAUNCH THE APP ---
172
  if predictor:
 
173
  demo.queue().launch(debug=True)
174
  else:
175
  print("\n[FATAL ERROR] Could not start the application.")