Spaces:

Rivalcoder
/

Video-Processing

Sleeping

App Files Files Community

Rivalcoder committed on Mar 29, 2025

Commit

9a2edf3

1 Parent(s): d674b3d

Add Files

Browse files

Files changed (2) hide show

app.py +211 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import cv2
+import torch
+import numpy as np
+from PIL import Image
+import torchvision.transforms as transforms
+from ultralytics import YOLO
+import time
+import os
+import tempfile
+from flask import Flask, request, jsonify
+import gradio as gr
+# Flask application object; the /api/predict route defined further down is registered on it
+app = Flask(__name__)
+# Module-level list of detection records; process_video() resets it and refills it on every call
+detection_history = []
+# Emotion class names; process_video() indexes this list with the emotion model's predicted class index
+emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
+# Load models (cache in Hugging Face Space)
+def load_models():
+    # Face detection model
+    face_model = YOLO('yolov8n-face.pt')
+    # Emotion model (simplified version of your CNN)
+    class EmotionCNN(torch.nn.Module):
+        def __init__(self, num_classes=7):
+            super().__init__()
+            self.features = torch.nn.Sequential(
+                torch.nn.Conv2d(1, 64, 3, padding=1),
+                torch.nn.ReLU(),
+                torch.nn.MaxPool2d(2),
+                torch.nn.Conv2d(64, 128, 3, padding=1),
+                torch.nn.ReLU(),
+                torch.nn.MaxPool2d(2),
+                torch.nn.Conv2d(128, 256, 3, padding=1),
+                torch.nn.ReLU(),
+                torch.nn.MaxPool2d(2)
+            )
+            self.classifier = torch.nn.Sequential(
+                torch.nn.Dropout(0.5),
+                torch.nn.Linear(256*6*6, 1024),
+                torch.nn.ReLU(),
+                torch.nn.Dropout(0.5),
+                torch.nn.Linear(1024, num_classes)
+            )
+        def forward(self, x):
+            x = self.features(x)
+            x = torch.flatten(x, 1)
+            x = self.classifier(x)
+            return x
+    emotion_model = EmotionCNN()
+    # Load your pretrained weights here
+    # emotion_model.load_state_dict(torch.load('emotion_model.pth'))
+    emotion_model.eval()
+    return face_model, emotion_model
+face_model, emotion_model = load_models()
+# Preprocessing function
+def preprocess_face(face_img):
+    transform = transforms.Compose([
+        transforms.Resize((48, 48)),
+        transforms.Grayscale(),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.5], std=[0.5])
+    ])
+    face_pil = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB))
+    return transform(face_pil).unsqueeze(0)
+# Process video function
+def process_video(video_path):
+    global detection_history
+    detection_history = []
+    cap = cv2.VideoCapture(video_path)
+    if not cap.isOpened():
+        return {"error": "Could not open video"}
+    frame_count = 0
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    frame_skip = int(fps / 3)  # Process ~3 frames per second
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frame_count += 1
+        if frame_count % frame_skip != 0:
+            continue
+        # Face detection
+        results = face_model(frame)
+        for result in results:
+            boxes = result.boxes
+            if len(boxes) == 0:
+                continue
+            for box in boxes:
+                x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
+                face_img = frame[y1:y2, x1:x2]
+                if face_img.size == 0:
+                    continue
+                # Emotion prediction
+                face_tensor = preprocess_face(face_img)
+                with torch.no_grad():
+                    output = emotion_model(face_tensor)
+                    prob = torch.nn.functional.softmax(output, dim=1)[0]
+                    pred_idx = torch.argmax(output).item()
+                    confidence = prob[pred_idx].item()
+                detection_history.append({
+                    "frame": frame_count,
+                    "time": frame_count / fps,
+                    "emotion": emotions[pred_idx],
+                    "confidence": confidence,
+                    "box": [x1, y1, x2, y2]
+                })
+    cap.release()
+    if not detection_history:
+        return {"error": "No faces detected"}
+    return {
+        "detections": detection_history,
+        "summary": {
+            "total_frames": frame_count,
+            "fps": fps,
+            "duration": frame_count / fps
+        }
+    }
+# Flask API endpoint
+@app.route('/api/predict', methods=['POST'])
+def api_predict():
+    if 'file' not in request.files:
+        return jsonify({"error": "No file provided"}), 400
+    file = request.files['file']
+    if file.filename == '':
+        return jsonify({"error": "No selected file"}), 400
+    # Save to temp file
+    temp_path = os.path.join(tempfile.gettempdir(), file.filename)
+    file.save(temp_path)
+    # Process video
+    result = process_video(temp_path)
+    # Clean up
+    os.remove(temp_path)
+    return jsonify(result)
+# Gradio interface
+def gradio_predict(video):
+    temp_path = os.path.join(tempfile.gettempdir(), video.name)
+    with open(temp_path, 'wb') as f:
+        f.write(video.read())
+    result = process_video(temp_path)
+    os.remove(temp_path)
+    if "error" in result:
+        return result["error"]
+    # Create visualization
+    cap = cv2.VideoCapture(video.name)
+    ret, frame = cap.read()
+    cap.release()
+    if ret:
+        # Draw last detection on frame
+        last_det = result["detections"][-1]
+        x1, y1, x2, y2 = last_det["box"]
+        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.putText(frame, f"{last_det['emotion']} ({last_det['confidence']:.2f})",
+                   (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
+        # Convert to RGB for Gradio
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return frame, result
+    return result
+# Gradio UI: one video input; gradio_predict supplies an image preview and a JSON results panel
+demo = gr.Interface(
+    fn=gradio_predict,
+    inputs=gr.Video(label="Upload Video"),
+    outputs=[
+        gr.Image(label="Detection Preview"),
+        gr.JSON(label="Results")
+    ],
+    title="Video Emotion Detection",
+    description="Upload a video to detect emotions in faces"
+)
+# Mount the Gradio app at "/" and rebind `app` to the returned object. NOTE(review): gr.mount_gradio_app is documented as taking a FastAPI app, but `app` is a Flask app here, and app.run() below is called on whatever the mount returns - confirm this starts at all
+app = gr.mount_gradio_app(app, demo, path="/")
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+torchvision
+opencv-python
+ultralytics
+gradio
+flask
+numpy
+Pillow