Spaces:

Rivalcoder
/

Video-Processing

Sleeping

App Files Files Community

Rivalcoder committed on Mar 31, 2025

Commit

d044a6c

1 Parent(s): 4168c5d

Add New Version

Browse files

Files changed (4) hide show

app.py +152 -136
best_emotion_model.pth → models/best_emotion_model.pth +0 -0
requirements.txt +4 -5
yolov8n-face.pt +0 -3

app.py CHANGED Viewed

@@ -1,173 +1,189 @@
 import cv2
 import torch
 import numpy as np
 from PIL import Image
 import torchvision.transforms as transforms
-from ultralytics import YOLO
-import tempfile
 import time
-import os
 import json
 import gradio as gr
-from fastapi import FastAPI, UploadFile, File, HTTPException
-import uvicorn
-# Initialize FastAPI
 app = FastAPI()
-# Global variable for face detections
 largest_face_detections = []
-# Load models
-yolo_model_path = "yolov8n-face.pt"
-emotion_model_path = "best_emotion_model.pth"
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-# Check if models exist
-if os.path.exists(yolo_model_path):
-    yolo_model = YOLO(yolo_model_path)
-else:
-    raise FileNotFoundError(f"YOLO model not found at {yolo_model_path}")
-if os.path.exists(emotion_model_path):
-    from torch import nn
-    class EmotionCNN(nn.Module):
-        def __init__(self, num_classes=7):
-            super(EmotionCNN, self).__init__()
-            self.conv1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, padding=1),
-                                       nn.BatchNorm2d(64),
-                                       nn.ReLU(),
-                                       nn.MaxPool2d(kernel_size=2, stride=2))
-            self.fc = nn.Sequential(nn.Linear(64 * 24 * 24, 1024),
-                                    nn.ReLU(),
-                                    nn.Linear(1024, num_classes))
-        def forward(self, x):
-            x = self.conv1(x)
-            x = x.view(x.size(0), -1)
-            x = self.fc(x)
-            return x
-    emotion_model = EmotionCNN(num_classes=7)
-    checkpoint = torch.load(emotion_model_path, map_location=device)
-    emotion_model.load_state_dict(checkpoint['model_state_dict'])
-    emotion_model.to(device)
-    emotion_model.eval()
-else:
-    raise FileNotFoundError(f"Emotion model not found at {emotion_model_path}")
-# Emotion labels
-emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
-def preprocess_face(face_img):
-    """Preprocess face image for emotion detection"""
-    transform = transforms.Compose([
-        transforms.Resize((48, 48)),
-        transforms.ToTensor(),
-        transforms.Normalize(mean=[0.5], std=[0.5])
-    ])
-    face_img = Image.fromarray(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)).convert('L')
-    face_tensor = transform(face_img).unsqueeze(0)
-    return face_tensor
-def process_video(video_path: str):
-    """Process video and return emotion results"""
     global largest_face_detections
-    largest_face_detections = []
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
-        return {"success": False, "message": "Could not open video file"}
     while True:
         ret, frame = cap.read()
         if not ret:
             break
         largest_face_area = 0
         current_detection = None
-        results = yolo_model(frame, stream=True)
-        for result in results:
-            boxes = result.boxes
-            for box in boxes:
-                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
-                face_img = frame[y1:y2, x1:x2]
-                if face_img.size == 0:
-                    continue
-                face_tensor = preprocess_face(face_img).to(device)
-                with torch.no_grad():
-                    output = emotion_model(face_tensor)
-                    probabilities = torch.nn.functional.softmax(output, dim=1)
-                    emotion_idx = torch.argmax(output, dim=1).item()
-                    confidence = probabilities[0][emotion_idx].item()
-                emotion = emotions[emotion_idx]
-                if (x2 - x1) * (y2 - y1) > largest_face_area:
-                    largest_face_area = (x2 - x1) * (y2 - y1)
-                    current_detection = {"emotion": emotion, "confidence": confidence}
         if current_detection:
             largest_face_detections.append(current_detection)
     cap.release()
     if not largest_face_detections:
-        return {"success": True, "message": "No faces detected", "results": []}
     return {
         "success": True,
-        "message": "Video processed",
-        "results": largest_face_detections
     }
-@app.post("/api/video")
-async def handle_video(file: UploadFile = File(...)):
-    """API endpoint for video emotion detection"""
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
-            tmp.write(await file.read())
-            video_path = tmp.name
-        result = process_video(video_path)
-        os.remove(video_path)
-        return result
     except Exception as e:
-        return {"success": False, "message": "Error processing video", "error": str(e)}
-# Gradio UI
-def gradio_process(video):
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
-        tmp.write(video)
-        video_path = tmp.name
-    result = process_video(video_path)
-    os.remove(video_path)
-    return result
-with gr.Blocks() as demo:
-    gr.Markdown("# Video Emotion Analysis")
-    with gr.Row():
-        with gr.Column():
-            video_input = gr.File(label="Upload a video", file_types=[".mp4"])
-            submit_btn = gr.Button("Analyze")
-        with gr.Column():
-            output = gr.JSON(label="Results")
-    submit_btn.click(fn=gradio_process, inputs=video_input, outputs=output)
-app = gr.mount_gradio_app(app, demo, path="/")
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+import os
 import cv2
 import torch
 import numpy as np
 from PIL import Image
 import torchvision.transforms as transforms
 import time
 import json
+from typing import Dict, Any
+from fastapi import FastAPI, HTTPException, File, UploadFile
+from pydantic import BaseModel
 import gradio as gr
+import shutil
+import tempfile
 app = FastAPI()
+# Global variable to store the history of largest face detections
 largest_face_detections = []
+# EmotionCNN model definition (placeholder stub: the layers and forward pass from the original code are not filled in here)
+class EmotionCNN(torch.nn.Module):
+    def __init__(self, num_classes=7):
+        super(EmotionCNN, self).__init__()
+        # Your convolutional layers and other definitions
+        # ...
+    def forward(self, x):
+        # Forward method as in your code
+        pass
+# Load emotion model
+def load_emotion_model(model_path, device='cuda' if torch.cuda.is_available() else 'cpu'):
+    checkpoint = torch.load(model_path, map_location=device)
+    model = EmotionCNN(num_classes=7)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    model.to(device)
+    model.eval()
+    return model
+# Process the uploaded video (either MP4 or WebM)
+def process_video(video_file: UploadFile) -> Dict[str, Any]:
     global largest_face_detections
+    largest_face_detections = []  # Reset detections for new video
+    # Path to models and other setup
+    face_cascade_path = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
+    emotion_model_path = "best_emotion_model.pth"
+    if not os.path.exists(face_cascade_path):
+        raise HTTPException(status_code=400, detail="Face cascade classifier not found")
+    if not os.path.exists(emotion_model_path):
+        raise HTTPException(status_code=400, detail="Emotion model not found")
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    try:
+        face_cascade = cv2.CascadeClassifier(face_cascade_path)
+        emotion_model = load_emotion_model(emotion_model_path, device)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error loading models: {str(e)}")
+    emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
+    # Save the uploaded video file to a temporary directory
+    temp_dir = tempfile.mkdtemp()
+    video_path = os.path.join(temp_dir, "uploaded_video")
+    with open(video_path, "wb") as buffer:
+        shutil.copyfileobj(video_file.file, buffer)
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
+        raise HTTPException(status_code=400, detail=f"Could not open video file at {video_path}")
+    frame_count = 0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     while True:
         ret, frame = cap.read()
         if not ret:
             break
+        frame_count += 1
         largest_face_area = 0
         current_detection = None
+        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
+        for (x, y, w, h) in faces:
+            face_area = w * h
+            margin = 20
+            x1 = max(0, x - margin)
+            y1 = max(0, y - margin)
+            x2 = min(frame.shape[1], x + w + margin)
+            y2 = min(frame.shape[0], y + h + margin)
+            face_img = frame[y1:y2, x1:x2]
+            if face_img.size == 0 or face_img.shape[0] < 20 or face_img.shape[1] < 20:
+                continue
+            face_tensor = preprocess_face(face_img)
+            with torch.no_grad():
+                face_tensor = face_tensor.to(device)
+                output = emotion_model(face_tensor)
+                probabilities = torch.nn.functional.softmax(output, dim=1)
+                emotion_idx = torch.argmax(output, dim=1).item()
+                confidence = probabilities[0][emotion_idx].item()
+            emotion = emotions[emotion_idx]
+            if face_area > largest_face_area:
+                largest_face_area = face_area
+                current_detection = {
+                    'emotion': emotion,
+                    'confidence': confidence,
+                    'timestamp': time.time(),
+                    'frame_number': frame_count
+                }
         if current_detection:
             largest_face_detections.append(current_detection)
     cap.release()
     if not largest_face_detections:
+        return {
+            "success": True,
+            "message": "No faces detected in video",
+            "results": [],
+            "error": None
+        }
+    emotions_count = {}
+    for detection in largest_face_detections:
+        emotion = detection['emotion']
+        emotions_count[emotion] = emotions_count.get(emotion, 0) + 1
+    dominant_emotion = max(emotions_count.items(), key=lambda x: x[1])[0]
     return {
         "success": True,
+        "message": "Video processed successfully",
+        "results": {
+            "detections": largest_face_detections,
+            "summary": {
+                "total_frames": total_frames,
+                "total_detections": len(largest_face_detections),
+                "emotions_count": emotions_count,
+                "dominant_emotion": dominant_emotion
+            }
+        },
+        "error": None
     }
+class VideoRequest(BaseModel):
+    path: str
+# FastAPI endpoint for processing the video file
+@app.post("/process_video/")
+async def process_video_request(file: UploadFile = File(...)):
+    try:
+        results = process_video(file)
+        return results
     except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# Gradio interface
+def gradio_interface():
+    def process_gradio_video(video_file):
+        # This function now accepts WebM files and other video formats.
+        return process_video(video_file)
+    interface = gr.Interface(
+        fn=process_gradio_video,
+        inputs=gr.inputs.Video(type="file"),  # 'file' ensures that Gradio handles all formats including WebM
+        outputs="json"
+    )
+    return interface
+# Launch the Gradio interface as its own server (the FastAPI `app` above is not mounted or served by this call)
+gradio_interface().launch(server_name="0.0.0.0", server_port=7860, share=True)

best_emotion_model.pth → models/best_emotion_model.pth RENAMED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,8 +1,7 @@
-ultralytics
-torch
-torchvision
-gradio
 fastapi
-uvicorn
 opencv-python
 pillow

 fastapi
+gradio
+torch
 opencv-python
 pillow
+torchvision
+uvicorn

yolov8n-face.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d17b38523a994b13ee604b67f02791ca0f43b9f446a32fd7bc44e17c56ead077
-size 6250099