Update app.py

app.py CHANGED
@@ -1,30 +1,25 @@
  # app.py
- #
- # Dark theme +
- #
- # Local source path (for tooling): /mnt/data/app.py
- SOURCE_APP_PATH = "/mnt/data/app.py"

  import os
  import torch
  import torch.nn as nn
  from torchvision import transforms
  from PIL import Image
- import numpy as np
  import gradio as gr
- import cv2
  import tempfile
  import base64
- from typing import List, Union

- # ---------------------- MODEL CONFIG ----------------------
  SEQUENCE_LENGTH = 16
  NUM_CLASSES = 4
  MODEL_PATH = "best_model.pth"
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  CLASS_NAMES = ["aggressive", "idle", "panic", "normal"]

- # ------------------
  class CNNLSTM(nn.Module):
      def __init__(self, num_classes):
          super(CNNLSTM, self).__init__()
@@ -48,401 +43,199 @@ class CNNLSTM(nn.Module):
          x, _ = self.lstm(x)
          return self.fc(x[:, -1, :])

- # ------------------
  def load_model():
      if not os.path.exists(MODEL_PATH):
-         raise FileNotFoundError("
-     model = CNNLSTM(
      model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
      model.eval()
      return model

  try:
      model = load_model()
- except Exception as e:
      model = None
-     print("Model Load Error:", e)

- # ---------------------- TRANSFORMS ----------------------
- transform = transforms.Compose([
-     transforms.Resize((64, 64)),
-     transforms.ToTensor(),
- ])

- # ------------------
- def
      """
-     Extract
-     Returns list[PIL.Image]
      """
-     frames = []
-     cap = cv2.VideoCapture(video_path)
-     if not cap.isOpened():
-         cap.release()
-         return None

  [old lines 84–91 not captured]
-         cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
-         ret, frame = cap.read()
-         if not ret:
-             break
-         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-         frames.append(Image.fromarray(frame))
-         idx += interval

-     cap.release()
-     if len(frames) < num_frames:
          return None
-     return frames[:num_frames]

- #
  [old line 106 not captured]
      if model is None:
-         return {"Error": "Model
-     if len(frames) != SEQUENCE_LENGTH:
-         return {"Error": f"Need exactly {SEQUENCE_LENGTH} frames (got {len(frames)})."}

  [old lines 112–122 not captured]
-     except Exception as e:
-         return {"Error": f"Prediction failed: {str(e)}"}
- def predict(input_files: Union[str, List[str]]):
-     """
-     Accepts:
-       - single video filepath string
-       - single image filepath string
-       - list of image filepaths (multiple)
-     Returns label probabilities dict for Gradio Label.
-     """
-     # Video path (string) or list of file paths
-     # Gradio returns a list when file_count="multiple"
-     files = input_files
      if files is None:
-         return {"Error": "

-     #
      if isinstance(files, str):
          files = [files]

-     #
-     if len(files) == 1:
  [old lines 146–162 not captured]
-     if len(files) >= SEQUENCE_LENGTH:
-         imgs = []
-         for p in files[:SEQUENCE_LENGTH]:
-             try:
-                 imgs.append(Image.open(p).convert("RGB"))
-             except Exception as e:
-                 return {"Error": f"Failed to open one of the images: {e}"}
-         return predict_from_frames(imgs)
-     else:
-         return {"Error": f"Need at least {SEQUENCE_LENGTH} image files (got {len(files)})."}
- # ---------------------- GRADIO UI (Blocks) ----------------------
- # We'll embed a small React app inside an HTML block to provide an advanced preview,
- # autoplay frames, and glass/dark UI. The React app listens to the file input with id "media_input"
- # (we set elem_id for the Gradio file component).

  css = """
-
- :root{
-   --bg:#0b0f12;
-   --card: rgba(255,255,255,0.04);
-   --glass: rgba(255,255,255,0.06);
-   --accent: rgba(59,130,246,0.9);
-   --muted: rgba(255,255,255,0.6);
- }
- body, .gradio-container {
-   background: linear-gradient(180deg, #071018 0%, #0b0f12 100%) !important;
-   color: #E6EEF3 !important;
- }
- .gradio-container .block {
-   background: transparent !important;
- }
- /* glass card */
  .glass {
  [old lines 197–202 not captured]
-   box-shadow: 0 6px 24px rgba(2,6,23,0.6);
- }
- .title {
-   font-family: Inter, ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial;
-   font-weight: 700;
-   font-size: 28px;
-   letter-spacing: -0.2px;
- }
- .subtitle {
-   color: var(--muted);
-   margin-top: 6px;
-   margin-bottom: 10px;
- }
- .controls {
-   display:flex;
-   gap:12px;
-   align-items:center;
- }
- .preview-area {
-   display:flex;
-   gap:12px;
-   align-items:center;
-   justify-content:center;
-   flex-wrap:wrap;
-   margin-top:12px;
- }
- #react-root {
-   width:100%;
- }
- .frame-thumb {
-   width: 120px;
-   height: 80px;
-   object-fit: cover;
-   border-radius:8px;
-   border: 1px solid rgba(255,255,255,0.04);
-   box-shadow: 0 8px 20px rgba(2,6,23,0.5);
- }
- .video-preview {
-   max-width: 420px;
-   border-radius: 12px;
-   overflow: hidden;
-   border: 1px solid rgba(255,255,255,0.04);
- }
- .info {
-   color: var(--muted);
-   font-size: 13px;
- }
- .btn-ghost {
-   background: transparent;
-   border: 1px solid rgba(255,255,255,0.06);
-   padding: 8px 12px;
-   border-radius: 10px;
-   color: var(--muted);
- }
- .small {
-   font-size: 13px;
- }
- .footer {
-   text-align:center;
-   color: var(--muted);
-   font-size:12px;
-   margin-top:12px;
  }
  """
- # HTML + React app embed (CDN-based React for simplicity)
  react_html = """
- <div class="glass"
-   <
  [old lines 272–273 not captured]
-     <div class="subtitle">Dark • Glassmorphism • React preview • Autoplay frames</div>
-   </div>
-   <div style="text-align:right;">
-     <div class="info">Model: CNN-LSTM | Frames: 16</div>
-   </div>
- </div>
-
- <div style="margin-top:12px;">
-   <div id="react-root"></div>
- </div>
-
- <div class="footer">Upload a video or images using the file picker below. Use "Analyze" to run the model.</div>
  </div>

- <!-- React and app script -->
  <script crossorigin src="https://unpkg.com/react@18/umd/react.production.min.js"></script>
  <script crossorigin src="https://unpkg.com/react-dom@18/umd/react-dom.production.min.js"></script>
  <script>
  const e = React.createElement;

- function PreviewApp(){
-   const [frames, setFrames] = React.useState([]);
-   const [
  [old lines 297–306 not captured]
-   const
  [old lines 308–312 not captured]
-   // If single file and it's video
-   if(files.length === 1 && files[0].type.startsWith("video/")){
-     setIsVideo(true);
-     const url = URL.createObjectURL(files[0]);
-     // create a video element, sample frames
-     const video = document.createElement("video");
-     video.src = url;
-     video.crossOrigin = "anonymous";
-     video.muted = true;
-     video.playsInline = true;
-     video.addEventListener('loadedmetadata', async () => {
-       const duration = video.duration;
-       const canvas = document.createElement('canvas');
-       const ctx = canvas.getContext('2d');
-       canvas.width = 320;
-       canvas.height = 180;
-       const count = 16;
-       const newFrames = [];
-       for(let i=0;i<count;i++){
-         const t = Math.min(duration * (i / count), duration - 0.05);
-         await new Promise((res) => {
-           video.currentTime = t;
-           video.addEventListener('seeked', function handler(){
-             ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
-             newFrames.push(canvas.toDataURL('image/jpeg', 0.7));
-             video.removeEventListener('seeked', handler);
-             res();
-           });
-         });
-       }
-       setFrames(newFrames);
-       setPlayingIndex(0);
-     }, {once:true});
-   } else {
-     // treat as images
-     setIsVideo(false);
-     const imageFiles = Array.from(files).slice(0,16);
-     const readers = imageFiles.map(f => {
-       return new Promise((res, rej) => {
-         const fr = new FileReader();
-         fr.onload = () => res(fr.result);
-         fr.onerror = rej;
-         fr.readAsDataURL(f);
-       });
  });
  [old lines 358–381 not captured]
-     if(intervalRef.current) clearInterval(intervalRef.current);
-   }
- }, [autoplay, frames]);

- return e('div', {style:{display:'flex', gap:16, flexWrap:'wrap', alignItems:'flex-start'}},
-   e('div', {style:{flex:'1 1 420px', minWidth:320}},
-     e('div', {className:"video-preview", style:{padding:12, display:'flex', justifyContent:'center', alignItems:'center', background:'linear-gradient(180deg, rgba(255,255,255,0.02), rgba(255,255,255,0.01))'}},
-       frames.length > 0 ? e('img', {src: frames[playingIndex], style:{width:'100%', height:'240px', objectFit:'cover', borderRadius:8}}) : e('div', {style:{padding:30, textAlign:'center', color:'rgba(255,255,255,0.6)'}}, "Preview will appear here")
-     ),
-     e('div', {style:{display:'flex', justifyContent:'space-between', marginTop:8}},
-       e('div', {className:'small info'}, frames.length ? `${frames.length} frames prepared` : 'No frames prepared'),
-       e('div', {},
-         e('button', {className:'btn-ghost small', onClick: ()=> setAutoplay(a=>!a)}, autoplay? 'Pause' : 'Autoplay')
-       )
-     )
-   ),
-   e('div', {style:{flex:'0 1 320px', minWidth:260}},
-     e('div', {style:{display:'grid', gridTemplateColumns:'repeat(2,1fr)', gap:8}},
-       frames.slice(0,8).map((f,i) => e('img', {key:i, src:f, className:'frame-thumb', onClick: ()=> setPlayingIndex(i)})),
-       frames.slice(8,16).map((f,i) => e('img', {key:8+i, src:f, className:'frame-thumb', onClick: ()=> setPlayingIndex(8+i)}))
-     ),
-     e('div', {style:{marginTop:12, color:'var(--muted)', fontSize:13}},
-       "Click thumbnails to jump to frame. Drag files to the file picker below to update preview."
-     )
-   )
  );
  }

- const domRoot = document.getElementById("react-root");
- if(domRoot) {
-   ReactDOM.createRoot(domRoot).render(React.createElement(PreviewApp));
- }
  </script>
  """
  [old lines 417–434, the Blocks layout, not captured]
-     - Upload a single **video**: app will sample 16 frames automatically.
-     - Upload a single **image**: image will be repeated to form a 16-frame input (quick test).
-     - Upload **multiple images**: first 16 images will be used.
-     """)
-     with gr.Row():
-         footer = gr.Markdown("<div style='color:rgba(255,255,255,0.45);font-size:12px'>© Crowd Analyzer • Dark Glass UI</div>")

-     # Wire up interactions
-     analyze_btn.click(fn=predict, inputs=file_input, outputs=result_label)

- # Launch
- if __name__ == "__main__":
-     # For Spaces, Gradio will handle host/port automatically.
-     demo.launch(server_name="0.0.0.0", share=False)
  # app.py
+ # FINAL VERSION — No OpenCV. Works on Hugging Face Spaces.
+ # Dark theme + Glassmorphism + React autoplay preview
+ # Just upload this + best_model.pth

  import os
+ import subprocess
  import torch
  import torch.nn as nn
  from torchvision import transforms
  from PIL import Image
  import gradio as gr
  import tempfile
  import base64

  SEQUENCE_LENGTH = 16
  NUM_CLASSES = 4
  MODEL_PATH = "best_model.pth"
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  CLASS_NAMES = ["aggressive", "idle", "panic", "normal"]

+ # ------------------ MODEL ------------------
  class CNNLSTM(nn.Module):
      def __init__(self, num_classes):
          super(CNNLSTM, self).__init__()
  [lines 26–42 unchanged, collapsed by the diff view]
          x, _ = self.lstm(x)
          return self.fc(x[:, -1, :])
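(The collapsed lines 26–42 hold the per-frame CNN backbone and the lstm/fc definitions. As a rough sketch only — layer sizes here are assumptions, not the committed code — a body consistent with the visible forward tail and the 64×64 inputs produced by the transform below could look like this; it reuses the torch/nn imports above:)

# Hedged sketch of the elided CNNLSTM body; only the last two forward()
# lines are confirmed by the diff, everything else is illustrative.
class CNNLSTMSketch(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.cnn = nn.Sequential(                  # per-frame features
            nn.Conv2d(3, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )                                          # 64x64 -> 16x16x32
        self.lstm = nn.LSTM(32 * 16 * 16, 128, batch_first=True)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):                          # x: (batch, seq, 3, 64, 64)
        b, t, c, h, w = x.shape
        feats = self.cnn(x.view(b * t, c, h, w)).view(b, t, -1)
        x, _ = self.lstm(feats)                    # matches the visible tail
        return self.fc(x[:, -1, :])                # classify from the last step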
|
+ # ------------------ LOAD MODEL ------------------
  def load_model():
      if not os.path.exists(MODEL_PATH):
+         raise FileNotFoundError("Upload best_model.pth to the repository.")
+     model = CNNLSTM(NUM_CLASSES).to(device)
      model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
      model.eval()
      return model

  try:
      model = load_model()
+ except Exception as e:  # avoid a bare except; keep the Space alive and log why
      model = None
+     print("Model load error:", e)

+ # ------------------ FRAME EXTRACTION (FFmpeg) ------------------
+ def extract_frames_ffmpeg(video_path):
      """
+     Extract 16 evenly spaced frames using FFmpeg (preinstalled on Hugging Face Spaces).
+     Returns list[PIL.Image], or None if no frames could be decoded.
      """
+     tmp_dir = tempfile.mkdtemp()
+
+     # Options must precede the output path; ffmpeg ignores trailing options,
+     # so -hide_banner/-loglevel go up front (no f-string needed for the filter).
+     cmd = [
+         "ffmpeg",
+         "-hide_banner",
+         "-loglevel", "error",
+         "-i", video_path,
+         "-vf", "fps=1,scale=320:180",
+         os.path.join(tmp_dir, "frame_%03d.jpg"),
+     ]
+
+     subprocess.run(cmd)

+     frames = sorted([os.path.join(tmp_dir, f) for f in os.listdir(tmp_dir) if f.endswith(".jpg")])
+
+     if len(frames) == 0:
          return None

+     # sample exactly SEQUENCE_LENGTH frames evenly
+     if len(frames) >= SEQUENCE_LENGTH:
+         import numpy as np
+         idxs = np.linspace(0, len(frames) - 1, SEQUENCE_LENGTH).astype(int)
+         frames = [frames[i] for i in idxs]
+     else:
+         # repeat frames until there are enough
+         frames = (frames * SEQUENCE_LENGTH)[:SEQUENCE_LENGTH]
+
+     pil_frames = [Image.open(f).convert("RGB") for f in frames]
+     return pil_frames
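(A quick smoke test for the extractor — "sample.mp4" is a hypothetical local clip; the sizes follow from the fps=1 / scale=320:180 filter above, and clips shorter than 16 seconds get padded by the repeat branch:)

# Hypothetical check; run locally with any short video file.
frames = extract_frames_ffmpeg("sample.mp4")
if frames is None:
    print("No frames decoded - check the input or the ffmpeg install.")
else:
    print(len(frames), frames[0].size)  # expected: 16 (320, 180)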

+ # ------------------ PREDICTION ------------------
+ transform = transforms.Compose([
+     transforms.Resize((64, 64)),
+     transforms.ToTensor(),
+ ])
+
+ def run_prediction(frames):
      if model is None:
+         return {"Error": "Model not loaded."}

+     tensors = [transform(f) for f in frames]
+     video_tensor = torch.stack(tensors).unsqueeze(0).to(device)
+
+     with torch.no_grad():
+         out = model(video_tensor)
+
+     probs = torch.softmax(out, dim=1)[0].cpu().numpy()
+
+     return {CLASS_NAMES[i]: float(probs[i]) for i in range(NUM_CLASSES)}
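(The stacking above turns 16 PIL frames into one batch; a standalone shape check with dummy frames — no checkpoint needed — follows from the 64×64 transform and SEQUENCE_LENGTH=16:)

# Dummy-frame shape check (sizes follow from the transform, not the model).
dummy = [Image.new("RGB", (320, 180)) for _ in range(SEQUENCE_LENGTH)]
stacked = torch.stack([transform(f) for f in dummy]).unsqueeze(0)
print(stacked.shape)  # torch.Size([1, 16, 3, 64, 64])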

+ def predict(files):
      if files is None:
+         return {"Error": "Upload a file."}

+     # Normalize file list
      if isinstance(files, str):
          files = [files]

+     # CASE 1: video
+     if len(files) == 1 and files[0].lower().endswith((".mp4", ".mov", ".avi", ".mkv", ".webm")):
+         frames = extract_frames_ffmpeg(files[0])
+         if frames is None:
+             return {"Error": "Unable to extract frames from video."}
+         return run_prediction(frames)
+
+     # CASE 2: multiple images
+     if len(files) >= SEQUENCE_LENGTH:
+         frames = [Image.open(f).convert("RGB") for f in files[:SEQUENCE_LENGTH]]
+         return run_prediction(frames)
+
+     # CASE 3: single image
+     try:
+         img = Image.open(files[0]).convert("RGB")
+         frames = [img] * SEQUENCE_LENGTH
+         return run_prediction(frames)
+     except Exception:  # not a readable image
+         return {"Error": "Invalid image."}
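(Hypothetical calls showing the three dispatch branches; the paths are placeholders, not repo files:)

# clip.mp4 -> CASE 1 (ffmpeg sampling); 16 JPEGs -> CASE 2; one JPEG -> CASE 3.
print(predict("clip.mp4"))
print(predict([f"img_{i:02d}.jpg" for i in range(16)]))
print(predict("single.jpg"))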

+ # ------------------ UI & React ------------------
  css = """
+ body, .gradio-container { background: #0b0f12 !important; color: white !important; }
  .glass {
+     backdrop-filter: blur(12px) saturate(180%);
+     background: rgba(255,255,255,0.06);
+     border-radius: 16px;
+     padding: 20px;
+     border: 1px solid rgba(255,255,255,0.08);
+     box-shadow: 0 4px 40px rgba(0,0,0,0.4);
  }
  """

  react_html = """
+ <div class="glass">
+   <h1 style="margin:0; font-size:28px;">Crowd Behavior Analyzer</h1>
+   <p style="opacity:0.7;">React Preview • Dark • Glassmorphism • Autoplay Frames</p>
+   <div id="react-root"></div>
  </div>

  <script crossorigin src="https://unpkg.com/react@18/umd/react.production.min.js"></script>
  <script crossorigin src="https://unpkg.com/react-dom@18/umd/react-dom.production.min.js"></script>
+
  <script>
  const e = React.createElement;

+ function App(){
+   const [frames,setFrames] = React.useState([]);
+   const [index,setIndex] = React.useState(0);
+
+   React.useEffect(()=>{
+     // Gradio attaches elem_id to the component wrapper, so find the nested <input>.
+     const wrapper = document.getElementById("media_input");
+     const fileInput = wrapper ? wrapper.querySelector("input[type='file']") : null;
+     if(!fileInput) return;
+
+     const handle = (evt)=>{
+       const files = fileInput.files;
+       if(!files || files.length === 0) return;
+
+       // images only for UI preview
+       const readers = [...files].slice(0,16).map(file => {
+         return new Promise((res)=>{
+           const r = new FileReader();
+           r.onload = ()=>res(r.result);
+           r.onerror = ()=>res(null);  // never leave the promise pending
+           r.readAsDataURL(file);
          });
+       });
+
+       Promise.all(readers).then(imgs=>{
+         imgs = imgs.filter(Boolean);
+         if(imgs.length === 0) return;
+         while(imgs.length < 16) imgs.push(imgs[0]);
+         setFrames(imgs.slice(0,16));
+         setIndex(0);
+       });
+     };
+
+     fileInput.addEventListener("change",handle);
+     return ()=>fileInput.removeEventListener("change",handle);
+   },[]);
+
+   React.useEffect(()=>{
+     if(frames.length === 0) return;
+     const t = setInterval(()=>setIndex(i=>(i+1)%frames.length),350);
+     return ()=>clearInterval(t);
+   },[frames]);
+
+   return e("div",{},
+     frames.length
+       ? e("img",{src:frames[index], style:{width:"100%",borderRadius:"12px"}})
+       : e("p",{style:{opacity:0.6}},"Preview will appear here after upload.")
  );
  }

+ ReactDOM.createRoot(document.getElementById("react-root")).render(e(App));
  </script>
  """

+ with gr.Blocks(css=css) as demo:
+
+     gr.HTML(react_html)
+
+     file_input = gr.File(
+         label="Upload Video or Images",
+         file_count="multiple",
+         type="filepath",
+         elem_id="media_input"
+     )
+
+     btn = gr.Button("Analyze Behavior", variant="primary")
+
+     output = gr.Label(num_top_classes=4)
+
+     btn.click(fn=predict, inputs=file_input, outputs=output)
+
+ demo.launch()
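(For the Space to build, a requirements.txt covering the imports above is also needed; this sketch is an assumption, not part of the commit — ffmpeg itself ships with the Space image, per the comment in extract_frames_ffmpeg:)

# requirements.txt (sketch; pin versions as needed)
torch
torchvision
gradio
pillow
numpy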