import os
import shutil
import subprocess
import tempfile
import base64

import numpy as np
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import gradio as gr

# Number of frames fed to the LSTM per clip.
SEQUENCE_LENGTH = 16
NUM_CLASSES = 4
MODEL_PATH = "best_model.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Index order must match the label encoding used at training time.
CLASS_NAMES = ["aggressive", "idle", "panic", "normal"]


# ------------------ MODEL ------------------
class CNNLSTM(nn.Module):
    """Per-frame CNN feature extractor followed by an LSTM over the sequence.

    Expects input of shape (B, T, 3, 64, 64); two stride-2 max-pools reduce
    64x64 to 16x16, hence the LSTM input size of 64 * 16 * 16.
    """

    def __init__(self, num_classes):
        super(CNNLSTM, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.lstm = nn.LSTM(64 * 16 * 16, 128, batch_first=True)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        # Fold time into the batch dimension so the CNN sees plain images.
        B, T, C, H, W = x.size()
        x = x.view(B * T, C, H, W)
        x = self.cnn(x)
        # Restore the (B, T, features) layout for the LSTM.
        x = x.view(B, T, -1)
        x, _ = self.lstm(x)
        # Classify from the last time step only.
        return self.fc(x[:, -1, :])


# ------------------ LOAD MODEL ------------------
def load_model():
    """Load the trained checkpoint onto `device` in eval mode.

    Raises FileNotFoundError when MODEL_PATH is missing.
    """
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError("Upload best_model.pth to the Space!")
    model = CNNLSTM(NUM_CLASSES).to(device)
    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    model.eval()
    return model


try:
    model = load_model()
except Exception as e:
    # Keep the app alive without a model; predict() reports the error.
    print(f"Model could not be loaded: {e}")
    model = None


# ------------------ FRAME EXTRACTION (FFmpeg) ------------------
def extract_frames_ffmpeg(video_path):
    """Sample SEQUENCE_LENGTH RGB frames from a video via ffmpeg.

    Frames are grabbed at 1 fps, scaled to 320x180 and written to a temp
    directory. If more frames than needed are produced they are sampled
    uniformly; if fewer, the list is cycled to pad up to SEQUENCE_LENGTH.

    Returns a list of PIL Images, or None when ffmpeg produced no frames.
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        out_pattern = os.path.join(tmp_dir, "frame_%03d.jpg")
        cmd = [
            "ffmpeg", "-i", video_path,
            "-vf", "fps=1,scale=320:180",
            out_pattern,
            "-hide_banner", "-loglevel", "error",
        ]
        # check=False: a failed run simply leaves no frames, handled below.
        result = subprocess.run(cmd, check=False)
        jpgs = sorted(
            os.path.join(tmp_dir, f)
            for f in os.listdir(tmp_dir)
            if f.endswith(".jpg")
        )
        if result.returncode != 0 or not jpgs:
            return None
        if len(jpgs) >= SEQUENCE_LENGTH:
            idxs = np.linspace(0, len(jpgs) - 1, SEQUENCE_LENGTH).astype(int)
            jpgs = [jpgs[i] for i in idxs]
        else:
            # Cycle the available frames to fill the sequence.
            jpgs = (jpgs * SEQUENCE_LENGTH)[:SEQUENCE_LENGTH]
        # convert() forces the lazy file data into memory, so the temp
        # directory can be deleted safely in the finally block.
        return [Image.open(f).convert("RGB") for f in jpgs]
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)


# ------------------ PREDICTION
------------------ transform = transforms.Compose([ transforms.Resize((64, 64)), transforms.ToTensor(), ]) def do_predict(frames): if model is None: return {"Error": "Model not loaded"} tensors = [transform(f) for f in frames] tensor = torch.stack(tensors).unsqueeze(0).to(device) with torch.no_grad(): out = model(tensor) probs = torch.softmax(out, dim=1)[0].cpu().numpy() return {CLASS_NAMES[i]: float(probs[i]) for i in range(NUM_CLASSES)} def predict(files): if files is None: return {"Error": "Upload a file first!"} if isinstance(files, str): files = [files] # Video if len(files) == 1 and files[0].lower().endswith((".mp4",".mov",".avi",".mkv",".webm")): frames = extract_frames_ffmpeg(files[0]) if frames is None: return {"Error": "FFmpeg could not extract frames!"} return do_predict(frames) # Multiple images if len(files) >= SEQUENCE_LENGTH: imgs = [Image.open(f).convert("RGB") for f in files[:16]] return do_predict(imgs) # Single image try: img = Image.open(files[0]).convert("RGB") frames = [img] * SEQUENCE_LENGTH return do_predict(frames) except: return {"Error": "Invalid image"} # ------------------ CSS (insert via HTML) ------------------ css_html = """ """ # ------------------ REACT FRONTEND (subtitle color updated) ------------------ react_html = """

Crowd Behavior Analyzer

Dark • Glassmorphism • React Autoplay Preview

""" # ------------------ UI ------------------ with gr.Blocks() as demo: gr.HTML(css_html) gr.HTML(react_html) file_input = gr.File( label="Upload video or multiple images", file_count="multiple", type="filepath", elem_id="media_input" ) btn = gr.Button("Analyze", variant="primary") output = gr.Label(num_top_classes=4) btn.click(predict, file_input, output) demo.launch()