Spaces:

AIDetect-benchmarked
/

Deepfake-Detector

Sleeping

App Files Community

AZIIIIIIIIZ committed on Sep 29

Commit

efb1c7c

verified ·

1 Parent(s): 9d10281

Upload app.py

Browse files

Files changed (1) hide show

app.py +66 -186

app.py CHANGED Viewed

@@ -1,186 +1,66 @@
-import os
-import gradio as gr
-import cv2
-import numpy as np
-from PIL import Image
-import torch
-import torchvision.transforms as transforms
-import torchvision.models as models
-# Simple video action recognition using pre-trained models
-class SimpleVideoAnalyzer:
-    def __init__(self):
-        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        print(f"Using device: {self.device}")
-        # Load a pre-trained ResNet model for feature extraction
-        self.model = models.resnet50(pretrained=True)
-        self.model.eval()
-        self.model.to(self.device)
-        # Image preprocessing
-        self.transform = transforms.Compose([
-            transforms.Resize((224, 224)),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                               std=[0.229, 0.224, 0.225])
-        ])
-        # Load labels from Kinetics-400 if available, else fallback
-        self.action_categories = self.load_kinetics_labels()
-        print("✅ Simple video analyzer initialized successfully!")
-    def extract_frames(self, video_path, num_frames=8):
-        """Extract frames from video"""
-        cap = cv2.VideoCapture(video_path)
-        frames = []
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        # Sample frames evenly
-        frame_indices = np.linspace(0, total_frames-1, num_frames, dtype=int)
-        for idx in frame_indices:
-            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
-            ret, frame = cap.read()
-            if ret:
-                # Convert BGR to RGB
-                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                frames.append(frame_rgb)
-        cap.release()
-        return frames
-    def load_kinetics_labels(self):
-        """Load Kinetics-400 class labels if available."""
-        label_file = 'tools/data/kinetics/label_map_k400.txt'
-        if os.path.exists(label_file):
-            try:
-                with open(label_file, 'r') as f:
-                    labels = [line.strip() for line in f.readlines() if line.strip()]
-                if labels:
-                    print(f"✅ Loaded {len(labels)} Kinetics-400 labels from {label_file}")
-                    return labels
-            except Exception:
-                pass
-        print("⚠️ Kinetics labels not found, using fallback categories")
-        return [
-            "walking", "running", "jumping", "sitting", "standing",
-            "dancing", "cooking", "reading", "writing", "typing",
-            "clapping", "waving", "pointing", "lifting", "throwing",
-            "catching", "kicking", "punching", "swimming", "cycling"
-        ]
-    def analyze_frames(self, frames):
-        """Analyze frames and return predictions"""
-        features = []
-        for frame in frames:
-            # Convert to PIL Image
-            pil_image = Image.fromarray(frame)
-            # Preprocess
-            input_tensor = self.transform(pil_image).unsqueeze(0).to(self.device)
-            # Extract features
-            with torch.no_grad():
-                features.append(self.model(input_tensor).cpu().numpy())
-        # Average features across frames (not directly used for class mapping here)
-        _ = np.mean(features, axis=0)
-        # Create deterministic-looking output: 1 dominant class with score 1.0 and
-        # four tiny scores, formatted like the example
-        num_classes = len(self.action_categories)
-        num_return = min(5, num_classes)
-        # Choose a dominant class index (random for demo)
-        dominant_idx = np.random.randint(0, num_classes)
-        # Pick four other unique indices
-        candidate_indices = [i for i in range(num_classes) if i != dominant_idx]
-        np.random.shuffle(candidate_indices)
-        other_indices = candidate_indices[:max(0, num_return - 1)]
-        results = []
-        # Top-1 with score exactly 1.0
-        results.append((self.action_categories[dominant_idx], "1.0"))
-        # Four tiny scores using scientific notation similar to example
-        for i in other_indices:
-            tiny = 10 ** (-(14 + np.random.rand() * 3))  # ~1e-14 to 1e-17
-            results.append((self.action_categories[i], f"{tiny:.15e}"))
-        return results
-    def analyze_video(self, video_path):
-        """Main analysis function"""
-        try:
-            if video_path is None:
-                return "Please upload a video file."
-            print(f"Processing video: {video_path}")
-            # Extract frames
-            frames = self.extract_frames(video_path)
-            if not frames:
-                return "❌ Could not extract frames from video."
-            # Analyze frames
-            results = self.analyze_frames(frames)
-            # Format results to match requested style: "label:  score" per line
-            result_lines = []
-            for label, score in results:
-                result_lines.append(f"{label}:  {score}")
-            result_text = "\n".join(result_lines)
-            result_text += f"\n📊 Analyzed {len(frames)} frames"
-            result_text += f"\n🔧 Using: {self.device.upper()}"
-            return result_text
-        except Exception as e:
-            return f"❌ Error processing video: {str(e)}"
-# Initialize analyzer
-print("🚀 Initializing Simple Video Analyzer...")
-analyzer = SimpleVideoAnalyzer()
-# Create Gradio interface
-def analyze_video(video):
-    """Gradio interface function"""
-    return analyzer.analyze_video(video)
-# Create the interface
-demo = gr.Interface(
-    fn=analyze_video,
-    inputs=gr.Video(label="Upload Video", height=300),
-    outputs=gr.Textbox(label="Analysis Results", lines=15),
-    title="🎬 GenVidBench - Simple Video Action Recognition",
-    description="""
-    **Simple Video Action Recognition Demo**
-    Upload a video to analyze its content using a simplified approach.
-    This demo uses pre-trained ResNet features for basic action recognition.
-    **Features:**
-    - 🎥 Multi-frame analysis
-    - 🧠 Pre-trained ResNet50 features
-    - ⚡ Fast processing
-    - 📊 Top-5 predictions
-    **Supported formats:** MP4, AVI, MOV, etc.
-    **Recommended:** Short videos (under 30 seconds) for best performance.
-    """,
-    examples=[
-        ["demo/demo.mp4"] if os.path.exists("demo/demo.mp4") else None
-    ],
-    cache_examples=False,
-    theme=gr.themes.Soft(),
-    allow_flagging="never"
-)
-if __name__ == "__main__":
-    print("🌟 Starting GenVidBench Simple Demo...")
-    demo.launch()

+import os
+from operator import itemgetter
+import gradio as gr
+from mmaction.apis import init_recognizer, inference_recognizer
+CONFIG_FILE = 'demo/demo_configs/tsn_r50_1x1x8_video_infer.py'
+CHECKPOINT_FILE = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
+LABEL_FILE = 'tools/data/kinetics/label_map_k400.txt'
+def load_labels(path):
+    if os.path.exists(path):
+        with open(path, 'r') as f:
+            return [x.strip() for x in f if x.strip()]
+    return None
+def build_model():
+    if not os.path.exists(CHECKPOINT_FILE):
+        raise FileNotFoundError(f'Checkpoint not found at {CHECKPOINT_FILE}')
+    return init_recognizer(CONFIG_FILE, CHECKPOINT_FILE, device='cpu')
+print('Initializing model...')
+try:
+    model = build_model()
+    print('✅ Model loaded successfully!')
+except Exception as e:
+    print(f'❌ Error loading model: {e}')
+    model = None
+labels = load_labels(LABEL_FILE)
+def analyze_video(video_path: str):
+    try:
+        if video_path is None:
+            return 'Please upload a video file.'
+        if model is None:
+            return '⚠️ Model not loaded. Check logs for details.'
+        result = inference_recognizer(model, video_path)
+        pred_scores = result.pred_score.tolist()
+        score_sorted = sorted(zip(range(len(pred_scores)), pred_scores), key=itemgetter(1), reverse=True)
+        top5 = score_sorted[:5]
+        lines = []
+        for idx, score in top5:
+            name = labels[idx] if labels and idx < len(labels) else f'class_{idx}'
+            lines.append(f'{name}:  {score}')
+        return '\n'.join(lines)
+    except Exception as e:
+        return f'❌ Error processing video: {str(e)}'
+demo = gr.Interface(
+    fn=analyze_video,
+    inputs=gr.Video(label='Upload Video', height=300),
+    outputs=gr.Textbox(label='Analysis Results', lines=12),
+    title='🎬 GenVidBench - TSN (MMAction2)',
+    description='Upload a video. Inference uses TSN R50 on Kinetics-400.',
+    cache_examples=False,
+    allow_flagging='never'
+)
+if __name__ == '__main__':
+    demo.launch()