Create app.py
app.py
ADDED
@@ -0,0 +1,112 @@
import gradio as gr
import torch
from torch import nn
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import cv2
import numpy as np


# --- 1. Define Model Architecture (must match your training script) ---
class CLIPImageClassifier(nn.Module):
    def __init__(self, clip_model_name="openai/clip-vit-base-patch32"):
        super(CLIPImageClassifier, self).__init__()
        self.clip = CLIPModel.from_pretrained(clip_model_name)
        self.classifier = nn.Sequential(
            nn.Linear(self.clip.config.vision_config.hidden_size, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 1),
            nn.Sigmoid()  # single fake-probability in [0, 1]
        )

    def forward(self, pixel_values):
        vision_outputs = self.clip.vision_model(pixel_values=pixel_values)
        # pooler_output is the pooled [CLS] image embedding
        image_features = vision_outputs.pooler_output
        return self.classifier(image_features)


# --- 2. Load Model & Processor ---
DEVICE = "cpu"  # Force CPU for the Hugging Face free tier
MODEL_PATH = "best_clip_finetuned_classifier.pth"
CLIP_NAME = "openai/clip-vit-base-patch32"

print("Loading model...")
model = CLIPImageClassifier()
# Load weights with strict=False to ignore potential extra keys
model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device(DEVICE)), strict=False)
model.to(DEVICE)
model.eval()

print("Loading processor...")
processor = CLIPProcessor.from_pretrained(CLIP_NAME)


# --- 3. Define Inference Function ---
def predict_video(video_path):
    """
    Gradio passes 'video_path' as a string path to the uploaded temporary file.
    """
    if video_path is None:
        return "Please upload a video.", 0.0

    print(f"Processing video: {video_path}")
    cap = cv2.VideoCapture(video_path)

    fps = cap.get(cv2.CAP_PROP_FPS)
    if fps == 0 or np.isnan(fps):
        fps = 30  # default fallback when metadata is missing

    # Sample 1 frame per second to keep inference fast on CPU
    # (e.g. fps=30 -> frame_skip=30, so every 30th frame is scored)
    frames_to_sample = 1
    frame_skip = max(1, int(fps / frames_to_sample))

    predictions = []
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if frame_count % frame_skip == 0:
            # Convert BGR (OpenCV) to RGB (PIL)
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(frame_rgb)

            # Preprocess
            inputs = processor(images=pil_image, return_tensors="pt")["pixel_values"].to(DEVICE)

            # Inference
            with torch.no_grad():
                output = model(inputs)
                prob = output.item()
                predictions.append(prob)

        frame_count += 1

    cap.release()

    if not predictions:
        return "Could not analyze video frames.", 0.0

    # Aggregate per-frame scores into one video-level score
    avg_fake_prob = sum(predictions) / len(predictions)

    # Create the final label
    label = "FAKE" if avg_fake_prob > 0.5 else "REAL"
    confidence = avg_fake_prob if label == "FAKE" else (1 - avg_fake_prob)

    return f"{label} (Confidence: {confidence:.2%})", avg_fake_prob


# --- 4. Create Gradio Interface ---
interface = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Textbox(label="Verdict"),
        gr.Number(label="Fake Probability Score (0=Real, 1=Fake)")
    ],
    title="DeepFake Video Detector",
    description="Upload a video to check whether it is real or AI-generated. Frames are scored by a fine-tuned CLIP classifier."
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
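
For a quick sanity check outside the Gradio UI, predict_video can be called directly on a file path. A minimal sketch, assuming the file above is saved as app.py, the .pth weights sit next to it, and a local test clip named sample.mp4 exists (both the clip name and this script are hypothetical, not part of the commit):

from app import predict_video  # importing app.py loads the model once at import time

verdict, score = predict_video("sample.mp4")  # hypothetical local test clip
print(verdict)  # formatted like "REAL (Confidence: 97.12%)" per the f-string above
print(f"Average fake probability: {score:.3f}")

Because the __main__ guard keeps interface.launch() from running on import, this scores the clip without starting the web server.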