"""AI Frame Detector — Gradio app.

Samples up to 200 evenly-spaced frames from an uploaded video, classifies
each frame with an image-classification model, and reports how likely the
video is AI-generated.
"""

import cv2
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import gradio as gr
from spaces import GPU

# Load model and processor once at startup.
model_name = "Smogy/SMOGY-Ai-images-detector"
processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForImageClassification.from_pretrained(model_name)
model.eval()

# Use GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Label mappings (lower-cased label text -> class index).
id2label = model.config.id2label
label2id = {v.lower(): k for k, v in id2label.items()}


def _find_label_index(candidates, default):
    """Return the class index for the first label name found in label2id.

    Uses membership checks rather than `get(...) or ...` chains, because an
    index of 0 is falsy and would incorrectly fall through to the fallback.
    """
    for name in candidates:
        if name in label2id:
            return label2id[name]
    return default


# Try to identify AI and Human class indexes (defaults match common binary
# detector ordering; verify against the model card if labels differ).
ai_class_index = _find_label_index(("artificial", "ai"), 0)
human_class_index = _find_label_index(("human", "real"), 1)


def _extract_frames(cap, total_frames, num_frames_to_process):
    """Sequentially read the video, keeping num_frames_to_process evenly-spaced frames as RGB PIL images."""
    frame_indices = [
        int(i * total_frames / num_frames_to_process)
        for i in range(num_frames_to_process)
    ]
    frames = []
    current_index = 0  # position within frame_indices
    frame_num = 0      # index of the frame just read
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or current_index >= len(frame_indices):
            break
        if frame_num == frame_indices[current_index]:
            # OpenCV decodes BGR; the model expects RGB.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(rgb))
            current_index += 1
        frame_num += 1
    return frames


@GPU
def analyze_video(video):
    """Classify sampled frames of *video* and summarize the AI-likeness.

    Args:
        video: Path to a video file readable by OpenCV.

    Returns:
        dict with keys ``ai_frame_count``, ``total_frames``,
        ``ai_frame_ratio``, ``average_ai_confidence``, ``verdict`` on
        success, or a dict with a single ``error`` key on failure.
    """
    cap = cv2.VideoCapture(video)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            return {"error": "❌ Could not read video frames."}

        num_frames_to_process = min(200, total_frames)
        frames_to_process = _extract_frames(cap, total_frames, num_frames_to_process)
    finally:
        # Always release the capture handle, even on an unexpected error.
        cap.release()

    if not frames_to_process:
        return {"error": "❌ No frames extracted."}

    inputs = processor(images=frames_to_process, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)

    # Per-frame AI confidence is the complement of the "human" probability.
    human_confidences = probs[:, human_class_index].tolist()
    ai_confidences = [1 - h for h in human_confidences]

    total = len(ai_confidences)
    ai_frame_count = sum(1 for score in ai_confidences if score >= 0.5)
    ai_frame_ratio = ai_frame_count / total
    avg_score = sum(ai_confidences) / total

    # Verdict thresholds: frame ratio decides the "real" side; average
    # confidence refines the "AI" side.
    if ai_frame_ratio < 0.25:
        verdict = "✅ Very likely real — no strong signs of AI generation."
    elif ai_frame_ratio < 0.5:
        verdict = "⚠️ Mostly real — a few signs could raise suspicion."
    elif avg_score >= 0.85:
        verdict = "🚨 Definitely AI-generated — high confidence across many frames."
    elif avg_score >= 0.75:
        verdict = "⚠️ Likely AI-generated — multiple signs suggest manipulation."
    else:
        verdict = "🤔 Unclear — the video shows mixed signals."

    return {
        "ai_frame_count": ai_frame_count,
        "total_frames": total,
        "ai_frame_ratio": round(ai_frame_ratio, 3),
        "average_ai_confidence": round(avg_score, 3),
        "verdict": verdict,
    }


def analyze_video_ui(video):
    """Format analyze_video's summary dict as a human-readable string."""
    summary = analyze_video(video)
    if "error" in summary:
        return summary["error"]
    return (
        f"Frames flagged as AI-generated: {summary['ai_frame_count']} "
        f"out of {summary['total_frames']} "
        f"({summary['ai_frame_ratio'] * 100:.1f}%)\n"
        f"Average AI Confidence: {summary['average_ai_confidence'] * 100:.1f}%\n"
        f"Final Verdict: {summary['verdict']}"
    )


# Gradio interface
demo = gr.Interface(
    fn=analyze_video_ui,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.Textbox(label="Detection Results"),
    title="AI Frame Detector",
    description="Analyzes sampled frames from a video to detect whether it's likely AI-generated or real.",
)
demo.launch()