| | import cv2 |
| | from PIL import Image |
| | from transformers import AutoImageProcessor, AutoModelForImageClassification |
| | import torch |
| | import gradio as gr |
| | from spaces import GPU |
| |
|
| | |
# Hugging Face checkpoint used for per-frame AI-image classification.
model_name = "Smogy/SMOGY-Ai-images-detector"

# Pick the compute device up front so the model can be moved right after loading.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForImageClassification.from_pretrained(model_name).to(device)
model.eval()  # inference only — freeze dropout/batch-norm behavior
| |
|
| | |
id2label = model.config.id2label
# Lower-cased label -> class index, so label lookups are case-insensitive.
label2id = {v.lower(): k for k, v in id2label.items()}


def _first_class_index(label2id_map, names, fallback):
    """Return the class index of the first label in *names* found in
    *label2id_map*, or *fallback* if none match.

    Uses an explicit ``is not None`` check: a class index of 0 is a valid
    but falsy value, so chaining ``dict.get(...) or ...`` would silently
    skip it.
    """
    for name in names:
        index = label2id_map.get(name)
        if index is not None:
            return index
    return fallback


# BUG FIX: the previous `label2id.get("artificial") or label2id.get("ai") or 0`
# pattern discarded a legitimate index of 0 (falsy), which could flip the
# AI/human class indices for models whose target label maps to index 0.
ai_class_index = _first_class_index(label2id, ("artificial", "ai"), 0)
human_class_index = _first_class_index(label2id, ("human", "real"), 1)
| |
|
@GPU
def analyze_video(video):
    """Classify evenly sampled frames of *video* as AI-generated vs. real.

    Parameters
    ----------
    video : str
        Path to the video file (as provided by the Gradio Video input).

    Returns
    -------
    dict
        ``{"error": ...}`` on failure, otherwise a summary with the flagged
        frame count, total sampled frames, flagged-frame ratio, average AI
        confidence, and a human-readable verdict string.
    """
    cap = cv2.VideoCapture(video)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            # BUG FIX: this early return previously leaked the capture
            # handle; try/finally now guarantees release on every path.
            return {
                "error": "β Could not read video frames."
            }

        # Sample at most 200 frames, spread evenly across the whole video.
        num_frames_to_process = min(200, total_frames)
        frame_indices = [
            int(i * total_frames / num_frames_to_process)
            for i in range(num_frames_to_process)
        ]

        frames_to_process = []
        current_index = 0
        frame_num = 0
        # Sequential decode, keeping only the sampled frame numbers
        # (frame-accurate seeking is unreliable for many codecs).
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret or current_index >= len(frame_indices):
                break
            if frame_num == frame_indices[current_index]:
                # OpenCV decodes BGR; the processor/PIL expect RGB.
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames_to_process.append(Image.fromarray(rgb))
                current_index += 1
            frame_num += 1
    finally:
        cap.release()

    if not frames_to_process:
        return {
            "error": "β No frames extracted."
        }

    # Single batched forward pass over all sampled frames.
    inputs = processor(images=frames_to_process, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # (Removed dead code: a per-frame argmax was computed but never used.)

    # AI confidence is the complement of the human-class probability.
    human_confidences = probs[:, human_class_index].tolist()
    ai_confidences = [1 - h for h in human_confidences]

    total = len(ai_confidences)
    ai_frame_count = sum(1 for score in ai_confidences if score >= 0.5)
    ai_frame_ratio = ai_frame_count / total
    avg_score = sum(ai_confidences) / total

    # Verdict: flagged-frame ratio gates the "real" outcomes; average
    # confidence then separates "likely" from "definite" AI generation.
    # NOTE(review): the status-icon characters below look mojibake'd
    # (e.g. "β" in place of an emoji) — preserved as-is to avoid changing
    # user-visible output; confirm intended encoding with the author.
    if ai_frame_ratio < 0.25:
        verdict = "β Very likely real β no strong signs of AI generation."
    elif ai_frame_ratio < 0.5:
        verdict = "β οΈ Mostly real β a few signs could raise suspicion."
    elif avg_score >= 0.85:
        verdict = "π¨ Definitely AI-generated β high confidence across many frames."
    elif avg_score >= 0.75:
        verdict = "β οΈ Likely AI-generated β multiple signs suggest manipulation."
    else:
        verdict = "π€ Unclear β the video shows mixed signals."

    return {
        "ai_frame_count": ai_frame_count,
        "total_frames": total,
        "ai_frame_ratio": round(ai_frame_ratio, 3),
        "average_ai_confidence": round(avg_score, 3),
        "verdict": verdict
    }
| |
|
| | |
def analyze_video_ui(video):
    """Gradio callback: run analyze_video and format the result as text."""
    summary = analyze_video(video)
    if "error" in summary:
        return summary["error"]

    flagged = summary["ai_frame_count"]
    total = summary["total_frames"]
    ratio_pct = summary["ai_frame_ratio"] * 100
    confidence_pct = summary["average_ai_confidence"] * 100

    report_lines = [
        f"Frames flagged as AI-generated: {flagged} out of {total} ({ratio_pct:.1f}%)",
        f"Average AI Confidence: {confidence_pct:.1f}%",
        f"Final Verdict: {summary['verdict']}",
    ]
    return "\n".join(report_lines)
| |
|
| | |
# Build the web UI and start serving it.
demo = gr.Interface(
    fn=analyze_video_ui,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.Textbox(label="Detection Results"),
    title="AI Frame Detector",
    description="Analyzes sampled frames from a video to detect whether it's likely AI-generated or real.",
)
demo.launch()