"""AI Frame Detector — Gradio app.

Samples up to 200 evenly-spaced frames from an uploaded video, classifies
each frame with an image-classification model, and reports how likely the
video is AI-generated.
"""

import cv2
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification
import torch
import gradio as gr
from spaces import GPU

# Load model and processor once at startup.
model_name = "Smogy/SMOGY-Ai-images-detector"
processor = AutoImageProcessor.from_pretrained(model_name)
model = AutoModelForImageClassification.from_pretrained(model_name)
model.eval()

# Use GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Label mappings (lower-cased label text -> class index).
id2label = model.config.id2label
label2id = {v.lower(): k for k, v in id2label.items()}


def _find_label_index(candidates, default):
    """Return the class index for the first label name found in label2id.

    Uses membership checks rather than `get(...) or ...` chains, because an
    index of 0 is falsy and would incorrectly fall through to the fallback.
    """
    for name in candidates:
        if name in label2id:
            return label2id[name]
    return default


# Try to identify AI and Human class indexes (defaults match common binary
# detector ordering; verify against the model card if labels differ).
ai_class_index = _find_label_index(("artificial", "ai"), 0)
human_class_index = _find_label_index(("human", "real"), 1)


def _extract_frames(cap, total_frames, num_frames_to_process):
    """Sequentially read the video, keeping num_frames_to_process evenly-spaced frames as RGB PIL images."""
    frame_indices = [
        int(i * total_frames / num_frames_to_process)
        for i in range(num_frames_to_process)
    ]
    frames = []
    current_index = 0  # position within frame_indices
    frame_num = 0      # index of the frame just read
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or current_index >= len(frame_indices):
            break
        if frame_num == frame_indices[current_index]:
            # OpenCV decodes BGR; the model expects RGB.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(Image.fromarray(rgb))
            current_index += 1
        frame_num += 1
    return frames


@GPU
def analyze_video(video):
    """Classify sampled frames of *video* and summarize the AI-likeness.

    Args:
        video: Path to a video file readable by OpenCV.

    Returns:
        dict with keys ``ai_frame_count``, ``total_frames``,
        ``ai_frame_ratio``, ``average_ai_confidence``, ``verdict`` on
        success, or a dict with a single ``error`` key on failure.
    """
    cap = cv2.VideoCapture(video)
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            return {"error": "❌ Could not read video frames."}

        num_frames_to_process = min(200, total_frames)
        frames_to_process = _extract_frames(cap, total_frames, num_frames_to_process)
    finally:
        # Always release the capture handle, even on an unexpected error.
        cap.release()

    if not frames_to_process:
        return {"error": "❌ No frames extracted."}

    inputs = processor(images=frames_to_process, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1)

    # Per-frame AI confidence is the complement of the "human" probability.
    human_confidences = probs[:, human_class_index].tolist()
    ai_confidences = [1 - h for h in human_confidences]

    total = len(ai_confidences)
    ai_frame_count = sum(1 for score in ai_confidences if score >= 0.5)
    ai_frame_ratio = ai_frame_count / total
    avg_score = sum(ai_confidences) / total

    # Verdict thresholds: frame ratio decides the "real" side; average
    # confidence refines the "AI" side.
    if ai_frame_ratio < 0.25:
        verdict = "✅ Very likely real — no strong signs of AI generation."
    elif ai_frame_ratio < 0.5:
        verdict = "⚠️ Mostly real — a few signs could raise suspicion."
    elif avg_score >= 0.85:
        verdict = "🚨 Definitely AI-generated — high confidence across many frames."
    elif avg_score >= 0.75:
        verdict = "⚠️ Likely AI-generated — multiple signs suggest manipulation."
    else:
        verdict = "🤔 Unclear — the video shows mixed signals."

    return {
        "ai_frame_count": ai_frame_count,
        "total_frames": total,
        "ai_frame_ratio": round(ai_frame_ratio, 3),
        "average_ai_confidence": round(avg_score, 3),
        "verdict": verdict,
    }


def analyze_video_ui(video):
    """Format analyze_video's summary dict as a human-readable string."""
    summary = analyze_video(video)
    if "error" in summary:
        return summary["error"]
    return (
        f"Frames flagged as AI-generated: {summary['ai_frame_count']} "
        f"out of {summary['total_frames']} "
        f"({summary['ai_frame_ratio'] * 100:.1f}%)\n"
        f"Average AI Confidence: {summary['average_ai_confidence'] * 100:.1f}%\n"
        f"Final Verdict: {summary['verdict']}"
    )


# Gradio interface
demo = gr.Interface(
    fn=analyze_video_ui,
    inputs=gr.Video(label="Upload a video"),
    outputs=gr.Textbox(label="Detection Results"),
    title="AI Frame Detector",
    description="Analyzes sampled frames from a video to detect whether it's likely AI-generated or real.",
)
demo.launch()