import moviepy.editor as mp
import cv2
import numpy as np
from PIL import Image
import os
import json
from datetime import datetime
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# BLIP setup — loaded once at module import so every analyzer instance
# shares the same captioning model (loading is the expensive part).
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


class VideoSceneAnalyzer:
    """Extract keyframes from a video and generate AI image prompts for each.

    For every detected scene change a keyframe is saved to disk, captioned
    with BLIP, and turned into a detailed description plus a positive and
    negative image-generation prompt. Results can be serialized to JSON.
    """

    def __init__(self, video_path, scene_duration=8):
        # Path to the input video file.
        self.video_path = video_path
        # Nominal scene length in seconds (kept for callers; not used by
        # the difference-based keyframe detector below).
        self.scene_duration = scene_duration
        # moviepy clip handle, populated by load_video().
        self.clip = None
        # Metadata dict filled by load_video().
        self.video_info = {}
        # Per-scene result dicts filled by extract_keyframes().
        self.scenes = []

    def load_video(self):
        """Open the video with moviepy and populate self.video_info.

        Returns True on success, False on failure (error is printed).
        """
        try:
            self.clip = mp.VideoFileClip(self.video_path)
            self.video_info = {
                'duration': self.clip.duration,
                'fps': self.clip.fps,
                'size': self.clip.size,
                'aspect_ratio': self.clip.size[0] / self.clip.size[1],
                'total_frames': int(self.clip.duration * self.clip.fps),
            }
            return True
        except Exception as e:
            print(f"Erro ao carregar vídeo: {e}")
            return False

    def describe_image_and_generate_prompt(self, frame, idx):
        """Caption an RGB frame with BLIP and build prompts from the caption.

        Args:
            frame: RGB image as a numpy array (H, W, 3).
            idx: 1-based scene number, used in the description text.

        Returns:
            (descricao, prompt, negative) — a detailed Portuguese scene
            description, a positive image-generation prompt, and a
            negative prompt.
        """
        # Convert in memory; the previous implementation wrote a temp JPEG
        # to disk, reopened it, and never deleted it (file leak + needless
        # lossy round-trip).
        img = Image.fromarray(np.uint8(frame)).convert("RGB")
        inputs = blip_processor(images=img, return_tensors="pt")
        # Inference only — no_grad avoids building an autograd graph.
        with torch.no_grad():
            out = blip_model.generate(**inputs)
        cap = blip_processor.decode(out[0], skip_special_tokens=True).strip().capitalize()
        descricao = (
            f"Cena {idx}: {cap}. "
            "Registrada ao entardecer com luz dourada suave, profundidade de campo rasa desfocando o fundo. "
            "O ambiente apresenta uma paisagem urbana vibrante com vegetação e construções ao longe, "
            "enquanto o sujeito exibe expressão serena e postura confiante. "
            "Atmosfera de leve nostalgia e contemplação."
        )
        prompt = (
            f"A cinematic, warm golden-hour shot of {cap}. "
            "Captured with a 35mm lens at f/1.8 for shallow depth of field, "
            "soft backlighting, and gentle handheld motion. "
            "Background features a vibrant urban setting with trees and distant buildings, "
            "evoking a sense of nostalgia and calm. "
            "--ar 16:9 --v 6 --style photorealistic --quality 2"
        )
        negative = (
            "--no (CGI artifacts, plastic textures, overexposed skies, cartoonish colors, static poses, low detail)"
        )
        return descricao, prompt, negative

    def extract_keyframes(self, threshold=30.0):
        """Detect scene changes by mean absolute frame difference.

        A frame whose grayscale mean-abs-diff against the previous frame
        exceeds ``threshold`` is saved as a keyframe, captioned, and
        appended to self.scenes. The first frame is always a keyframe.

        Returns True on success, False if the video cannot be opened.
        """
        output_dir = f"keyframes_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        os.makedirs(output_dir, exist_ok=True)

        cap = cv2.VideoCapture(self.video_path)
        if not cap.isOpened():
            print(f"Erro ao carregar vídeo: {self.video_path}")
            return False

        prev = None
        count = 0
        self.scenes = []
        try:
            success, frame = cap.read()
            while success:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # First frame has no predecessor — force it to register.
                diff = np.inf if prev is None else np.mean(cv2.absdiff(gray, prev))
                if prev is None or diff > threshold:
                    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                    # Convert once and reuse for both saving and captioning
                    # (original converted BGR->RGB twice per keyframe).
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    img_path = os.path.join(output_dir, f"keyframe_{count+1:02d}.jpg")
                    Image.fromarray(rgb).save(img_path)
                    desc, prmpt, neg = self.describe_image_and_generate_prompt(rgb, count + 1)
                    self.scenes.append({
                        'scene_number': count + 1,
                        'time': timestamp,
                        'image_path': img_path,
                        'descricao_detalhada': desc,
                        'prompt_ia': prmpt,
                        'negative_prompt': neg,
                    })
                    count += 1
                prev = gray
                success, frame = cap.read()
        finally:
            # Release the capture even if captioning raises.
            cap.release()
        return True

    def save_results(self, output_file=None):
        """Write video info and per-scene results to a UTF-8 JSON file.

        Args:
            output_file: Target path; a timestamped name is generated
                when omitted.

        Returns:
            The path of the file written.
        """
        if not output_file:
            output_file = f"video_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        results = {
            'video_info': self.video_info,
            'analysis_type': 'keyframe_extraction',
            'scenes': self.scenes,
            'generated_at': datetime.now().isoformat(),
        }
        with open(output_file, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps the Portuguese text readable.
            json.dump(results, f, indent=2, ensure_ascii=False)
        return output_file

    def cleanup(self):
        """Release the moviepy clip handle, if one was opened."""
        if self.clip:
            self.clip.close()