File size: 4,666 Bytes
0140812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0c39ac
0140812
 
c0c39ac
 
 
 
 
 
0140812
c0c39ac
 
 
 
 
 
 
 
 
0140812
 
c0c39ac
 
 
 
 
0140812
 
c0c39ac
 
 
 
 
 
0140812
 
 
 
 
 
c0c39ac
 
0140812
c0c39ac
0140812
 
 
c0c39ac
 
0140812
c0c39ac
0140812
c0c39ac
 
0140812
c0c39ac
 
0140812
 
c0c39ac
 
 
 
 
 
0140812
 
 
 
 
 
 
 
 
 
c0c39ac
0140812
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

import moviepy.editor as mp
import cv2
import numpy as np
from PIL import Image
import os
import json
from datetime import datetime
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# BLIP setup: load the image-captioning processor and model once at module
# import time. NOTE(review): from_pretrained downloads weights on first run,
# so importing this module requires network access until the cache is warm.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

class VideoSceneAnalyzer:
    """Detect scene changes in a video and describe each keyframe.

    Workflow: load_video() -> extract_keyframes() -> save_results() -> cleanup().
    Each detected keyframe is captioned with the module-level BLIP model and
    turned into a detailed Portuguese description plus an image-generation
    prompt / negative prompt pair.
    """

    def __init__(self, video_path, scene_duration=8):
        self.video_path = video_path
        # Nominal scene length in seconds. Kept for callers; the current
        # extraction below is threshold-based and does not read it.
        self.scene_duration = scene_duration
        self.clip = None        # moviepy VideoFileClip, set by load_video()
        self.video_info = {}    # metadata dict, filled by load_video()
        self.scenes = []        # per-keyframe dicts, filled by extract_keyframes()

    def load_video(self):
        """Open the video with moviepy and populate self.video_info.

        Returns:
            True on success; False (after printing the error) on any failure,
            e.g. a missing file or an unreadable container.
        """
        try:
            self.clip = mp.VideoFileClip(self.video_path)
            self.video_info = {
                'duration': self.clip.duration,
                'fps': self.clip.fps,
                'size': self.clip.size,
                'aspect_ratio': self.clip.size[0] / self.clip.size[1],
                'total_frames': int(self.clip.duration * self.clip.fps)
            }
            return True
        except Exception as e:
            print(f"Erro ao carregar vídeo: {e}")
            return False

    def describe_image_and_generate_prompt(self, frame, idx):
        """Caption an RGB frame with BLIP and build the output strings.

        Args:
            frame: RGB image as an array-like (H, W, 3) of uint8-compatible
                values.
            idx: 1-based scene number, interpolated into the description.

        Returns:
            Tuple (descricao, prompt, negative): a Portuguese scene
            description, an English image-generation prompt, and a negative
            prompt string.
        """
        # Convert the array to a PIL image directly. The previous version
        # saved a temp_XX.jpg, re-opened it, and never deleted it — leaking
        # one file per keyframe and paying a needless disk round-trip.
        img = Image.fromarray(np.uint8(frame)).convert("RGB")

        inputs = blip_processor(images=img, return_tensors="pt")
        out = blip_model.generate(**inputs)
        cap = blip_processor.decode(out[0], skip_special_tokens=True).strip().capitalize()

        descricao = (
            f"Cena {idx}: {cap}. "
            "Registrada ao entardecer com luz dourada suave, profundidade de campo rasa desfocando o fundo. "
            "O ambiente apresenta uma paisagem urbana vibrante com vegetação e construções ao longe, "
            "enquanto o sujeito exibe expressão serena e postura confiante. "
            "Atmosfera de leve nostalgia e contemplação."
        )

        prompt = (
            f"A cinematic, warm golden-hour shot of {cap}. "
            "Captured with a 35mm lens at f/1.8 for shallow depth of field, "
            "soft backlighting, and gentle handheld motion. "
            "Background features a vibrant urban setting with trees and distant buildings, "
            "evoking a sense of nostalgia and calm. "
            "--ar 16:9 --v 6 --style photorealistic --quality 2"
        )

        negative = (
            "--no (CGI artifacts, plastic textures, overexposed skies, cartoonish colors, static poses, low detail)"
        )

        return descricao, prompt, negative

    def extract_keyframes(self, threshold=30.0):
        """Scan the video and record a scene for each large frame change.

        A frame is a keyframe when the mean absolute grayscale difference
        from the previous frame exceeds ``threshold`` (the first frame is
        always a keyframe). Keyframe images are saved under a timestamped
        directory and self.scenes is rebuilt from scratch.

        Args:
            threshold: minimum mean grayscale difference (0-255 scale) that
                counts as a scene change.

        Returns:
            True (results are left in self.scenes).
        """
        output_dir = f"keyframes_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        os.makedirs(output_dir, exist_ok=True)

        cap = cv2.VideoCapture(self.video_path)
        prev = None
        count = 0
        success, frame = cap.read()
        self.scenes = []

        while success:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # np.inf forces the very first frame to qualify as a keyframe.
            diff = np.inf if prev is None else np.mean(cv2.absdiff(gray, prev))
            if prev is None or diff > threshold:
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                img_path = os.path.join(output_dir, f"keyframe_{count+1:02d}.jpg")
                # Convert BGR -> RGB once and reuse it for both the saved
                # image and the captioning call (was converted twice before).
                rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                Image.fromarray(rgb).save(img_path)
                desc, prmpt, neg = self.describe_image_and_generate_prompt(rgb, count + 1)
                self.scenes.append({
                    'scene_number': count + 1,
                    'time': timestamp,
                    'image_path': img_path,
                    'descricao_detalhada': desc,
                    'prompt_ia': prmpt,
                    'negative_prompt': neg
                })
                count += 1
            prev = gray
            success, frame = cap.read()

        cap.release()
        return True

    def save_results(self, output_file=None):
        """Dump video_info and scenes to a UTF-8 JSON file.

        Args:
            output_file: target path; a timestamped name is generated when
                omitted.

        Returns:
            The path of the file that was written.
        """
        if not output_file:
            output_file = f"video_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        results = {
            'video_info': self.video_info,
            'analysis_type': 'keyframe_extraction',
            'scenes': self.scenes,
            'generated_at': datetime.now().isoformat()
        }
        with open(output_file, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps the Portuguese text human-readable.
            json.dump(results, f, indent=2, ensure_ascii=False)
        return output_file

    def cleanup(self):
        """Release the moviepy clip opened by load_video(), if any."""
        if self.clip:
            self.clip.close()