# video_analyzer / video_analyzer_keyframes.py
# Keyframe extraction + BLIP captioning for videos.
import moviepy.editor as mp
import cv2
import numpy as np
from PIL import Image
import os
import json
from datetime import datetime
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
# BLIP setup — loaded once at import time so every VideoSceneAnalyzer
# instance shares a single processor/model pair (first run downloads the
# weights from the Hugging Face hub; subsequent runs hit the local cache).
# NOTE(review): the model stays on CPU and in train() mode by default —
# inference still works, but confirm whether a .eval()/.to(device) step
# was intended by the original author.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
class VideoSceneAnalyzer:
    """Detect keyframes in a video and caption each one with BLIP.

    Workflow: ``load_video()`` -> ``extract_keyframes()`` -> ``save_results()``
    -> ``cleanup()``.  Keyframe images are written to a timestamped directory;
    per-scene metadata (timestamp, paths, BLIP-derived description and
    image-generation prompts) accumulates in ``self.scenes``.
    """

    def __init__(self, video_path, scene_duration=8):
        self.video_path = video_path
        # Kept for interface compatibility; not used by keyframe extraction.
        self.scene_duration = scene_duration
        self.clip = None        # moviepy VideoFileClip, set by load_video()
        self.video_info = {}    # duration/fps/size metadata, set by load_video()
        self.scenes = []        # per-keyframe dicts, filled by extract_keyframes()

    def load_video(self):
        """Open the video with moviepy and populate ``self.video_info``.

        Returns:
            bool: True on success; False if the file could not be opened
            (the error is printed, not raised).
        """
        try:
            self.clip = mp.VideoFileClip(self.video_path)
            self.video_info = {
                'duration': self.clip.duration,
                'fps': self.clip.fps,
                'size': self.clip.size,
                'aspect_ratio': self.clip.size[0] / self.clip.size[1],
                'total_frames': int(self.clip.duration * self.clip.fps)
            }
            return True
        except Exception as e:
            print(f"Erro ao carregar vídeo: {e}")
            return False

    def describe_image_and_generate_prompt(self, frame, idx):
        """Caption an RGB frame with BLIP and build scene description/prompts.

        Args:
            frame: RGB image as a numpy array (H, W, 3).
            idx: 1-based scene number, interpolated into the description.

        Returns:
            tuple[str, str, str]: (detailed description in Portuguese,
            positive image-generation prompt, negative prompt).
        """
        # Build the PIL image directly from the array.  The original wrote a
        # temp_XX.jpg to disk and re-read it, leaking one temp file per
        # keyframe into the working directory for no benefit.
        img = Image.fromarray(np.uint8(frame)).convert("RGB")
        inputs = blip_processor(images=img, return_tensors="pt")
        # Pure inference: skip autograd bookkeeping to save memory/time.
        with torch.no_grad():
            out = blip_model.generate(**inputs)
        cap = blip_processor.decode(out[0], skip_special_tokens=True).strip().capitalize()
        descricao = (
            f"Cena {idx}: {cap}. "
            "Registrada ao entardecer com luz dourada suave, profundidade de campo rasa desfocando o fundo. "
            "O ambiente apresenta uma paisagem urbana vibrante com vegetação e construções ao longe, "
            "enquanto o sujeito exibe expressão serena e postura confiante. "
            "Atmosfera de leve nostalgia e contemplação."
        )
        prompt = (
            f"A cinematic, warm golden-hour shot of {cap}. "
            "Captured with a 35mm lens at f/1.8 for shallow depth of field, "
            "soft backlighting, and gentle handheld motion. "
            "Background features a vibrant urban setting with trees and distant buildings, "
            "evoking a sense of nostalgia and calm. "
            "--ar 16:9 --v 6 --style photorealistic --quality 2"
        )
        negative = (
            "--no (CGI artifacts, plastic textures, overexposed skies, cartoonish colors, static poses, low detail)"
        )
        return descricao, prompt, negative

    def extract_keyframes(self, threshold=30.0):
        """Scan the video and record frames that differ strongly from the last.

        A frame is a keyframe when the mean absolute grayscale difference from
        the previous frame exceeds ``threshold`` (the first frame always is).
        Each keyframe is saved as a JPEG and captioned; results go to
        ``self.scenes``.

        Args:
            threshold: mean absolute pixel difference (0-255 scale) that
                triggers a new scene.

        Returns:
            bool: always True (kept for interface compatibility).
        """
        output_dir = f"keyframes_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        os.makedirs(output_dir, exist_ok=True)
        cap = cv2.VideoCapture(self.video_path)
        prev = None
        count = 0
        self.scenes = []
        try:
            success, frame = cap.read()
            while success:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # First frame has nothing to compare against -> force keyframe.
                diff = np.inf if prev is None else np.mean(cv2.absdiff(gray, prev))
                if prev is None or diff > threshold:
                    timestamp = cap.get(cv2.CAP_PROP_POS_MSEC) / 1000.0
                    # Convert BGR->RGB once and reuse for both saving and BLIP
                    # (the original converted the same frame twice).
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    img_path = os.path.join(output_dir, f"keyframe_{count+1:02d}.jpg")
                    Image.fromarray(rgb).save(img_path)
                    desc, prmpt, neg = self.describe_image_and_generate_prompt(rgb, count+1)
                    self.scenes.append({
                        'scene_number': count+1,
                        'time': timestamp,
                        'image_path': img_path,
                        'descricao_detalhada': desc,
                        'prompt_ia': prmpt,
                        'negative_prompt': neg
                    })
                    count += 1
                prev = gray
                success, frame = cap.read()
        finally:
            # Guarantee the capture handle is released even if captioning raises.
            cap.release()
        return True

    def save_results(self, output_file=None):
        """Dump video metadata and scene analyses to a UTF-8 JSON file.

        Args:
            output_file: destination path; defaults to a timestamped name.

        Returns:
            str: the path the results were written to.
        """
        if not output_file:
            output_file = f"video_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        results = {
            'video_info': self.video_info,
            'analysis_type': 'keyframe_extraction',
            'scenes': self.scenes,
            'generated_at': datetime.now().isoformat()
        }
        with open(output_file, 'w', encoding='utf-8') as f:
            # ensure_ascii=False keeps the Portuguese accents readable.
            json.dump(results, f, indent=2, ensure_ascii=False)
        return output_file

    def cleanup(self):
        """Release the moviepy clip; safe to call repeatedly."""
        if self.clip:
            self.clip.close()
            # Drop the reference so a second cleanup() is a no-op.
            self.clip = None