|
|
|
|
|
import gradio as gr |
|
|
from transformers import BlipProcessor, BlipForConditionalGeneration |
|
|
from PIL import Image |
|
|
import cv2 |
|
|
import os |
|
|
|
|
|
|
|
|
# Load the BLIP captioning checkpoint once, at import time, so the processor
# and model objects are shared by every call instead of being reloaded.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
|
|
|
|
|
def process_video(video_path):
    """Caption the center frame of each 4-second segment of a video.

    The video is split into consecutive 4-second segments (the last one may
    be shorter). For every segment the frame at its temporal midpoint is
    saved to ``frames/frame_<index>.jpg`` and captioned with the module-level
    BLIP ``processor``/``model``. Captions are then joined two at a time into
    "prompts".

    Args:
        video_path: Filesystem path of the video to analyze.

    Returns:
        A single string: one line per captioned segment, a blank line, then
        one line per combined prompt.

    Raises:
        ValueError: If no path is given, the video cannot be opened, or the
            video reports a non-positive frame rate.
        OSError: If an extracted frame cannot be written to disk.
    """
    # Gradio passes None when the form is submitted without a video.
    if not video_path:
        raise ValueError("No video was provided.")

    segment_seconds = 4

    os.makedirs("frames", exist_ok=True)

    vidcap = cv2.VideoCapture(video_path)
    try:
        if not vidcap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        fps = vidcap.get(cv2.CAP_PROP_FPS)
        total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        # "not fps > 0" also rejects NaN; a zero rate would otherwise surface
        # as an opaque ZeroDivisionError on the next line.
        if not fps > 0:
            raise ValueError(f"Video reports an invalid frame rate: {fps}")
        duration = total_frames / fps

        # Build the (start, end) boundaries, in seconds, of each segment.
        segments = []
        current = 0
        while current < duration:
            segments.append((current, min(current + segment_seconds, duration)))
            current += segment_seconds

        descriptions = []
        for i, (start, end) in enumerate(segments):
            # Seek to the frame in the middle of the segment.
            center_time = (start + end) / 2
            center_frame = int(center_time * fps)
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, center_frame)
            success, frame = vidcap.read()
            if not success:
                # Unreadable frame: the segment is skipped, so the prompts
                # below pair whichever captions remain adjacent.
                continue

            img_path = f"frames/frame_{i}.jpg"
            # Without this check a failed write would silently caption a
            # stale file left behind by a previous run.
            if not cv2.imwrite(img_path, frame):
                raise OSError(f"Could not write frame to {img_path}")

            with Image.open(img_path) as saved_frame:
                pil_image = saved_frame.convert("RGB")
            inputs = processor(images=pil_image, return_tensors="pt")
            out = model.generate(**inputs)
            caption = processor.decode(out[0], skip_special_tokens=True)
            descriptions.append(
                f"Segmento {i+1} ({start:.1f}-{end:.1f}s): {caption}"
            )
    finally:
        # Always free the capture handle, even when captioning fails.
        vidcap.release()

    # Join captions in pairs so each prompt covers two segments.
    prompts = []
    for j in range(0, len(descriptions), 2):
        combined = " ".join(descriptions[j:j + 2])
        prompts.append(f"Prompt {j // 2 + 1}: {combined}")

    return "\n".join(descriptions) + "\n\n" + "\n".join(prompts)
|
|
|
|
|
# Gradio UI: a single video upload is passed to process_video and the
# returned report is shown as plain text.
iface = gr.Interface(
    process_video,
    gr.Video(),
    "text",
    title="Video Analyzer with BLIP (CPU Friendly)",
    description="Faz análise de frames centrais de segmentos de 4s e gera prompts combinados de 8s.",
)

# Start the web server as soon as the script is executed.
iface.launch()
|
|
|