Blip / app.py
WaysAheadGlobal's picture
Update app.py
f9d091a verified
raw
history blame
2.28 kB
import gradio as gr
import cv2
import tempfile
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
import torch
import os
# Load the BLIP-2 (FLAN-T5) processor and model once at import time so every
# request reuses the same instances. Both come from the same checkpoint id.
_BLIP2_CHECKPOINT = "Salesforce/blip2-flan-t5-xl"
processor = Blip2Processor.from_pretrained(_BLIP2_CHECKPOINT)
model = Blip2ForConditionalGeneration.from_pretrained(_BLIP2_CHECKPOINT)
def describe_image(image):
    """Generate a caption for one image with the module-level BLIP-2 model.

    Args:
        image: An image object exposing ``convert("RGB")`` (the callers in
            this file pass PIL images).

    Returns:
        The first decoded caption, with surrounding whitespace stripped.
    """
    rgb_image = image.convert("RGB")
    model_inputs = processor(images=rgb_image, return_tensors="pt")
    # Generation is capped at 50 new tokens per caption.
    output_ids = model.generate(**model_inputs, max_new_tokens=50)
    decoded = processor.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0].strip()
def extract_video_frames(video_path, interval=30):
    """Sample every ``interval``-th frame of a video as an RGB PIL image.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        interval: Keep the frames whose zero-based index is a multiple of
            this value. Must be positive.

    Returns:
        A list of ``(frame_index, image)`` tuples in reading order. The list
        is empty when no frame could be read.

    Raises:
        ValueError: If ``interval`` is not positive.
    """
    # Fail fast with a clear message instead of a ZeroDivisionError that only
    # surfaces once the first frame has been decoded.
    if interval <= 0:
        raise ValueError(f"interval must be positive, got {interval!r}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        count = 0
        while True:
            success, frame = cap.read()
            if not success:
                break
            if count % interval == 0:
                # Frames are converted from BGR to RGB before wrapping in PIL.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append((count, Image.fromarray(frame_rgb)))
            count += 1
    finally:
        # Release the capture even if a conversion above raises.
        cap.release()
    return frames
def handle_upload(file):
    """Caption an uploaded image, or the sampled frames of an uploaded video.

    Args:
        file: The upload from the Gradio ``File`` input: either a filesystem
            path (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute is that path. ``None`` when nothing was uploaded.

    Returns:
        For images, a single caption. For videos, one caption line per
        sampled frame joined with newlines (empty if no frame was read).
        Otherwise a human-readable error message.
    """
    if file is None:
        return "❌ No file uploaded. Please upload an image or video."

    # Work from the on-disk path rather than reading the object: a path string
    # has no ``read()``, and the upload already exists as a file, so copying
    # it into a second temp file is unnecessary.
    # NOTE(review): assumes ``file.name`` is a readable path on disk, which is
    # how the Gradio file wrapper is used here; confirm for other callers.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    name = path.lower()

    if name.endswith((".jpg", ".jpeg", ".png")):
        # The context manager closes the image file once it is captioned.
        with Image.open(path) as image:
            caption = describe_image(image)
        return f"🖼️ Image Caption:\n{caption}"

    if name.endswith((".mp4", ".mov", ".avi", ".mkv")):
        # Every 30th frame; about one per second for 30 fps footage.
        frames = extract_video_frames(path, interval=30)
        captions = [
            f"🕒 Frame {idx}: {describe_image(frame)}" for idx, frame in frames
        ]
        return "\n".join(captions)

    return "❌ Unsupported file type. Please upload an image or video."
# Gradio UI: a single file input routed through handle_upload, with the
# resulting caption text shown in a text box. Launched at import time.
demo = gr.Interface(
    fn=handle_upload,
    inputs=gr.File(label="Upload Image or Video"),
    outputs=gr.Textbox(label="Scene Descriptions"),
    title="🧠 Scene Understanding AI – BLIP-2 (Image + Video)",
    description="Upload a photo or video. The AI will describe the scene(s) using BLIP-2 (FLAN-T5). Works on CPU.",
)
demo.launch()