WaysAheadGlobal committed on
Commit
a3895ed
·
verified ·
1 Parent(s): 419702c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ from PIL import Image
4
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
5
+ import torch
6
+ import numpy as np
7
+ import tempfile
8
+
# Load the BLIP-2 captioning model.
# NOTE(review): the original comment called flan-t5-xl "lightweight" — it is
# not (~4B parameters). Consider "Salesforce/blip2-opt-2.7b" or a quantized
# checkpoint if memory is constrained.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")
model.eval()  # inference-only app: disable dropout / training-mode layers
# Captioning callback: grabs one frame from the default webcam and describes
# it with BLIP-2. "Real-time" is simulated by clicking the button (~every
# few seconds).
def describe_live_frame():
    """Capture a single webcam frame and return (image, caption).

    Returns:
        tuple: ``(PIL.Image.Image | None, str)`` — the captured frame (or
        ``None`` on failure) and either the generated caption or an error
        message. Always a 2-tuple, matching the two Gradio output
        components wired to this callback.
    """
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        # BUG FIX: the original returned a bare string here, but the button
        # is wired to two outputs (image, textbox); Gradio raises when a
        # callback returns fewer values than outputs. Return a 2-tuple.
        return None, "Cannot access camera."

    ret, frame = cap.read()
    cap.release()  # release the device immediately; only one frame is needed
    if not ret:
        # BUG FIX: same single-string-vs-two-outputs mismatch as above.
        return None, "Failed to capture frame."

    # OpenCV delivers BGR; PIL/transformers expect RGB.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(frame_rgb)

    inputs = processor(images=image, return_tensors="pt")
    # Inference only — no_grad avoids building an autograd graph.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=50)
    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return image, caption
# Gradio front end: a single button triggers one capture-and-caption cycle.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Live Scene Understanding – BLIP-2 (Simulated Real-Time)")

    capture_button = gr.Button("Capture & Describe Scene")
    frame_display = gr.Image(label="Captured Frame")
    description_box = gr.Textbox(label="Scene Description")

    # No inputs: the callback reads the webcam itself and returns
    # (image, caption) for the two components below.
    capture_button.click(
        fn=describe_live_frame,
        inputs=[],
        outputs=[frame_display, description_box],
    )

demo.launch()