WaysAheadGlobal committed on
Commit
a3895ed
·
verified ·
1 Parent(s): 419702c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ from PIL import Image
4
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
5
+ import torch
6
+ import numpy as np
7
+ import tempfile
8
+
# Load the BLIP-2 captioning model.
# NOTE(review): the original comment called flan-t5-xl "lightweight" — it is
# not (~4B parameters). Consider "Salesforce/blip2-opt-2.7b" or a quantized
# checkpoint if memory is constrained.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")
model.eval()  # inference-only app: disable dropout / training-mode layers
# Captioning callback: grabs one frame from the default webcam and describes
# it with BLIP-2. "Real-time" is simulated by clicking the button (~every
# few seconds).
def describe_live_frame():
    """Capture a single webcam frame and return (image, caption).

    Returns:
        tuple: ``(PIL.Image.Image | None, str)`` — the captured frame (or
        ``None`` on failure) and either the generated caption or an error
        message. Always a 2-tuple, matching the two Gradio output
        components wired to this callback.
    """
    cap = cv2.VideoCapture(0)  # 0 = default webcam
    if not cap.isOpened():
        # BUG FIX: the original returned a bare string here, but the button
        # is wired to two outputs (image, textbox); Gradio raises when a
        # callback returns fewer values than outputs. Return a 2-tuple.
        return None, "Cannot access camera."

    ret, frame = cap.read()
    cap.release()  # release the device immediately; only one frame is needed
    if not ret:
        # BUG FIX: same single-string-vs-two-outputs mismatch as above.
        return None, "Failed to capture frame."

    # OpenCV delivers BGR; PIL/transformers expect RGB.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(frame_rgb)

    inputs = processor(images=image, return_tensors="pt")
    # Inference only — no_grad avoids building an autograd graph.
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=50)
    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return image, caption
# Gradio front end: a single button triggers one capture-and-caption cycle.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Live Scene Understanding – BLIP-2 (Simulated Real-Time)")

    capture_button = gr.Button("Capture & Describe Scene")
    frame_display = gr.Image(label="Captured Frame")
    description_box = gr.Textbox(label="Scene Description")

    # No inputs: the callback reads the webcam itself and returns
    # (image, caption) for the two components below.
    capture_button.click(
        fn=describe_live_frame,
        inputs=[],
        outputs=[frame_display, description_box],
    )

demo.launch()