WaysAheadGlobal committed on
Commit
f9d091a
·
verified ·
1 Parent(s): 0932151

Update app.py

Files changed (1)
  1. app.py +51 -26
app.py CHANGED
@@ -1,42 +1,67 @@
  import gradio as gr
  import cv2
  from PIL import Image
  from transformers import Blip2Processor, Blip2ForConditionalGeneration
  import torch

- # Load BLIP-2 FLAN-T5 model (CPU-compatible)
  processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
  model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")

- # Function to capture frame and generate caption
- def describe_live_frame():
-     cap = cv2.VideoCapture(0)  # 0 = default webcam
-     if not cap.isOpened():
-         return None, "❌ Cannot access camera. Try reconnecting or use a different device."
-
-     ret, frame = cap.read()
-     cap.release()
-     if not ret:
-         return None, "❌ Failed to capture frame."
-
-     # Convert OpenCV frame to PIL
-     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-     image = Image.fromarray(frame_rgb)
-
-     # Run BLIP-2 captioning
      inputs = processor(images=image, return_tensors="pt")
      generated_ids = model.generate(**inputs, max_new_tokens=50)
      caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

-     return image, caption

- # Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("## 🧠 Live Scene Captioning (Simulated Real-Time)\nBLIP-2 FLAN-T5 – CPU Friendly")
-     btn = gr.Button("📸 Capture & Describe Scene")
-     img_output = gr.Image(label="Captured Frame")
-     text_output = gr.Textbox(label="Scene Description")

-     btn.click(fn=describe_live_frame, inputs=[], outputs=[img_output, text_output])

- demo.launch()

  import gradio as gr
  import cv2
+ import tempfile
  from PIL import Image
  from transformers import Blip2Processor, Blip2ForConditionalGeneration
  import torch
+ import os

+ # Load BLIP-2 model (FLAN-T5 - CPU friendly)
  processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
  model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")

+ def describe_image(image):
+     image = image.convert("RGB")
      inputs = processor(images=image, return_tensors="pt")
      generated_ids = model.generate(**inputs, max_new_tokens=50)
      caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+     return caption
+
+ def extract_video_frames(video_path, interval=30):
+     cap = cv2.VideoCapture(video_path)
+     frames = []
+     count = 0
+     success = True
+     while success:
+         success, frame = cap.read()
+         if not success:
+             break
+         if count % interval == 0:
+             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+             frames.append((count, Image.fromarray(frame_rgb)))
+         count += 1
+     cap.release()
+     return frames

+ def handle_upload(file):
+     name = file.name.lower()
+     if name.endswith((".jpg", ".jpeg", ".png")):
+         image = Image.open(file)
+         caption = describe_image(image)
+         return f"🖼️ Image Caption:\n{caption}"
+
+     elif name.endswith((".mp4", ".mov", ".avi", ".mkv")):
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
+             tmp.write(file.read())
+             tmp_path = tmp.name

+         frames = extract_video_frames(tmp_path, interval=30)  # 1 fps
+         captions = []
+         for idx, frame in frames:
+             caption = describe_image(frame)
+             captions.append(f"🕒 Frame {idx}: {caption}")

+         os.remove(tmp_path)
+         return "\n".join(captions)
+
+     else:
+         return "❌ Unsupported file type. Please upload an image or video."

+ # Gradio UI
+ gr.Interface(
+     fn=handle_upload,
+     inputs=gr.File(label="Upload Image or Video"),
+     outputs=gr.Textbox(label="Scene Descriptions"),
+     title="🧠 Scene Understanding AI – BLIP-2 (Image + Video)",
+     description="Upload a photo or video. The AI will describe the scene(s) using BLIP-2 (FLAN-T5). Works on CPU."
+ ).launch()
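
The captioning path (describe_image) is carried over unchanged from the old describe_live_frame. If a GPU happens to be available, the same call can be moved off the CPU along these lines; this is a sketch, not part of the commit, and describe_image_on_device is a hypothetical name:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def describe_image_on_device(image):
    # Same BLIP-2 generate/decode sequence as describe_image in the diff,
    # with the input tensors moved to the chosen device first.
    inputs = processor(images=image.convert("RGB"), return_tensors="pt").to(device)
    generated_ids = model.generate(**inputs, max_new_tokens=50)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()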
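One caveat on the new extract_video_frames: interval=30 keeps every 30th frame, so the "# 1 fps" comment only holds for 30 fps footage. A minimal rate-aware variant, assuming OpenCV reports the frame rate correctly (extract_frames_at_fps is a hypothetical name):

import cv2
from PIL import Image

def extract_frames_at_fps(video_path, target_fps=1.0):
    # Derive the sampling stride from the video's native frame rate
    # instead of hard-coding every 30th frame.
    cap = cv2.VideoCapture(video_path)
    native_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # returns 0.0 when unreported; assume 30
    interval = max(1, round(native_fps / target_fps))
    frames, count = [], 0
    while True:
        success, frame = cap.read()
        if not success:
            break
        if count % interval == 0:
            frames.append((count, Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))))
        count += 1
    cap.release()
    return frames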
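A compatibility note on handle_upload, flagged as an assumption rather than a verified issue with this Space: depending on the Gradio version, gr.File can hand the callback a filepath string rather than an open file object, in which case file.read() and file.name would not behave as written. A defensive sketch (normalize_upload is a hypothetical helper):

import os

def normalize_upload(file):
    # Accept either a str path or a file-like object exposing .name,
    # and return the path plus a lowercased name for the extension checks.
    path = file if isinstance(file, str) else file.name
    return path, os.path.basename(path).lower()

With a real path in hand, Image.open(path) and extract_video_frames(path) can be called directly, which would also remove the need for the NamedTemporaryFile copy and the os.remove cleanup.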