Update app.py
app.py CHANGED
@@ -1,67 +1,50 @@
-import gradio as gr
-import cv2
-import tempfile
-from PIL import Image
-from transformers import Blip2Processor, Blip2ForConditionalGeneration
-import torch
-import os
-
-# Load BLIP
-processor =
-model =
-
-    inputs = processor(images=image, return_tensors="pt")
-    generated_ids = model.generate(**inputs, max_new_tokens=50)
-    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
-    return caption
-
-        success, frame = cap.read()
-        if not success:
-            break
-        if count % interval == 0:
-            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            frames.append((count, Image.fromarray(frame_rgb)))
-        count += 1
-    cap.release()
-    return frames
-
-gr.Interface(
-    fn=handle_upload,
-    inputs=gr.File(label="Upload Image or Video"),
-    outputs=gr.Textbox(label="Scene Descriptions"),
-    title="🧠 Scene Understanding AI - BLIP-2 (Image + Video)",
-    description="Upload a photo or video. The AI will describe the scene(s) using BLIP-2 (FLAN-T5). Works on CPU."
-).launch()
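The removed file's model-loading lines are truncated in the diff ("processor =" and "model =" with nothing after the equals sign), and the function headers around them are not shown. Judging from the imports and the "BLIP-2 (FLAN-T5)" wording in the old description, the loading step presumably looked something like this hypothetical sketch; the exact checkpoint name is a guess:

# Hypothetical reconstruction of the truncated loading lines; the checkpoint
# name is guessed from the "BLIP-2 (FLAN-T5)" description and may differ.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xl")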
+# app.py
+
+import gradio as gr
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+import cv2
+from PIL import Image
+
+# Load BLIP captioning model
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+device = torch.device("cpu")
+model.to(device)
+
+# Live webcam captioning generator
+def webcam_caption():
+    cap = cv2.VideoCapture(0)  # open webcam
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Convert OpenCV frame (BGR) to RGB PIL Image
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(frame_rgb)
+
+        # Generate caption
+        inputs = processor(images=image, return_tensors="pt").to(device)
+        out = model.generate(**inputs, max_new_tokens=50)
+        caption = processor.decode(out[0], skip_special_tokens=True)
+
+        yield frame_rgb, caption
+
+    cap.release()
+
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("## 🎥 Live Webcam BLIP Captioning (CPU)")
+    video = gr.Image(label="Webcam Stream")
+    text = gr.Textbox(label="Caption")
+
+    demo.load(
+        fn=webcam_caption,
+        inputs=None,
+        outputs=[video, text],
+        every=2  # call generator every 2 sec (adjust if you want)
+    )
+
+demo.launch()
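The captioning path in the new version can be sanity-checked without a webcam by running the same processor/model calls on a single image. A minimal sketch, reusing the processor, model, and device defined in app.py above; "test.jpg" is a hypothetical local file:

# Smoke test for the captioning path, no webcam required.
# processor, model, and device come from app.py above;
# "test.jpg" is a hypothetical local image path.
from PIL import Image

img = Image.open("test.jpg").convert("RGB")
inputs = processor(images=img, return_tensors="pt").to(device)
out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))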
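One caveat about the new version: cv2.VideoCapture(0) opens a camera on the machine running the app, so on a hosted Space, which has no camera device, cap.read() fails on the first call and the generator stops before yielding anything. Capturing frames in the visitor's browser is the usual workaround. A rough sketch of that approach, assuming Gradio 4.x (the sources= and .stream() APIs) and reusing the processor, model, and device defined above:

# Sketch only, assuming Gradio 4.x: frames are captured in the visitor's
# browser and captioned server-side, instead of reading a server camera.
import gradio as gr
from PIL import Image

def caption_frame(frame):
    # frame arrives as an RGB numpy array from the browser webcam
    image = Image.fromarray(frame)
    inputs = processor(images=image, return_tensors="pt").to(device)
    out = model.generate(**inputs, max_new_tokens=50)
    return processor.decode(out[0], skip_special_tokens=True)

with gr.Blocks() as demo:
    cam = gr.Image(sources=["webcam"], streaming=True)
    caption = gr.Textbox(label="Caption")
    cam.stream(caption_frame, inputs=cam, outputs=caption)

demo.launch()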