# app.py — pasted from a Hugging Face Space whose build log reported: Build error
# (the script below is the Space's application code)
# app.py
#
# Live webcam BLIP captioning demo (CPU-only).
# Imports grouped: stdlib-style third-party first, then model setup.
import gradio as gr
import torch
import cv2
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP image-captioning model and its processor once at startup.
# Both are module-level so the streaming generator below can reuse them.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Pin inference to CPU (this Space has no GPU) and switch to eval mode so
# dropout/batch-norm layers behave deterministically during generation.
device = torch.device("cpu")
model.to(device)
model.eval()
# Live webcam captioning generator.
def webcam_caption():
    """Stream webcam frames paired with BLIP-generated captions.

    Opens the default webcam (device 0) and, until the camera stops
    delivering frames, yields ``(frame_rgb, caption)`` tuples where
    ``frame_rgb`` is an RGB numpy array and ``caption`` is the decoded
    BLIP caption string. Yields nothing if no camera can be opened.

    The capture handle is always released, even if the consumer stops
    iterating early (Gradio cancelling the stream would otherwise leak
    the camera, since code after an infinite ``while`` never runs).
    """
    cap = cv2.VideoCapture(0)  # open default webcam
    if not cap.isOpened():
        # No camera available — end the stream instead of looping on
        # failed reads.
        return
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV delivers BGR; BLIP's processor expects an RGB PIL image.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame_rgb)
            inputs = processor(images=image, return_tensors="pt").to(device)
            # Inference only — disable autograd bookkeeping to save CPU/memory.
            with torch.no_grad():
                out = model.generate(**inputs, max_new_tokens=50)
            caption = processor.decode(out[0], skip_special_tokens=True)
            yield frame_rgb, caption
    finally:
        cap.release()  # runs on break AND on generator close/GC
# Gradio interface: an image pane for the current frame and a textbox for
# its caption, refreshed by polling the streaming generator.
with gr.Blocks() as demo:
    gr.Markdown("## 🎥 Live Webcam BLIP Captioning (CPU)")  # heading emoji repaired from mojibake "π₯"
    video = gr.Image(label="Webcam Stream")
    text = gr.Textbox(label="Caption")
    # On page load, start webcam_caption and push each yielded
    # (frame, caption) pair into the two components above.
    demo.load(
        fn=webcam_caption,
        inputs=None,
        outputs=[video, text],
        every=2,  # re-poll the generator every 2 seconds (tune as needed)
    )

demo.launch()