Spaces:
Sleeping
Sleeping
import io
import threading

import cv2
import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
# Initialize the webcam (device index 0 = default camera).
# Shared module-level handle: read by get_frame() and video_feed() below.
cap = cv2.VideoCapture(0)
# Load the Hugging Face model and processor once at startup; the model is
# moved to GPU when available, otherwise it stays on CPU.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-vqa-base").to("cuda" if torch.cuda.is_available() else "cpu")
def query_the_image(query: str, image_data: bytes) -> str:
    """Answer *query* about the JPEG image in *image_data* using BLIP VQA.

    Args:
        query: Natural-language question to ask about the image.
        image_data: Encoded image bytes (JPEG from the webcam capture).

    Returns:
        The decoded model answer, or an ``"Error: ..."`` string on failure.
    """
    try:
        # Fix: the original referenced `io` without importing it, so every
        # call raised NameError and was swallowed into the error string.
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        inputs = processor(image, query, return_tensors="pt").to(model.device)
        output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # Best-effort: surface the failure text to the UI instead of crashing.
        return f"Error: {e}"
def get_frame():
    """Grab one frame from the shared webcam as JPEG bytes.

    Returns:
        The JPEG-encoded frame as ``bytes``, or ``None`` when the capture
        device produces no frame.
    """
    ok, frame = cap.read()
    if not ok:
        return None
    encoded = cv2.imencode('.jpg', frame)[1]
    return encoded.tobytes()
def process_image(prompt):
    """Capture a webcam frame and ask the VQA model *prompt* about it.

    Returns the model's answer, or an error string when no frame could be
    captured.
    """
    snapshot = get_frame()
    if not snapshot:
        return "Error capturing image"
    return query_the_image(prompt, snapshot)
def video_feed():
    """Yield JPEG-encoded webcam frames until the capture stops delivering."""
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        yield cv2.imencode('.jpg', frame)[1].tobytes()
# Build and launch the Gradio UI.
# NOTE(review): the original ran threading.Thread(target=video_feed).start().
# Calling a generator function only creates a generator object — nothing is
# iterated — so that thread did no work and exited immediately; it is dropped
# here as dead code. The gr.Video component is still never wired to
# video_feed(); streaming live frames requires an explicit event hookup
# (e.g. a streaming gr.Image updated on a timer) — TODO confirm the intended
# streaming mechanism before relying on the live preview.
gui = gr.Blocks()
with gui:
    gr.Markdown("# Live Video AI Assistant")
    with gr.Row():
        video_component = gr.Video()
    prompt = gr.Textbox(label="Enter your safety policy for the AI to analyse each frame in real time")
    response = gr.Textbox(label="AI Response")
    btn = gr.Button("Ask")
    # On click: grab a fresh frame and answer the prompt about it.
    btn.click(process_image, inputs=prompt, outputs=response)
gui.launch()