Spaces:
Sleeping
Sleeping
import io
import threading

import cv2
import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
# Initialize the webcam (device index 0 = default camera).
# Shared module-level handle: read by get_frame() and video_feed() below.
cap = cv2.VideoCapture(0)
# Load the Hugging Face model and processor once at startup; the model is
# moved to GPU when available, otherwise it stays on CPU.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-vqa-base").to("cuda" if torch.cuda.is_available() else "cpu")
def query_the_image(query: str, image_data: bytes) -> str:
    """Answer *query* about the JPEG image in *image_data* using BLIP VQA.

    Args:
        query: Natural-language question to ask about the image.
        image_data: Encoded image bytes (JPEG from the webcam capture).

    Returns:
        The decoded model answer, or an ``"Error: ..."`` string on failure.
    """
    try:
        # Fix: the original referenced `io` without importing it, so every
        # call raised NameError and was swallowed into the error string.
        image = Image.open(io.BytesIO(image_data)).convert("RGB")
        inputs = processor(image, query, return_tensors="pt").to(model.device)
        output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # Best-effort: surface the failure text to the UI instead of crashing.
        return f"Error: {e}"
def get_frame():
    """Grab one frame from the shared webcam as JPEG bytes.

    Returns:
        The JPEG-encoded frame as ``bytes``, or ``None`` when the capture
        device produces no frame.
    """
    ok, frame = cap.read()
    if not ok:
        return None
    encoded = cv2.imencode('.jpg', frame)[1]
    return encoded.tobytes()
def process_image(prompt):
    """Capture a webcam frame and ask the VQA model *prompt* about it.

    Returns the model's answer, or an error string when no frame could be
    captured.
    """
    snapshot = get_frame()
    if not snapshot:
        return "Error capturing image"
    return query_the_image(prompt, snapshot)
def video_feed():
    """Yield JPEG-encoded webcam frames until the capture stops delivering."""
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        yield cv2.imencode('.jpg', frame)[1].tobytes()
# Build and launch the Gradio UI.
# NOTE(review): the original ran threading.Thread(target=video_feed).start().
# Calling a generator function only creates a generator object — nothing is
# iterated — so that thread did no work and exited immediately; it is dropped
# here as dead code. The gr.Video component is still never wired to
# video_feed(); streaming live frames requires an explicit event hookup
# (e.g. a streaming gr.Image updated on a timer) — TODO confirm the intended
# streaming mechanism before relying on the live preview.
gui = gr.Blocks()
with gui:
    gr.Markdown("# Live Video AI Assistant")
    with gr.Row():
        video_component = gr.Video()
    prompt = gr.Textbox(label="Enter your safety policy for the AI to analyse each frame in real time")
    response = gr.Textbox(label="AI Response")
    btn = gr.Button("Ask")
    # On click: grab a fresh frame and answer the prompt about it.
    btn.click(process_image, inputs=prompt, outputs=response)
gui.launch()