Spaces:

WaysAheadGlobal
/

VLM

Sleeping

App Files Files Community

VLM / app.py

WaysAheadGlobal

Update app.py

24e5396 verified 6 months ago

raw

history blame

1.95 kB

	import streamlit as st
	from streamlit_webrtc import VideoTransformerBase, webrtc_streamer, RTCConfiguration
	from transformers import pipeline
	from PIL import Image
	import cv2
	import numpy as np
	import time

	# Page configuration goes first: Streamlit's documentation asks for
	# set_page_config to precede other Streamlit commands, and the cached loader
	# below may render a spinner element while the model is being built.
	st.set_page_config(page_title="TinyLLaVA Webcam", layout="centered")
	st.title("🦙 TinyLLaVA — Webcam Captioning")


	@st.cache_resource
	def _load_pipeline():
		"""Build the TinyLLaVA image-to-text pipeline on CPU.

		Streamlit re-executes this script on every rerun; caching the pipeline as
		a resource keeps a single instance alive instead of rebuilding the model
		each time.
		"""
		return pipeline(
			task="image-to-text",
			model="bczhou/tiny-llava-v1-hf",
			trust_remote_code=True,
			device_map="cpu",
		)


	# Load TinyLLaVA pipeline once
	pipe = _load_pipeline()

	# Shared state
	st_frame = st.empty()
	result_box = st.empty()

	class VideoProcessor(VideoTransformerBase):
		"""Pass webcam frames through unchanged and caption one of them periodically.

		At most once every ``interval`` seconds the current frame is sent to the
		module-level ``pipe`` and the resulting text is stored in
		``last_caption`` for the page script to read.
		"""

		def __init__(self):
			self.last_run = 0  # time.time() of the most recent captioning attempt
			self.interval = 5  # seconds
			self.last_caption = ""

		def transform(self, frame):
			"""Return the frame as a BGR array, refreshing the caption when due.

			Args:
				frame: Incoming video frame; only ``to_ndarray(format="bgr24")``
					is used.

			Returns:
				The frame's BGR pixel array, unmodified.
			"""
			img = frame.to_ndarray(format="bgr24")

			now = time.time()
			if now - self.last_run > self.interval:
				self.last_run = now

				# Convert BGR to RGB
				img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
				pil_image = Image.fromarray(img_rgb)

				# Run TinyLLaVA pipeline. The image is the positional input and the
				# chat-formatted text goes in ``prompt``. No st.* calls here: per the
				# streamlit-webrtc docs this callback runs outside the Streamlit
				# script thread. Inference blocks this call until it finishes.
				prompt = "Describe this scene in detail."
				query = f"USER: <image>\n{prompt}\nASSISTANT:"
				result = pipe(
					pil_image,
					prompt=query,
					generate_kwargs={"max_new_tokens": 200},
				)
				generated = result[0]["generated_text"]
				# The generated text normally echoes the prompt; keep only what
				# follows the last "ASSISTANT:" marker (the whole text if absent).
				self.last_caption = generated.rpartition("ASSISTANT:")[2].strip()

			# Return the same frame, unmodified
			return img

	# RTC config: a public STUN server so peers can discover a route to each other.
	rtc_config = RTCConfiguration(
		{"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
	)

	webrtc_ctx = webrtc_streamer(
		key="example",
		video_processor_factory=VideoProcessor,
		rtc_configuration=rtc_config,
		media_stream_constraints={"video": True, "audio": False},
	)

	if webrtc_ctx.video_processor:
		st.info("Keep your webcam on. The app captures 1 frame every 5 seconds and generates a caption.")
		st.write("Latest Caption:")

		# The caption is produced by the video worker after this script has
		# rendered, and nothing triggers a rerun when it changes. Poll it into a
		# placeholder for as long as the stream is playing; any user interaction
		# reruns the script and ends this loop.
		processor = webrtc_ctx.video_processor
		caption_box = st.empty()
		while webrtc_ctx.state.playing:
			caption_box.write(processor.last_caption or "Waiting for the first caption...")
			time.sleep(1)