VLM / app.py
WaysAheadGlobal's picture
Update app.py
106eff3 verified
raw
history blame
1.39 kB
import streamlit as st
import cv2
from transformers import pipeline
from PIL import Image
import numpy as np
import time
# Page chrome. set_page_config is issued before any other Streamlit call in
# this script. The emoji and the em dash were stored as mis-decoded UTF-8
# (mojibake) and are restored to the intended characters here.
st.set_page_config(page_title="🎥 TinyLLaVA CCTV Alternative", layout="wide")
st.title("🧠 TinyLLaVA — Webcam Frame-by-Frame (No WebRTC)")
# Load TinyLLaVA pipeline
@st.cache_resource
def _load_pipeline():
    """Build the TinyLLaVA image-to-text pipeline once per server process.

    Streamlit re-executes the whole script on every rerun; caching the
    pipeline as a resource avoids re-instantiating the model each time.
    """
    # NOTE(review): trust_remote_code=True executes Python code shipped with
    # the model repository. Confirm the repo is trusted, or pin a revision.
    return pipeline(
        task="image-to-text",
        model="bczhou/tiny-llava-v1-hf",
        trust_remote_code=True,
        device_map="cpu",
    )


pipe = _load_pipeline()
# OpenCV webcam (device index 0)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # The capture loop is guarded by cap.isOpened(), so a camera that failed
    # to open would otherwise leave the page blank with no explanation.
    cap.release()
    st.error("Could not open webcam (device 0).")
    st.stop()

FRAME_INTERVAL = 30  # run the captioning model on every 30th frame

# Placeholders are created once and overwritten in place on each iteration;
# the frame placeholder is created first so it renders above the caption.
frame_placeholder = st.empty()
caption_placeholder = st.empty()

frame_count = 0  # number of frames read so far
last_caption = ""  # most recent caption produced by the model
# Main capture loop: show every frame, caption every FRAME_INTERVAL-th one.
# The try/finally guarantees the camera handle is released however the loop
# ends (read failure, exception, or the script being stopped).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            st.warning("No webcam feed")
            break

        frame = cv2.flip(frame, 1)  # mirror horizontally (selfie view)
        # OpenCV delivers BGR; convert to RGB for display and for PIL.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_placeholder.image(rgb, channels="RGB", use_column_width=True)

        # Every FRAME_INTERVAL frames -> run TinyLLaVA
        if frame_count % FRAME_INTERVAL == 0:
            pil_image = Image.fromarray(rgb)
            prompt = "Describe this scene in detail."
            query = f"USER: <image>\n{prompt}\nASSISTANT:"
            # The image-to-text pipeline takes the image as its positional
            # input and the text prompt via the `prompt` keyword.
            result = pipe(pil_image, prompt=query)
            last_caption = result[0]["generated_text"]
            caption_placeholder.markdown(f"**Latest:** {last_caption}")

        frame_count += 1
        # Slow down loop to save CPU (adjust if needed)
        time.sleep(0.1)
finally:
    cap.release()