Spaces:
Sleeping
Sleeping
File size: 1,393 Bytes
d78fda0 106eff3 621bb5d d78fda0 24e5396 d78fda0 106eff3 621bb5d d78fda0 106eff3 24e5396 106eff3 24e5396 106eff3 24e5396 106eff3 d78fda0 106eff3 d78fda0 106eff3 24e5396 106eff3 24e5396 106eff3 d78fda0 106eff3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import streamlit as st
import cv2
from transformers import pipeline
from PIL import Image
import numpy as np
import time
# Streamlit script: shows a mirrored webcam feed and, every FRAME_INTERVAL
# frames, captions the current frame with the TinyLLaVA image-to-text pipeline.
#
# NOTE(review): the leading glyphs in the two title strings below look
# mis-encoded (probably emoji / a dash damaged in transcoding) — confirm the
# intended characters before editing these user-facing strings.
st.set_page_config(page_title="π₯ TinyLLaVA CCTV Alternative", layout="wide")
st.title("π§ TinyLLaVA β Webcam Frame-by-Frame (No WebRTC)")

FRAME_INTERVAL = 30  # caption one frame out of every 30 displayed
# Explicit generation budget for each caption; without it the length falls
# back to the library default. TODO(review): tune for acceptable CPU latency.
MAX_NEW_TOKENS = 200
LOOP_DELAY_SECONDS = 0.1  # pause between frames to save CPU (adjust if needed)


@st.cache_resource
def _load_pipeline():
    """Build the TinyLLaVA image-to-text pipeline on CPU.

    Wrapped in ``st.cache_resource`` so the model object is created once and
    reused, instead of being rebuilt each time Streamlit re-executes this
    script.
    """
    return pipeline(
        task="image-to-text",
        model="bczhou/tiny-llava-v1-hf",
        trust_remote_code=True,
        device_map="cpu",
    )


pipe = _load_pipeline()

# OpenCV webcam (device index 0)
cap = cv2.VideoCapture(0)
frame_placeholder = st.empty()
caption_placeholder = st.empty()
frame_count = 0
last_caption = ""

if not cap.isOpened():
    # Without this the loop below is skipped and the page stays blank.
    st.error("Could not open webcam")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            st.warning("No webcam feed")
            break

        frame = cv2.flip(frame, 1)  # selfie view
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_placeholder.image(rgb, channels="RGB", use_column_width=True)

        # Every FRAME_INTERVAL frames, run TinyLLaVA on the current frame.
        if frame_count % FRAME_INTERVAL == 0:
            pil_image = Image.fromarray(rgb)
            prompt = "Describe this scene in detail."
            query = f"USER: <image>\n{prompt}\nASSISTANT:"
            # The image is the pipeline's positional input; the chat-formatted
            # text goes in through the `prompt` keyword.
            result = pipe(
                pil_image,
                prompt=query,
                generate_kwargs={"max_new_tokens": MAX_NEW_TOKENS},
            )
            last_caption = result[0]["generated_text"]
            caption_placeholder.markdown(f"**Latest:** {last_caption}")

        frame_count += 1
        time.sleep(LOOP_DELAY_SECONDS)
finally:
    # Always hand the camera back, whether the loop ended normally, hit the
    # "no feed" break, or was interrupted by an exception.
    cap.release()
|