Spaces:
Sleeping
Sleeping
"""Streamlit app that captions webcam frames with TinyLLaVA.

Frames are read with OpenCV (no WebRTC), mirrored, and shown in the page.
Every ``FRAME_INTERVAL``-th frame is sent to the TinyLLaVA image-to-text
pipeline and the resulting description is displayed below the video.
"""

import time

import cv2
import numpy as np
import streamlit as st
from PIL import Image
from transformers import pipeline

# Must be the first Streamlit call in the script.
st.set_page_config(page_title="🎥 TinyLLaVA CCTV Alternative", layout="wide")
st.title("🧠 TinyLLaVA — Webcam Frame-by-Frame (No WebRTC)")

MODEL_ID = "bczhou/tiny-llava-v1-hf"
FRAME_INTERVAL = 30  # process every 30 frames
MAX_NEW_TOKENS = 100  # without an explicit limit, captions are cut very short
LOOP_DELAY_SECONDS = 0.1  # slow down loop to save CPU (adjust if needed)
PROMPT = "Describe this scene in detail."
ANSWER_MARKER = "ASSISTANT:"


@st.cache_resource
def load_captioner():
    """Build the TinyLLaVA pipeline once and reuse it across script reruns.

    Streamlit re-executes the whole script on every rerun; caching the
    pipeline avoids reloading the model weights each time.
    """
    # NOTE(review): trust_remote_code=True executes code shipped with the
    # model repository. Confirm it is actually required for this checkpoint.
    return pipeline(
        task="image-to-text",
        model=MODEL_ID,
        trust_remote_code=True,
        device_map="cpu",
    )


def extract_answer(generated_text):
    """Return only the model's reply, dropping the echoed chat prompt.

    If ``ANSWER_MARKER`` is absent, the whole text is returned stripped.
    """
    return generated_text.rsplit(ANSWER_MARKER, 1)[-1].strip()


pipe = load_captioner()

frame_placeholder = st.empty()
caption_placeholder = st.empty()

# OpenCV webcam
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Otherwise the loop below is skipped and the page stays blank.
        st.warning("No webcam feed")

    frame_count = 0
    last_caption = ""

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            st.warning("No webcam feed")
            break

        frame = cv2.flip(frame, 1)  # selfie view
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_placeholder.image(rgb, channels="RGB", use_column_width=True)

        # every FRAME_INTERVAL frames → run TinyLLaVA
        if frame_count % FRAME_INTERVAL == 0:
            pil_image = Image.fromarray(rgb)
            query = f"USER: <image>\n{PROMPT}\nASSISTANT:"
            # The pipeline takes the image first; the text goes in `prompt`.
            result = pipe(
                pil_image,
                prompt=query,
                generate_kwargs={"max_new_tokens": MAX_NEW_TOKENS},
            )
            last_caption = extract_answer(result[0]["generated_text"])
            caption_placeholder.markdown(f"**Latest:** {last_caption}")

        frame_count += 1
        time.sleep(LOOP_DELAY_SECONDS)
finally:
    # Free the camera even when the loop exits early or the script is
    # interrupted, so the next run can open the device again.
    cap.release()