WaysAheadGlobal committed
Commit fb9ccfe · verified · 1 Parent(s): 106eff3

Update app.py

Files changed (1)
  1. app.py +21 -35
app.py CHANGED
@@ -1,14 +1,12 @@
 import streamlit as st
-import cv2
 from transformers import pipeline
 from PIL import Image
-import numpy as np
-import time
 
-st.set_page_config(page_title="🎥 TinyLLaVA CCTV Alternative", layout="wide")
-st.title("🧠 TinyLLaVA — Webcam Frame-by-Frame (No WebRTC)")
+# Configure the Streamlit page
+st.set_page_config(page_title="TinyLLaVA Snapshot", layout="centered")
+st.title("📷 TinyLLaVA — Snapshot Webcam (Spaces Compatible)")
 
-# Load TinyLLaVA pipeline
+# Load TinyLLaVA pipeline once
 pipe = pipeline(
     task="image-to-text",
     model="bczhou/tiny-llava-v1-hf",
@@ -16,37 +14,25 @@ pipe = pipeline(
     device_map="cpu"
 )
 
-# OpenCV webcam
-cap = cv2.VideoCapture(0)
-FRAME_INTERVAL = 30  # process every 30 frames
+# Camera input
+uploaded_image = st.camera_input("📸 Take a photo")
 
-frame_placeholder = st.empty()
-caption_placeholder = st.empty()
+# Prompt input
+prompt = st.text_input("💬 Your question about the image:", value="Describe this scene in detail.")
 
-frame_count = 0
-last_caption = ""
+# Process when both image and prompt are present
+if uploaded_image and prompt:
+    # Convert to PIL image
+    img = Image.open(uploaded_image).convert("RGB")
 
-while cap.isOpened():
-    ret, frame = cap.read()
-    if not ret:
-        st.warning("No webcam feed")
-        break
+    # Build prompt in TinyLLaVA format
+    query = f"USER: <image>\n{prompt}\nASSISTANT:"
 
-    frame = cv2.flip(frame, 1)  # selfie view
-    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    frame_placeholder.image(rgb, channels="RGB", use_column_width=True)
+    with st.spinner("TinyLLaVA is thinking..."):
+        result = pipe(query, img)
+        answer = result[0]["generated_text"]
 
-    # every FRAME_INTERVAL frames → run TinyLLaVA
-    if frame_count % FRAME_INTERVAL == 0:
-        pil_image = Image.fromarray(rgb)
-        prompt = "Describe this scene in detail."
-        query = f"USER: <image>\n{prompt}\nASSISTANT:"
-        result = pipe(query, pil_image)
-        last_caption = result[0]["generated_text"]
-
-    caption_placeholder.markdown(f"**Latest:** {last_caption}")
-
-    frame_count += 1
-
-    # Slow down loop to save CPU (adjust if needed)
-    time.sleep(0.1)
+    # Show uploaded image + answer
+    st.image(img, caption="Your Snapshot", use_column_width=True)
+    st.subheader("📝 Answer")
+    st.write(answer)
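
For reference, a minimal standalone sketch of the same snapshot flow, assuming the model card's call convention for the transformers image-to-text pipeline (PIL image as the first argument, LLaVA-style prompt passed via the prompt keyword). The st.cache_resource decorator and the generate_kwargs value are illustrative additions, not part of this commit.

import streamlit as st
from PIL import Image
from transformers import pipeline

# Load TinyLLaVA once and reuse it across Streamlit reruns
@st.cache_resource
def load_pipe():
    return pipeline(task="image-to-text", model="bczhou/tiny-llava-v1-hf", device_map="cpu")

pipe = load_pipe()

snapshot = st.camera_input("Take a photo")
question = st.text_input("Your question about the image:", value="Describe this scene in detail.")

if snapshot and question:
    # Convert the camera snapshot to a PIL image
    img = Image.open(snapshot).convert("RGB")
    # LLaVA-style chat prompt with the <image> placeholder
    query = f"USER: <image>\n{question}\nASSISTANT:"
    # Image first, prompt as a keyword argument (assumed call signature)
    result = pipe(img, prompt=query, generate_kwargs={"max_new_tokens": 128})
    st.image(img, caption="Your Snapshot")
    st.write(result[0]["generated_text"])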