WaysAheadGlobal committed
Commit fb9ccfe · verified · 1 Parent(s): 106eff3

Update app.py

Files changed (1)
  1. app.py +21 -35
app.py CHANGED
@@ -1,14 +1,12 @@
 import streamlit as st
-import cv2
 from transformers import pipeline
 from PIL import Image
-import numpy as np
-import time
 
-st.set_page_config(page_title="🎥 TinyLLaVA CCTV Alternative", layout="wide")
-st.title("🧠 TinyLLaVA — Webcam Frame-by-Frame (No WebRTC)")
+# Configure the Streamlit page
+st.set_page_config(page_title="TinyLLaVA Snapshot", layout="centered")
+st.title("📷 TinyLLaVA — Snapshot Webcam (Spaces Compatible)")
 
-# Load TinyLLaVA pipeline
+# Load TinyLLaVA pipeline once
 pipe = pipeline(
     task="image-to-text",
     model="bczhou/tiny-llava-v1-hf",
@@ -16,37 +14,25 @@ pipe = pipeline(
     device_map="cpu"
 )
 
-# OpenCV webcam
-cap = cv2.VideoCapture(0)
-FRAME_INTERVAL = 30  # process every 30 frames
+# Camera input
+uploaded_image = st.camera_input("📸 Take a photo")
 
-frame_placeholder = st.empty()
-caption_placeholder = st.empty()
+# Prompt input
+prompt = st.text_input("💬 Your question about the image:", value="Describe this scene in detail.")
 
-frame_count = 0
-last_caption = ""
+# Process when both image and prompt are present
+if uploaded_image and prompt:
+    # Convert to PIL image
+    img = Image.open(uploaded_image).convert("RGB")
 
-while cap.isOpened():
-    ret, frame = cap.read()
-    if not ret:
-        st.warning("No webcam feed")
-        break
+    # Build prompt in TinyLLaVA format
+    query = f"USER: <image>\n{prompt}\nASSISTANT:"
 
-    frame = cv2.flip(frame, 1)  # selfie view
-    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    frame_placeholder.image(rgb, channels="RGB", use_column_width=True)
+    with st.spinner("TinyLLaVA is thinking..."):
+        result = pipe(query, img)
+        answer = result[0]["generated_text"]
 
-    # every FRAME_INTERVAL frames → run TinyLLaVA
-    if frame_count % FRAME_INTERVAL == 0:
-        pil_image = Image.fromarray(rgb)
-        prompt = "Describe this scene in detail."
-        query = f"USER: <image>\n{prompt}\nASSISTANT:"
-        result = pipe(query, pil_image)
-        last_caption = result[0]["generated_text"]
-
-    caption_placeholder.markdown(f"**Latest:** {last_caption}")
-
-    frame_count += 1
-
-    # Slow down loop to save CPU (adjust if needed)
-    time.sleep(0.1)
+    # Show uploaded image + answer
+    st.image(img, caption="Your Snapshot", use_column_width=True)
+    st.subheader("📝 Answer")
+    st.write(answer)
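
For reference, a minimal standalone sketch of the same snapshot flow, assuming the model card's call convention for the transformers image-to-text pipeline (PIL image as the first argument, LLaVA-style prompt passed via the prompt keyword). The st.cache_resource decorator and the generate_kwargs value are illustrative additions, not part of this commit.

import streamlit as st
from PIL import Image
from transformers import pipeline

# Load TinyLLaVA once and reuse it across Streamlit reruns
@st.cache_resource
def load_pipe():
    return pipeline(task="image-to-text", model="bczhou/tiny-llava-v1-hf", device_map="cpu")

pipe = load_pipe()

snapshot = st.camera_input("Take a photo")
question = st.text_input("Your question about the image:", value="Describe this scene in detail.")

if snapshot and question:
    # Convert the camera snapshot to a PIL image
    img = Image.open(snapshot).convert("RGB")
    # LLaVA-style chat prompt with the <image> placeholder
    query = f"USER: <image>\n{question}\nASSISTANT:"
    # Image first, prompt as a keyword argument (assumed call signature)
    result = pipe(img, prompt=query, generate_kwargs={"max_new_tokens": 128})
    st.image(img, caption="Your Snapshot")
    st.write(result[0]["generated_text"])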