WaysAheadGlobal committed on
Commit
106eff3
·
verified ·
1 Parent(s): 1d2a390

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -45
app.py CHANGED
@@ -1,12 +1,14 @@
1
  import streamlit as st
2
- from streamlit_webrtc import VideoTransformerBase, webrtc_streamer, RTCConfiguration
3
  from transformers import pipeline
4
  from PIL import Image
5
- import cv2
6
  import numpy as np
7
  import time
8
 
9
- # Load TinyLLaVA pipeline once
 
 
 
10
  pipe = pipeline(
11
  task="image-to-text",
12
  model="bczhou/tiny-llava-v1-hf",
@@ -14,53 +16,37 @@ pipe = pipeline(
14
  device_map="cpu"
15
  )
16
 
17
- st.set_page_config(page_title="TinyLLaVA Webcam", layout="centered")
18
- st.title("🦙 TinyLLaVA — Webcam Captioning")
19
-
20
- # Shared state
21
- st_frame = st.empty()
22
- result_box = st.empty()
23
 
24
- class VideoProcessor(VideoTransformerBase):
25
- def __init__(self):
26
- self.last_run = 0
27
- self.interval = 5 # seconds
28
- self.last_caption = ""
29
 
30
- def transform(self, frame):
31
- img = frame.to_ndarray(format="bgr24")
32
 
33
- now = time.time()
34
- if now - self.last_run > self.interval:
35
- self.last_run = now
 
 
36
 
37
- # Convert BGR to RGB
38
- img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
39
- pil_image = Image.fromarray(img_rgb)
40
 
41
- # Run TinyLLaVA pipeline
42
- prompt = "Describe this scene in detail."
43
- query = f"USER: <image>\n{prompt}\nASSISTANT:"
44
- with st.spinner("TinyLLaVA is thinking..."):
45
- result = pipe(query, pil_image)
46
- self.last_caption = result[0]["generated_text"]
 
47
 
48
- # Return the same frame, unmodified
49
- return img
50
 
51
- # RTC config
52
- rtc_config = RTCConfiguration(
53
- {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
54
- )
55
-
56
- webrtc_ctx = webrtc_streamer(
57
- key="example",
58
- video_processor_factory=VideoProcessor,
59
- rtc_configuration=rtc_config,
60
- media_stream_constraints={"video": True, "audio": False}
61
- )
62
 
63
- if webrtc_ctx.video_processor:
64
- st.info("Keep your webcam on. The app captures 1 frame every 5 seconds and generates a caption.")
65
- st.write("Latest Caption:")
66
- st.write(webrtc_ctx.video_processor.last_caption)
 
1
  import streamlit as st
2
+ import cv2
3
  from transformers import pipeline
4
  from PIL import Image
 
5
  import numpy as np
6
  import time
7
 
8
+ st.set_page_config(page_title="🎥 TinyLLaVA CCTV Alternative", layout="wide")
9
+ st.title("🧠 TinyLLaVA — Webcam Frame-by-Frame (No WebRTC)")
10
+
11
+ # Load TinyLLaVA pipeline
12
  pipe = pipeline(
13
  task="image-to-text",
14
  model="bczhou/tiny-llava-v1-hf",
 
16
  device_map="cpu"
17
  )
18
 
19
+ # OpenCV webcam
20
+ cap = cv2.VideoCapture(0)
21
+ FRAME_INTERVAL = 30 # process every 30 frames
 
 
 
22
 
23
+ frame_placeholder = st.empty()
24
+ caption_placeholder = st.empty()
 
 
 
25
 
26
+ frame_count = 0
27
+ last_caption = ""
28
 
29
+ while cap.isOpened():
30
+ ret, frame = cap.read()
31
+ if not ret:
32
+ st.warning("No webcam feed")
33
+ break
34
 
35
+ frame = cv2.flip(frame, 1) # selfie view
36
+ rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
37
+ frame_placeholder.image(rgb, channels="RGB", use_column_width=True)
38
 
39
+ # every FRAME_INTERVAL frames → run TinyLLaVA
40
+ if frame_count % FRAME_INTERVAL == 0:
41
+ pil_image = Image.fromarray(rgb)
42
+ prompt = "Describe this scene in detail."
43
+ query = f"USER: <image>\n{prompt}\nASSISTANT:"
44
+ result = pipe(query, pil_image)
45
+ last_caption = result[0]["generated_text"]
46
 
47
+ caption_placeholder.markdown(f"**Latest:** {last_caption}")
 
48
 
49
+ frame_count += 1
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # Slow down loop to save CPU (adjust if needed)
52
+ time.sleep(0.1)