stevafernandes committed on
Commit
0564daa
·
verified ·
1 Parent(s): 3533176

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -146
app.py CHANGED
@@ -3,12 +3,7 @@ import google.generativeai as genai
3
  import os
4
  import tempfile
5
  import time
6
- import cv2
7
- import numpy as np
8
- from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, WebRtcMode
9
- import av
10
- import threading
11
- from pathlib import Path
12
 
13
  # --- Get API key from Hugging Face secret or environment ---
14
  def get_api_key():
@@ -29,44 +24,24 @@ def get_api_key():
29
 
30
  return GOOGLE_API_KEY
31
 
32
- # Video Recorder class to handle WebRTC recording
33
- class VideoRecorder(VideoProcessorBase):
34
- def __init__(self):
35
- self.frames = []
36
- self.recording = False
37
- self.lock = threading.Lock()
38
-
39
- def recv(self, frame):
40
- img = frame.to_ndarray(format="bgr24")
41
-
42
- with self.lock:
43
- if self.recording:
44
- self.frames.append(img.copy())
45
-
46
- return av.VideoFrame.from_ndarray(img, format="bgr24")
47
-
48
- def start_recording(self):
49
- with self.lock:
50
- self.recording = True
51
- self.frames = []
52
-
53
- def stop_recording(self):
54
- with self.lock:
55
- self.recording = False
56
- return self.frames.copy()
57
-
58
- def is_recording(self):
59
- with self.lock:
60
- return self.recording
61
-
62
  # VideoProcessor class for Gemini API
63
  class VideoProcessor:
64
  def __init__(self, api_key):
65
  genai.configure(api_key=api_key)
66
  self.model = genai.GenerativeModel("gemini-2.0-flash-exp")
67
 
68
- def upload_video(self, video_path, display_name="recorded_video"):
69
- return genai.upload_file(path=video_path, display_name=display_name)
 
 
 
 
 
 
 
 
 
 
70
 
71
  def wait_for_processing(self, video_file):
72
  while video_file.state.name == "PROCESSING":
@@ -90,21 +65,10 @@ class VideoProcessor:
90
 
91
  response = self.model.generate_content([video_file, prompt])
92
  return response.text
93
-
94
- def save_frames_as_video(frames, output_path, fps=30):
95
- """Save recorded frames as a video file"""
96
- if not frames:
97
- return False
98
-
99
- height, width, _ = frames[0].shape
100
- fourcc = cv2.VideoWriter_fourcc(*'mp4v')
101
- out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
102
 
103
- for frame in frames:
104
- out.write(frame)
105
-
106
- out.release()
107
- return True
108
 
109
  # Initialize session state
110
  if "video_processor" not in st.session_state:
@@ -113,21 +77,21 @@ if "video_file" not in st.session_state:
113
  st.session_state.video_file = None
114
  if "summary" not in st.session_state:
115
  st.session_state.summary = None
116
- if "recording_complete" not in st.session_state:
117
- st.session_state.recording_complete = False
118
- if "recorded_frames" not in st.session_state:
119
- st.session_state.recorded_frames = None
120
 
121
  # Main app function
122
  def main():
123
  st.set_page_config(
124
- page_title="Video Recording & Analysis",
125
  page_icon="🎥",
126
  layout="wide"
127
  )
128
 
129
- st.title("🎥 Video Recording & Automatic Analysis")
130
- st.markdown("Record a video directly from your camera and get an AI-generated summary")
131
  st.markdown("---")
132
 
133
  # Get API key
@@ -135,137 +99,214 @@ def main():
135
 
136
  if not api_key:
137
  st.error("⚠️ Please set your GOOGLE_API_KEY in Hugging Face Spaces secrets or environment variables")
 
 
 
 
 
 
 
138
  st.stop()
139
 
140
  # Initialize video processor
141
  if st.session_state.video_processor is None:
142
  st.session_state.video_processor = VideoProcessor(api_key)
143
 
144
- # Create two columns for layout
145
- col1, col2 = st.columns([1, 1])
146
 
147
- with col1:
148
- st.subheader("📹 Video Recording")
149
 
150
- # WebRTC streamer for video recording
151
- ctx = webrtc_streamer(
152
- key="video-recorder",
153
- mode=WebRtcMode.SENDRECV,
154
- video_processor_factory=VideoRecorder,
155
- media_stream_constraints={"video": True, "audio": False},
156
- async_processing=True,
157
  )
158
 
159
- # Recording controls
160
- col_start, col_stop = st.columns(2)
161
-
162
- with col_start:
163
- if st.button("🔴 Start Recording", use_container_width=True):
164
- if ctx.video_processor:
165
- ctx.video_processor.start_recording()
166
- st.session_state.recording_complete = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  st.session_state.summary = None
168
- st.success("Recording started...")
 
 
 
 
 
 
 
 
 
169
 
170
- with col_stop:
171
- if st.button("⏹️ Stop Recording", use_container_width=True):
172
- if ctx.video_processor and ctx.video_processor.is_recording():
173
- frames = ctx.video_processor.stop_recording()
174
-
175
- if frames:
176
- st.session_state.recorded_frames = frames
177
- st.session_state.recording_complete = True
178
- st.success(f"Recording stopped! Captured {len(frames)} frames")
179
- else:
180
- st.warning("No frames were recorded")
181
 
182
- # Display recording status
183
- if ctx.video_processor and ctx.video_processor.is_recording():
184
- st.info("🔴 Recording in progress...")
185
 
186
- # Process the recorded video
187
- if st.session_state.recording_complete and st.session_state.recorded_frames:
188
- with st.spinner("Processing video..."):
189
- # Save frames as video
190
- with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
191
- tmp_path = tmp.name
192
-
193
- if save_frames_as_video(st.session_state.recorded_frames, tmp_path):
194
  try:
195
- # Upload to Gemini
196
- video_file = st.session_state.video_processor.upload_video(
197
- tmp_path,
198
- "recorded_video"
 
199
  )
200
 
201
  # Wait for processing
202
  processed_file = st.session_state.video_processor.wait_for_processing(
203
- video_file
204
  )
 
205
 
206
  # Generate summary
207
  st.session_state.summary = st.session_state.video_processor.generate_summary(
208
  processed_file
209
  )
210
 
211
- st.success("✅ Video analyzed successfully!")
 
 
 
212
 
213
  except Exception as e:
214
- st.error(f"Error processing video: {str(e)}")
215
- finally:
216
- # Clean up temp file
217
- if os.path.exists(tmp_path):
218
- os.unlink(tmp_path)
219
- st.session_state.recording_complete = False
220
- else:
221
- st.error("Failed to save video")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
- with col2:
 
 
224
  st.subheader("📝 Video Summary")
225
 
226
- if st.session_state.summary:
 
 
 
227
  st.markdown(st.session_state.summary)
228
-
229
- # Option to download summary
230
  st.download_button(
231
  label="📥 Download Summary",
232
  data=st.session_state.summary,
233
- file_name="video_summary.txt",
234
- mime="text/plain"
 
235
  )
236
- else:
237
- st.info("Record a video and it will be automatically analyzed. The summary will appear here.")
238
 
239
- # Sidebar with instructions
240
  with st.sidebar:
241
  st.markdown("### 📖 How to Use")
242
  st.markdown("""
243
- 1. **Allow camera access** when prompted
244
- 2. Click **Start Recording** to begin
245
- 3. Perform your action or speak
246
- 4. Click **Stop Recording** to end
247
- 5. Wait for automatic analysis
248
- 6. View your video summary
249
  """)
250
 
251
- st.markdown("### ⚙️ Settings")
252
- if st.button("🔄 Reset Application"):
253
- st.session_state.clear()
254
- st.rerun()
255
-
256
- st.markdown("### 📌 Notes")
257
  st.markdown("""
258
- - Video is processed using Gemini 2.0
259
- - Recording is temporary and not stored
260
- - API key should be set in HF Spaces secrets
 
261
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  if __name__ == "__main__":
264
- # Install required packages if not present
265
- try:
266
- import streamlit_webrtc
267
- except ImportError:
268
- st.error("Please install streamlit-webrtc: `pip install streamlit-webrtc`")
269
- st.stop()
270
-
271
  main()
 
3
  import os
4
  import tempfile
5
  import time
6
+ from datetime import datetime
 
 
 
 
 
7
 
8
  # --- Get API key from Hugging Face secret or environment ---
9
  def get_api_key():
 
24
 
25
  return GOOGLE_API_KEY
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # VideoProcessor class for Gemini API
28
  class VideoProcessor:
29
  def __init__(self, api_key):
30
  genai.configure(api_key=api_key)
31
  self.model = genai.GenerativeModel("gemini-2.0-flash-exp")
32
 
33
+ def upload_video(self, video_bytes, display_name="uploaded_video"):
34
+ # Save bytes to temporary file
35
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
36
+ tmp.write(video_bytes)
37
+ tmp_path = tmp.name
38
+
39
+ try:
40
+ video_file = genai.upload_file(path=tmp_path, display_name=display_name)
41
+ return video_file, tmp_path
42
+ except Exception as e:
43
+ os.unlink(tmp_path)
44
+ raise e
45
 
46
  def wait_for_processing(self, video_file):
47
  while video_file.state.name == "PROCESSING":
 
65
 
66
  response = self.model.generate_content([video_file, prompt])
67
  return response.text
 
 
 
 
 
 
 
 
 
68
 
69
+ def chat_with_video(self, video_file, prompt):
70
+ response = self.model.generate_content([video_file, prompt])
71
+ return response.text
 
 
72
 
73
  # Initialize session state
74
  if "video_processor" not in st.session_state:
 
77
  st.session_state.video_file = None
78
  if "summary" not in st.session_state:
79
  st.session_state.summary = None
80
+ if "messages" not in st.session_state:
81
+ st.session_state.messages = []
82
+ if "video_name" not in st.session_state:
83
+ st.session_state.video_name = None
84
 
85
  # Main app function
86
  def main():
87
  st.set_page_config(
88
+ page_title="Video Analysis with Gemini",
89
  page_icon="🎥",
90
  layout="wide"
91
  )
92
 
93
+ st.title("🎥 Video Upload & AI Analysis")
94
+ st.markdown("Upload a video or record one to get AI-powered insights")
95
  st.markdown("---")
96
 
97
  # Get API key
 
99
 
100
  if not api_key:
101
  st.error("⚠️ Please set your GOOGLE_API_KEY in Hugging Face Spaces secrets or environment variables")
102
+ st.info("""
103
+ To set up the API key in Hugging Face Spaces:
104
+ 1. Go to your Space Settings
105
+ 2. Navigate to 'Repository secrets'
106
+ 3. Add a new secret named 'GOOGLE_API_KEY'
107
+ 4. Paste your Gemini API key as the value
108
+ """)
109
  st.stop()
110
 
111
  # Initialize video processor
112
  if st.session_state.video_processor is None:
113
  st.session_state.video_processor = VideoProcessor(api_key)
114
 
115
+ # Create tabs for different input methods
116
+ tab1, tab2, tab3 = st.tabs(["📤 Upload Video", "🎬 Record Video (Mobile)", "💬 Chat with Video"])
117
 
118
+ with tab1:
119
+ st.subheader("Upload a video file")
120
 
121
+ uploaded_file = st.file_uploader(
122
+ "Choose a video file",
123
+ type=['mp4', 'mov', 'avi', 'mkv', 'webm', 'm4v'],
124
+ help="Maximum recommended size: 100MB"
 
 
 
125
  )
126
 
127
+ if uploaded_file is not None:
128
+ # Display video
129
+ st.video(uploaded_file)
130
+
131
+ # Check if this is a new video
132
+ if st.session_state.video_name != uploaded_file.name:
133
+ st.session_state.video_name = uploaded_file.name
134
+ st.session_state.video_file = None
135
+ st.session_state.summary = None
136
+ st.session_state.messages = []
137
+
138
+ col1, col2 = st.columns(2)
139
+
140
+ with col1:
141
+ if st.button("🔍 Analyze Video", type="primary", use_container_width=True):
142
+ with st.spinner("Processing video... This may take a minute."):
143
+ try:
144
+ # Upload and process video
145
+ video_bytes = uploaded_file.read()
146
+ video_file, tmp_path = st.session_state.video_processor.upload_video(
147
+ video_bytes,
148
+ uploaded_file.name
149
+ )
150
+
151
+ # Wait for processing
152
+ processed_file = st.session_state.video_processor.wait_for_processing(
153
+ video_file
154
+ )
155
+ st.session_state.video_file = processed_file
156
+
157
+ # Generate summary
158
+ st.session_state.summary = st.session_state.video_processor.generate_summary(
159
+ processed_file
160
+ )
161
+
162
+ # Clean up
163
+ os.unlink(tmp_path)
164
+
165
+ st.success("✅ Analysis complete!")
166
+
167
+ except Exception as e:
168
+ st.error(f"Error processing video: {str(e)}")
169
+
170
+ with col2:
171
+ if st.button("🔄 Reset", use_container_width=True):
172
+ st.session_state.video_file = None
173
  st.session_state.summary = None
174
+ st.session_state.messages = []
175
+ st.session_state.video_name = None
176
+ st.rerun()
177
+
178
+ with tab2:
179
+ st.subheader("Record a video (works best on mobile)")
180
+
181
+ st.info("""
182
+ 📱 **For Mobile Users:**
183
+ Use the camera input below to record a video directly from your device.
184
 
185
+ 💻 **For Desktop Users:**
186
+ You may need to use the Upload tab instead, or record a video separately and upload it.
187
+ """)
 
 
 
 
 
 
 
 
188
 
189
+ # Use Streamlit's camera input for simple video recording
190
+ video_file = st.camera_input("Record a video")
 
191
 
192
+ if video_file is not None:
193
+ st.video(video_file)
194
+
195
+ if st.button("🔍 Analyze Recorded Video", type="primary"):
196
+ with st.spinner("Processing your recording..."):
 
 
 
197
  try:
198
+ # Process the recorded video
199
+ video_bytes = video_file.read()
200
+ uploaded_video, tmp_path = st.session_state.video_processor.upload_video(
201
+ video_bytes,
202
+ f"recording_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
203
  )
204
 
205
  # Wait for processing
206
  processed_file = st.session_state.video_processor.wait_for_processing(
207
+ uploaded_video
208
  )
209
+ st.session_state.video_file = processed_file
210
 
211
  # Generate summary
212
  st.session_state.summary = st.session_state.video_processor.generate_summary(
213
  processed_file
214
  )
215
 
216
+ # Clean up
217
+ os.unlink(tmp_path)
218
+
219
+ st.success("✅ Recording analyzed!")
220
 
221
  except Exception as e:
222
+ st.error(f"Error processing recording: {str(e)}")
223
+
224
+ with tab3:
225
+ st.subheader("Chat about your video")
226
+
227
+ if st.session_state.video_file:
228
+ # Display chat messages
229
+ for msg in st.session_state.messages:
230
+ with st.chat_message(msg["role"]):
231
+ st.markdown(msg["content"])
232
+
233
+ # Chat input
234
+ user_question = st.chat_input("Ask a question about the video...")
235
+
236
+ if user_question:
237
+ # Add user message
238
+ st.session_state.messages.append({"role": "user", "content": user_question})
239
+ with st.chat_message("user"):
240
+ st.markdown(user_question)
241
+
242
+ # Generate response
243
+ with st.chat_message("assistant"):
244
+ with st.spinner("Thinking..."):
245
+ try:
246
+ response = st.session_state.video_processor.chat_with_video(
247
+ st.session_state.video_file,
248
+ user_question
249
+ )
250
+ st.markdown(response)
251
+ st.session_state.messages.append({"role": "assistant", "content": response})
252
+ except Exception as e:
253
+ st.error(f"Error generating response: {str(e)}")
254
+ else:
255
+ st.info("Please upload or record a video first, then analyze it to start chatting.")
256
 
257
+ # Display summary if available
258
+ if st.session_state.summary:
259
+ st.markdown("---")
260
  st.subheader("📝 Video Summary")
261
 
262
+ # Create columns for better layout
263
+ col1, col2 = st.columns([3, 1])
264
+
265
+ with col1:
266
  st.markdown(st.session_state.summary)
267
+
268
+ with col2:
269
  st.download_button(
270
  label="📥 Download Summary",
271
  data=st.session_state.summary,
272
+ file_name=f"video_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
273
+ mime="text/plain",
274
+ use_container_width=True
275
  )
 
 
276
 
277
+ # Sidebar with instructions and info
278
  with st.sidebar:
279
  st.markdown("### 📖 How to Use")
280
  st.markdown("""
281
+ 1. **Set API Key**: Add your Gemini API key to HF Spaces secrets
282
+ 2. **Upload/Record**: Choose a video file or record one
283
+ 3. **Analyze**: Click the analyze button
284
+ 4. **Review**: Read the AI-generated summary
285
+ 5. **Chat**: Ask questions about the video content
 
286
  """)
287
 
288
+ st.markdown("### 🎯 Best Practices")
 
 
 
 
 
289
  st.markdown("""
290
+ - Keep videos under 100MB for faster processing
291
+ - Ensure good lighting for recordings
292
+ - Speak clearly if recording audio
293
+ - Videos with clear content work best
294
  """)
295
+
296
+ st.markdown("### ⚙️ System Status")
297
+ if api_key:
298
+ st.success("✅ API Key configured")
299
+ else:
300
+ st.error("❌ API Key missing")
301
+
302
+ if st.session_state.video_file:
303
+ st.success("✅ Video loaded")
304
+ else:
305
+ st.info("⏳ No video loaded")
306
+
307
+ if st.button("🔄 Reset Everything"):
308
+ st.session_state.clear()
309
+ st.rerun()
310
 
311
  if __name__ == "__main__":
 
 
 
 
 
 
 
312
  main()