stevafernandes committed on
Commit
3533176
·
verified ·
1 Parent(s): 6b90321

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -75
app.py CHANGED
@@ -3,23 +3,69 @@ import google.generativeai as genai
3
  import os
4
  import tempfile
5
  import time
6
- import mimetypes
 
 
 
 
7
  from pathlib import Path
8
 
9
- # --- Get API key from environment variable or user input ---
10
  def get_api_key():
 
11
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
 
 
 
 
 
 
12
  if not GOOGLE_API_KEY:
13
- GOOGLE_API_KEY = st.text_input("Enter your Gemini API key", type="password")
14
- return GOOGLE_API_KEY or "AIzaSyDCMPwXHagWqYTQB3HL7FceHEmKUv3v4wc"
 
 
 
 
 
15
 
16
- # VideoProcessor class
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  class VideoProcessor:
18
  def __init__(self, api_key):
19
  genai.configure(api_key=api_key)
20
- self.model = genai.GenerativeModel("gemini-2.0-flash")
21
 
22
- def upload_video(self, video_path, display_name="uploaded_video"):
23
  return genai.upload_file(path=video_path, display_name=display_name)
24
 
25
  def wait_for_processing(self, video_file):
@@ -30,87 +76,196 @@ class VideoProcessor:
30
  raise RuntimeError("Video processing failed")
31
  return video_file
32
 
33
- def chat_with_video(self, video_file, prompt):
 
 
 
 
 
 
 
 
 
 
 
34
  response = self.model.generate_content([video_file, prompt])
35
  return response.text
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  # Initialize session state
38
- for key in ["video_processor", "video_file", "video_name", "messages"]:
39
- if key not in st.session_state:
40
- st.session_state[key] = None if key != "messages" else []
 
 
 
 
 
 
 
41
 
42
  # Main app function
43
  def main():
44
- st.set_page_config(page_title="Video Retrieval-Augmented Generation", page_icon="🎬", layout="wide")
45
- st.header("Video Retrieval-Augmented Generation - Gemini 2.0")
 
 
 
 
 
 
46
  st.markdown("---")
47
 
48
- # Step 1: API Key input
49
- st.subheader("Step 1: Enter your Gemini API key")
50
  api_key = get_api_key()
51
-
52
  if not api_key:
53
- st.error("Please enter your API key to proceed.")
54
  st.stop()
55
-
56
- # Step 2: Upload Video
57
- st.subheader("Step 2: Upload your video file")
58
- uploaded_file = st.file_uploader("Upload a video", type=['mp4', 'mov', 'avi', 'mkv', 'webm'])
59
-
60
- if uploaded_file:
61
- if mimetypes.guess_type(uploaded_file.name)[0].startswith("video"):
62
- file_size = len(uploaded_file.getvalue()) / (1024**2)
63
- st.info(f"Size: {file_size:.2f} MB")
64
-
65
- if st.session_state.video_name != uploaded_file.name:
66
- st.session_state.video_processor = VideoProcessor(api_key)
67
- with tempfile.NamedTemporaryFile(delete=False, suffix=Path(uploaded_file.name).suffix) as tmp:
68
- tmp.write(uploaded_file.getvalue())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  tmp_path = tmp.name
70
-
71
- with st.spinner("Uploading and processing..."):
72
- video_file = st.session_state.video_processor.upload_video(tmp_path, uploaded_file.name)
73
- processed_file = st.session_state.video_processor.wait_for_processing(video_file)
74
- st.session_state.video_file = processed_file
75
- st.session_state.video_name = uploaded_file.name
76
- st.session_state.messages.clear()
77
- st.success("✅ Video processed")
78
-
79
- os.unlink(tmp_path)
80
-
81
- st.video(uploaded_file.getvalue())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  else:
83
- st.error("Not a valid video file")
84
-
85
- if st.button("Reset Chat"):
86
- st.session_state.messages.clear()
87
-
88
- if st.button("Reset All"):
89
- st.session_state.clear()
90
-
91
- # Step 3: Chat about Video
92
- st.subheader("Step 3: Chat with your video")
93
-
94
- if st.session_state.video_file:
95
- for msg in st.session_state.messages:
96
- with st.chat_message(msg["role"]):
97
- st.markdown(msg["content"])
98
-
99
- user_question = st.chat_input("Ask a question about the video...")
100
- if user_question:
101
- st.session_state.messages.append({"role": "user", "content": user_question})
102
- with st.chat_message("user"):
103
- st.markdown(user_question)
104
-
105
- with st.chat_message("assistant"):
106
- placeholder = st.empty()
107
- with st.spinner("Generating response..."):
108
- response = st.session_state.video_processor.chat_with_video(st.session_state.video_file, user_question)
109
-
110
- placeholder.markdown(response)
111
- st.session_state.messages.append({"role": "assistant", "content": response})
112
- else:
113
- st.info("Please upload a video in step 2 to start chatting.")
114
 
115
  if __name__ == "__main__":
116
- main()
 
 
 
 
 
 
 
 
3
  import os
4
  import tempfile
5
  import time
6
+ import cv2
7
+ import numpy as np
8
+ from streamlit_webrtc import webrtc_streamer, VideoProcessorBase, WebRtcMode
9
+ import av
10
+ import threading
11
  from pathlib import Path
12
 
13
+ # --- Get API key from Hugging Face secret or environment ---
14
  def get_api_key():
15
+ # First try to get from Hugging Face Spaces secrets
16
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
17
+
18
+ # Fallback to Streamlit secrets if available
19
+ if not GOOGLE_API_KEY and hasattr(st, "secrets"):
20
+ GOOGLE_API_KEY = st.secrets.get("GOOGLE_API_KEY", "")
21
+
22
+ # Last resort: manual input (remove in production)
23
  if not GOOGLE_API_KEY:
24
+ GOOGLE_API_KEY = st.sidebar.text_input(
25
+ "Enter Gemini API key (for testing only)",
26
+ type="password",
27
+ help="In production, this should be set as a Hugging Face Space secret"
28
+ )
29
+
30
+ return GOOGLE_API_KEY
31
 
32
+ # Video Recorder class to handle WebRTC recording
33
+ class VideoRecorder(VideoProcessorBase):
34
+ def __init__(self):
35
+ self.frames = []
36
+ self.recording = False
37
+ self.lock = threading.Lock()
38
+
39
+ def recv(self, frame):
40
+ img = frame.to_ndarray(format="bgr24")
41
+
42
+ with self.lock:
43
+ if self.recording:
44
+ self.frames.append(img.copy())
45
+
46
+ return av.VideoFrame.from_ndarray(img, format="bgr24")
47
+
48
+ def start_recording(self):
49
+ with self.lock:
50
+ self.recording = True
51
+ self.frames = []
52
+
53
+ def stop_recording(self):
54
+ with self.lock:
55
+ self.recording = False
56
+ return self.frames.copy()
57
+
58
+ def is_recording(self):
59
+ with self.lock:
60
+ return self.recording
61
+
62
+ # VideoProcessor class for Gemini API
63
  class VideoProcessor:
64
  def __init__(self, api_key):
65
  genai.configure(api_key=api_key)
66
+ self.model = genai.GenerativeModel("gemini-2.0-flash-exp")
67
 
68
+ def upload_video(self, video_path, display_name="recorded_video"):
69
  return genai.upload_file(path=video_path, display_name=display_name)
70
 
71
  def wait_for_processing(self, video_file):
 
76
  raise RuntimeError("Video processing failed")
77
  return video_file
78
 
79
+ def generate_summary(self, video_file):
80
+ prompt = """Analyze this video and provide a comprehensive summary that includes:
81
+
82
+ 1. **Main Content**: What is happening in the video?
83
+ 2. **Key Points**: What are the most important moments or information?
84
+ 3. **Visual Elements**: Describe the scene, people, objects, or activities shown
85
+ 4. **Audio/Speech**: If there's speech, summarize what was said
86
+ 5. **Duration and Structure**: How is the video organized?
87
+ 6. **Purpose**: What appears to be the purpose or message of this video?
88
+
89
+ Please format the summary in a clear, structured way."""
90
+
91
  response = self.model.generate_content([video_file, prompt])
92
  return response.text
93
 
94
+ def save_frames_as_video(frames, output_path, fps=30):
95
+ """Save recorded frames as a video file"""
96
+ if not frames:
97
+ return False
98
+
99
+ height, width, _ = frames[0].shape
100
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
101
+ out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
102
+
103
+ for frame in frames:
104
+ out.write(frame)
105
+
106
+ out.release()
107
+ return True
108
+
109
  # Initialize session state
110
+ if "video_processor" not in st.session_state:
111
+ st.session_state.video_processor = None
112
+ if "video_file" not in st.session_state:
113
+ st.session_state.video_file = None
114
+ if "summary" not in st.session_state:
115
+ st.session_state.summary = None
116
+ if "recording_complete" not in st.session_state:
117
+ st.session_state.recording_complete = False
118
+ if "recorded_frames" not in st.session_state:
119
+ st.session_state.recorded_frames = None
120
 
121
  # Main app function
122
  def main():
123
+ st.set_page_config(
124
+ page_title="Video Recording & Analysis",
125
+ page_icon="🎥",
126
+ layout="wide"
127
+ )
128
+
129
+ st.title("🎥 Video Recording & Automatic Analysis")
130
+ st.markdown("Record a video directly from your camera and get an AI-generated summary")
131
  st.markdown("---")
132
 
133
+ # Get API key
 
134
  api_key = get_api_key()
135
+
136
  if not api_key:
137
+ st.error("⚠️ Please set your GOOGLE_API_KEY in Hugging Face Spaces secrets or environment variables")
138
  st.stop()
139
+
140
+ # Initialize video processor
141
+ if st.session_state.video_processor is None:
142
+ st.session_state.video_processor = VideoProcessor(api_key)
143
+
144
+ # Create two columns for layout
145
+ col1, col2 = st.columns([1, 1])
146
+
147
+ with col1:
148
+ st.subheader("📹 Video Recording")
149
+
150
+ # WebRTC streamer for video recording
151
+ ctx = webrtc_streamer(
152
+ key="video-recorder",
153
+ mode=WebRtcMode.SENDRECV,
154
+ video_processor_factory=VideoRecorder,
155
+ media_stream_constraints={"video": True, "audio": False},
156
+ async_processing=True,
157
+ )
158
+
159
+ # Recording controls
160
+ col_start, col_stop = st.columns(2)
161
+
162
+ with col_start:
163
+ if st.button("🔴 Start Recording", use_container_width=True):
164
+ if ctx.video_processor:
165
+ ctx.video_processor.start_recording()
166
+ st.session_state.recording_complete = False
167
+ st.session_state.summary = None
168
+ st.success("Recording started...")
169
+
170
+ with col_stop:
171
+ if st.button("⏹️ Stop Recording", use_container_width=True):
172
+ if ctx.video_processor and ctx.video_processor.is_recording():
173
+ frames = ctx.video_processor.stop_recording()
174
+
175
+ if frames:
176
+ st.session_state.recorded_frames = frames
177
+ st.session_state.recording_complete = True
178
+ st.success(f"Recording stopped! Captured {len(frames)} frames")
179
+ else:
180
+ st.warning("No frames were recorded")
181
+
182
+ # Display recording status
183
+ if ctx.video_processor and ctx.video_processor.is_recording():
184
+ st.info("🔴 Recording in progress...")
185
+
186
+ # Process the recorded video
187
+ if st.session_state.recording_complete and st.session_state.recorded_frames:
188
+ with st.spinner("Processing video..."):
189
+ # Save frames as video
190
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
191
  tmp_path = tmp.name
192
+
193
+ if save_frames_as_video(st.session_state.recorded_frames, tmp_path):
194
+ try:
195
+ # Upload to Gemini
196
+ video_file = st.session_state.video_processor.upload_video(
197
+ tmp_path,
198
+ "recorded_video"
199
+ )
200
+
201
+ # Wait for processing
202
+ processed_file = st.session_state.video_processor.wait_for_processing(
203
+ video_file
204
+ )
205
+
206
+ # Generate summary
207
+ st.session_state.summary = st.session_state.video_processor.generate_summary(
208
+ processed_file
209
+ )
210
+
211
+ st.success("✅ Video analyzed successfully!")
212
+
213
+ except Exception as e:
214
+ st.error(f"Error processing video: {str(e)}")
215
+ finally:
216
+ # Clean up temp file
217
+ if os.path.exists(tmp_path):
218
+ os.unlink(tmp_path)
219
+ st.session_state.recording_complete = False
220
+ else:
221
+ st.error("Failed to save video")
222
+
223
+ with col2:
224
+ st.subheader("📝 Video Summary")
225
+
226
+ if st.session_state.summary:
227
+ st.markdown(st.session_state.summary)
228
+
229
+ # Option to download summary
230
+ st.download_button(
231
+ label="📥 Download Summary",
232
+ data=st.session_state.summary,
233
+ file_name="video_summary.txt",
234
+ mime="text/plain"
235
+ )
236
  else:
237
+ st.info("Record a video and it will be automatically analyzed. The summary will appear here.")
238
+
239
+ # Sidebar with instructions
240
+ with st.sidebar:
241
+ st.markdown("### 📖 How to Use")
242
+ st.markdown("""
243
+ 1. **Allow camera access** when prompted
244
+ 2. Click **Start Recording** to begin
245
+ 3. Perform your action or speak
246
+ 4. Click **Stop Recording** to end
247
+ 5. Wait for automatic analysis
248
+ 6. View your video summary
249
+ """)
250
+
251
+ st.markdown("### ⚙️ Settings")
252
+ if st.button("🔄 Reset Application"):
253
+ st.session_state.clear()
254
+ st.rerun()
255
+
256
+ st.markdown("### 📌 Notes")
257
+ st.markdown("""
258
+ - Video is processed using Gemini 2.0
259
+ - Recording is temporary and not stored
260
+ - API key should be set in HF Spaces secrets
261
+ """)
 
 
 
 
 
 
262
 
263
  if __name__ == "__main__":
264
+ # Install required packages if not present
265
+ try:
266
+ import streamlit_webrtc
267
+ except ImportError:
268
+ st.error("Please install streamlit-webrtc: `pip install streamlit-webrtc`")
269
+ st.stop()
270
+
271
+ main()