CB committed on
Commit
18c6ab8
·
verified ·
1 Parent(s): 4633b20

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +119 -68
streamlit_app.py CHANGED
@@ -36,6 +36,7 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
36
  DATA_DIR = Path("./data")
37
  DATA_DIR.mkdir(exist_ok=True)
38
 
 
39
  st.session_state.setdefault("videos", "")
40
  st.session_state.setdefault("loop_video", False)
41
  st.session_state.setdefault("uploaded_file", None)
@@ -46,6 +47,9 @@ st.session_state.setdefault("analysis_out", "")
46
  st.session_state.setdefault("last_error", "")
47
  st.session_state.setdefault("file_hash", None)
48
  st.session_state.setdefault("fast_mode", False)
 
 
 
49
 
50
  def sanitize_filename(path_str: str):
51
  name = Path(path_str).name
@@ -101,48 +105,36 @@ def file_name_or_id(file_obj):
101
  return file_obj.get("name") or file_obj.get("id")
102
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
103
 
104
- if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
 
 
 
 
 
 
105
  try:
106
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
107
  except Exception:
 
108
  pass
109
-
110
- st.sidebar.header("Video Input")
111
- st.sidebar.text_input("Video URL", key="url", placeholder="https://")
112
-
113
- settings_exp = st.sidebar.expander("Settings", expanded=False)
114
- env_api_key = os.getenv("GOOGLE_API_KEY", "")
115
- API_KEY = settings_exp.text_input("Google API Key", value=env_api_key, placeholder="Set GOOGLE_API_KEY in .env or enter here", type="password")
116
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
117
- model_id = model_input.strip() or "gemini-2.0-flash-lite"
118
- model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
119
- default_prompt = (
120
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
121
- "Use vivid, anatomically rich descriptions with numeric estimates for measurements. Include a list of detailed anatomical observations and measurements. "
122
- "Adopt a playful, inquisitive persona and ensure the report is engaging and informative."
123
- )
124
- analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
125
- settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
126
- settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
127
-
128
- if not API_KEY and not os.getenv("GOOGLE_API_KEY"):
129
- settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
130
-
131
- safety_settings = [
132
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
133
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
134
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
135
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
136
- ]
137
 
138
  _agent = None
139
- if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
 
 
 
 
 
 
 
140
  try:
141
- key_to_use = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
142
- genai.configure(api_key=key_to_use)
143
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
 
144
  except Exception:
145
  _agent = None
 
146
 
147
  def clear_all_video_state():
148
  st.session_state.pop("uploaded_file", None)
@@ -165,46 +157,43 @@ if current_url != st.session_state.get("last_url_value"):
165
  clear_all_video_state()
166
  st.session_state["last_url_value"] = current_url
167
 
168
- if st.sidebar.button("Load Video", use_container_width=True):
169
- try:
170
- vpw = st.session_state.get("video-password", "")
171
- path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
172
- st.session_state["videos"] = path
173
- st.session_state["last_loaded_path"] = path
174
- st.session_state.pop("uploaded_file", None)
175
- st.session_state.pop("processed_file", None)
176
- st.session_state["file_hash"] = file_sha256(path)
177
- except Exception as e:
178
- st.sidebar.error(f"Failed to load video: {e}")
179
-
180
- if st.session_state["videos"]:
181
- try:
182
- st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
183
- except Exception:
184
- st.sidebar.write("Couldn't preview video")
185
 
186
- with st.sidebar.expander("Options", expanded=False):
187
- loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
188
- st.session_state["loop_video"] = loop_checkbox
 
 
 
 
 
 
 
 
189
 
190
- if st.button("Clear Video(s)"):
191
- clear_all_video_state()
 
192
 
193
- try:
194
- with open(st.session_state["videos"], "rb") as vf:
195
- st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
196
- except Exception:
197
- st.sidebar.error("Failed to prepare download")
198
 
199
- st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
 
 
 
 
 
200
 
201
  def upload_video_sdk(filepath: str):
202
- key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
203
  if not key:
204
  raise RuntimeError("No API key provided")
205
  if not HAS_GENAI or upload_file is None:
206
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
207
  genai.configure(api_key=key)
 
208
  return upload_file(filepath)
209
 
210
  def wait_for_processed(file_obj, timeout=180):
@@ -246,20 +235,75 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
246
 
247
  col1, col2 = st.columns([1, 3])
248
  with col1:
249
- generate_now = st.button("Generate the story", type="primary")
250
  with col2:
251
  pass
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  if generate_now and not st.session_state.get("busy"):
254
  if not st.session_state.get("videos"):
255
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
256
  else:
257
- key_to_use = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
258
  if not key_to_use:
259
  st.error("Google API key not set.")
260
  else:
261
  try:
262
  st.session_state["busy"] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  processed = st.session_state.get("processed_file")
264
  current_path = st.session_state.get("videos")
265
  try:
@@ -302,15 +346,21 @@ if generate_now and not st.session_state.get("busy"):
302
 
303
  out = ""
304
  if st.session_state.get("fast_mode"):
305
- model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
306
  max_tokens = 512
307
  else:
308
- model_used = model_arg
309
  max_tokens = 1024
310
 
311
- if _agent:
 
 
 
 
 
 
312
  with st.spinner("Generating description via Agent..."):
313
- response = _agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
314
  out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
315
  else:
316
  if not HAS_GENAI or genai is None:
@@ -379,6 +429,7 @@ if generate_now and not st.session_state.get("busy"):
379
  st.session_state["last_error"] = ""
380
  st.subheader("Analysis Result")
381
  st.markdown(out)
 
382
  except Exception as e:
383
  st.session_state["last_error"] = str(e)
384
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
 
36
  DATA_DIR = Path("./data")
37
  DATA_DIR.mkdir(exist_ok=True)
38
 
39
+ # Session defaults
40
  st.session_state.setdefault("videos", "")
41
  st.session_state.setdefault("loop_video", False)
42
  st.session_state.setdefault("uploaded_file", None)
 
47
  st.session_state.setdefault("last_error", "")
48
  st.session_state.setdefault("file_hash", None)
49
  st.session_state.setdefault("fast_mode", False)
50
+ st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
51
+ st.session_state.setdefault("last_model", "")
52
+ st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
53
 
54
  def sanitize_filename(path_str: str):
55
  name = Path(path_str).name
 
105
  return file_obj.get("name") or file_obj.get("id")
106
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
107
 
108
+ def get_effective_api_key():
109
+ return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
110
+
111
+ def configure_genai_if_needed():
112
+ key = get_effective_api_key()
113
+ if not key:
114
+ return False
115
  try:
116
+ genai.configure(api_key=key)
117
  except Exception:
118
+ # ignore here; callers will handle failures
119
  pass
120
+ return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  _agent = None
123
+ def maybe_create_agent(model_id: str):
124
+ global _agent
125
+ key = get_effective_api_key()
126
+ if not (HAS_PHI and HAS_GENAI and key):
127
+ _agent = None
128
+ return None
129
+ if _agent and st.session_state.get("last_model") == model_id:
130
+ return _agent
131
  try:
132
+ genai.configure(api_key=key)
 
133
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
134
+ st.session_state["last_model"] = model_id
135
  except Exception:
136
  _agent = None
137
+ return _agent
138
 
139
  def clear_all_video_state():
140
  st.session_state.pop("uploaded_file", None)
 
157
  clear_all_video_state()
158
  st.session_state["last_url_value"] = current_url
159
 
160
+ st.sidebar.header("Video Input")
161
+ st.sidebar.text_input("Video URL", key="url", placeholder="https://")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
+ settings_exp = st.sidebar.expander("Settings", expanded=False)
164
+ model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
165
+ # session API key widget (session-first, fallback to .env)
166
+ settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
+ default_prompt = (
168
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
169
+ "Keep language professional and avoid anatomically explicit or sensitive detail. Include a list of observations and any timestamps for notable events."
170
+ )
171
+ analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
172
+ settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
173
+ settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
174
 
175
+ # Show which key is active
176
+ key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
177
+ settings_exp.caption(f"Using API key from: **{key_source}**")
178
 
179
+ if not get_effective_api_key():
180
+ settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
 
 
 
181
 
182
+ safety_settings = [
183
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
184
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
185
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
186
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
187
+ ]
188
 
189
  def upload_video_sdk(filepath: str):
190
+ key = get_effective_api_key()
191
  if not key:
192
  raise RuntimeError("No API key provided")
193
  if not HAS_GENAI or upload_file is None:
194
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
195
  genai.configure(api_key=key)
196
+ # upload_file may stream; wrap to update session progress if supported
197
  return upload_file(filepath)
198
 
199
  def wait_for_processed(file_obj, timeout=180):
 
235
 
236
  col1, col2 = st.columns([1, 3])
237
  with col1:
238
+ generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
239
  with col2:
240
  pass
241
 
242
+ if st.sidebar.button("Load Video", use_container_width=True):
243
+ try:
244
+ vpw = st.session_state.get("video-password", "")
245
+ path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
246
+ st.session_state["videos"] = path
247
+ st.session_state["last_loaded_path"] = path
248
+ st.session_state.pop("uploaded_file", None)
249
+ st.session_state.pop("processed_file", None)
250
+ st.session_state["file_hash"] = file_sha256(path)
251
+ except Exception as e:
252
+ st.sidebar.error(f"Failed to load video: {e}")
253
+
254
+ if st.session_state["videos"]:
255
+ try:
256
+ st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
257
+ except Exception:
258
+ st.sidebar.write("Couldn't preview video")
259
+
260
+ with st.sidebar.expander("Options", expanded=False):
261
+ loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
262
+ st.session_state["loop_video"] = loop_checkbox
263
+
264
+ if st.button("Clear Video(s)"):
265
+ clear_all_video_state()
266
+
267
+ try:
268
+ with open(st.session_state["videos"], "rb") as vf:
269
+ st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
270
+ except Exception:
271
+ st.sidebar.error("Failed to prepare download")
272
+
273
+ st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
274
+ # show file size and compression suggestion
275
+ try:
276
+ file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
277
+ st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
278
+ if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
279
+ st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
280
+ st.session_state["fast_mode"] = True
281
+ except Exception:
282
+ pass
283
+
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
287
  else:
288
+ key_to_use = get_effective_api_key()
289
  if not key_to_use:
290
  st.error("Google API key not set.")
291
  else:
292
  try:
293
  st.session_state["busy"] = True
294
+ # ensure genai is configured now
295
+ try:
296
+ genai.configure(api_key=key_to_use)
297
+ except Exception:
298
+ pass
299
+
300
+ # recreate/clear agent if key or model changed
301
+ model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
302
+ if st.session_state.get("last_model") != model_id:
303
+ # clear cached agent to rebuild with new model/key
304
+ st.session_state["last_model"] = ""
305
+ maybe_create_agent(model_id)
306
+
307
  processed = st.session_state.get("processed_file")
308
  current_path = st.session_state.get("videos")
309
  try:
 
346
 
347
  out = ""
348
  if st.session_state.get("fast_mode"):
349
+ model_used = model_id if model_id else "gemini-2.0-flash-lite"
350
  max_tokens = 512
351
  else:
352
+ model_used = model_id
353
  max_tokens = 1024
354
 
355
+ # cost/tokens estimate (very rough)
356
+ est_tokens = max_tokens
357
+ est_cost_caption = f"Est. max tokens: {est_tokens}"
358
+
359
+ # Generate via Agent if available
360
+ agent = maybe_create_agent(model_used)
361
+ if agent:
362
  with st.spinner("Generating description via Agent..."):
363
+ response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
364
  out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
365
  else:
366
  if not HAS_GENAI or genai is None:
 
429
  st.session_state["last_error"] = ""
430
  st.subheader("Analysis Result")
431
  st.markdown(out)
432
+ st.caption(est_cost_caption)
433
  except Exception as e:
434
  st.session_state["last_error"] = str(e)
435
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")