CB committed on
Commit
1158077
·
verified ·
1 Parent(s): e61ff31

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +132 -115
streamlit_app.py CHANGED
@@ -8,6 +8,7 @@ from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
  import concurrent.futures
 
11
 
12
  import yt_dlp
13
  import ffmpeg
@@ -16,6 +17,7 @@ from dotenv import load_dotenv
16
 
17
  load_dotenv()
18
 
 
19
  try:
20
  from phi.agent import Agent
21
  from phi.model.google import Gemini
@@ -25,6 +27,7 @@ except Exception:
25
  Agent = Gemini = DuckDuckGo = None
26
  HAS_PHI = False
27
 
 
28
  try:
29
  import google.generativeai as genai
30
  from google.generativeai import upload_file, get_file # type: ignore
@@ -38,7 +41,7 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
38
  DATA_DIR = Path("./data")
39
  DATA_DIR.mkdir(exist_ok=True)
40
 
41
- # Session defaults
42
  st.session_state.setdefault("videos", "")
43
  st.session_state.setdefault("loop_video", False)
44
  st.session_state.setdefault("uploaded_file", None)
@@ -52,8 +55,10 @@ st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
52
  st.session_state.setdefault("last_model", "")
53
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
54
  st.session_state.setdefault("last_url_value", "")
55
- st.session_state.setdefault("processing_timeout", 600) # default 10 minutes
 
56
 
 
57
  def sanitize_filename(path_str: str):
58
  name = Path(path_str).name
59
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -124,6 +129,7 @@ def configure_genai_if_needed():
124
  pass
125
  return True
126
 
 
127
  _agent = None
128
  def maybe_create_agent(model_id: str):
129
  global _agent
@@ -155,12 +161,13 @@ def clear_all_video_state():
155
  except Exception:
156
  pass
157
 
158
- # track url changes
159
  current_url = st.session_state.get("url", "")
160
  if current_url != st.session_state.get("last_url_value"):
161
  clear_all_video_state()
162
  st.session_state["last_url_value"] = current_url
163
 
 
164
  st.sidebar.header("Video Input")
165
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
166
 
@@ -173,19 +180,19 @@ default_prompt = (
173
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
174
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
175
 
176
- # Expose processing timeout
177
  settings_exp.number_input(
178
- "Processing timeout (s)",
179
- min_value=60,
180
- max_value=3600,
181
- value=st.session_state.get("processing_timeout", 600),
182
- step=30,
183
  key="processing_timeout",
184
  )
 
 
 
 
 
185
 
186
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
187
  settings_exp.caption(f"Using API key from: **{key_source}**")
188
-
189
  if not get_effective_api_key():
190
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
191
 
@@ -196,6 +203,7 @@ safety_settings = [
196
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
197
  ]
198
 
 
199
  def upload_video_sdk(filepath: str):
200
  key = get_effective_api_key()
201
  if not key:
@@ -207,13 +215,11 @@ def upload_video_sdk(filepath: str):
207
 
208
  def wait_for_processed(file_obj, timeout: int = None):
209
  """
210
- Poll get_file until file is no longer in PROCESSING state.
211
- Uses st.session_state['processing_timeout'] if timeout is None.
212
- Retries on transient get_file errors with exponential backoff.
213
- Raises TimeoutError on timeout.
214
  """
215
  if timeout is None:
216
- timeout = st.session_state.get("processing_timeout", 600)
217
  if not HAS_GENAI or get_file is None:
218
  return file_obj
219
  start = time.time()
@@ -221,12 +227,10 @@ def wait_for_processed(file_obj, timeout: int = None):
221
  if not name:
222
  return file_obj
223
  backoff = 1.0
224
- last_exc = None
225
  while True:
226
  try:
227
  obj = get_file(name)
228
  except Exception as e:
229
- last_exc = e
230
  if time.time() - start > timeout:
231
  raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
232
  time.sleep(backoff)
@@ -262,11 +266,6 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
262
  return text
263
 
264
  def compress_video_if_large(local_path: str, threshold_mb: int = 50):
265
- """
266
- Returns (path_to_upload, compressed_flag).
267
- If compression fails or isn't needed returns (local_path, False).
268
- Logs errors to st.session_state['last_error'].
269
- """
270
  try:
271
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
272
  except Exception as e:
@@ -286,7 +285,8 @@ def compress_video_if_large(local_path: str, threshold_mb: int = 50):
286
  st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
287
  return local_path, False
288
 
289
- def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024):
 
290
  key = get_effective_api_key()
291
  if not key:
292
  raise RuntimeError("No API key provided")
@@ -296,104 +296,111 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
296
  fname = file_name_or_id(processed)
297
  if not fname:
298
  raise RuntimeError("Uploaded file missing name/id")
 
299
  system_msg = {"role": "system", "content": prompt_text}
300
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
301
- try:
302
- response = genai.responses.generate(
303
- model=model_used,
304
- messages=[system_msg, user_msg],
305
- files=[{"name": fname}],
306
- safety_settings=safety_settings,
307
- max_output_tokens=max_tokens,
308
- )
309
- except TypeError:
310
- response = genai.responses.generate(
311
- model=model_used,
312
- input=[{"text": prompt_text, "files": [{"name": fname}]}],
313
- safety_settings=safety_settings,
314
- max_output_tokens=max_tokens,
315
- )
316
-
317
- # Normalize outputs into text pieces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  outputs = []
319
  if response is None:
320
- outputs = []
321
- elif isinstance(response, dict):
322
- for key in ("output", "candidates", "items", "responses"):
323
- val = response.get(key)
324
- if isinstance(val, list) and val:
325
- outputs = val
326
- break
327
- if not outputs:
 
 
 
 
 
 
 
 
 
 
 
328
  for v in response.values():
329
  if isinstance(v, list) and v:
330
- outputs = v
331
  break
332
- else:
333
- for attr in ("output", "candidates", "items", "responses"):
334
- val = getattr(response, attr, None)
335
- if isinstance(val, list) and val:
336
- outputs = val
337
- break
338
-
339
- if not isinstance(outputs, list):
340
- outputs = list(outputs) if outputs else []
341
 
342
  text_pieces = []
343
- for item in outputs:
344
- if item is None:
345
- continue
346
- cand_contents = None
347
- if isinstance(item, dict):
348
- for k in ("content", "text", "message", "output_text", "output"):
349
- if k in item and item[k]:
350
- cand_contents = item[k]
351
- break
352
- else:
353
- for k in ("content", "text", "message", "output", "output_text"):
354
- cand_contents = getattr(item, k, None)
355
- if cand_contents:
356
- break
357
-
358
- if isinstance(cand_contents, str):
359
- if cand_contents.strip():
360
- text_pieces.append(cand_contents.strip())
361
- continue
362
-
363
- if isinstance(cand_contents, (list, tuple)):
364
- for c in cand_contents:
365
- if c is None:
366
- continue
367
- if isinstance(c, str):
368
- if c.strip():
369
- text_pieces.append(c.strip())
370
- continue
371
- if isinstance(c, dict):
372
- t = c.get("text") or c.get("content")
373
  else:
374
- t = getattr(c, "text", None) or getattr(c, "content", None)
375
- if t:
376
- text_pieces.append(str(t).strip())
377
- continue
378
-
379
- direct = None
380
- if isinstance(item, dict):
381
- direct = item.get("text") or item.get("output_text") or item.get("message")
382
- else:
383
- direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
384
- if direct:
385
- text_pieces.append(str(direct).strip())
 
 
 
 
 
 
 
386
 
387
- if not text_pieces:
388
- top_text = None
389
- if isinstance(response, dict):
390
- top_text = response.get("text") or response.get("message")
391
- else:
392
- top_text = getattr(response, "text", None) or getattr(response, "message", None)
393
- if top_text:
394
- text_pieces.append(str(top_text).strip())
395
 
396
- # Deduplicate, preserve order
397
  seen = set()
398
  filtered = []
399
  for t in text_pieces:
@@ -402,8 +409,9 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
402
  if t and t not in seen:
403
  filtered.append(t)
404
  seen.add(t)
405
- return "\n\n".join(filtered)
406
 
 
407
  col1, col2 = st.columns([1, 3])
408
  with col1:
409
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
@@ -453,7 +461,7 @@ if st.session_state["videos"]:
453
  except Exception:
454
  pass
455
 
456
- # --- Generation flow ---
457
  if generate_now and not st.session_state.get("busy"):
458
  if not st.session_state.get("videos"):
459
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -493,6 +501,9 @@ if generate_now and not st.session_state.get("busy"):
493
  upload_path, compressed = compress_video_if_large(local_path)
494
 
495
  with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
 
 
496
  try:
497
  uploaded = upload_video_sdk(upload_path)
498
  except Exception as e:
@@ -501,7 +512,13 @@ if generate_now and not st.session_state.get("busy"):
501
  raise
502
 
503
  try:
504
- processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 600))
 
 
 
 
 
 
505
  except Exception as e:
506
  st.session_state["last_error"] = f"Processing failed/wait timeout: {e}\n\nTraceback:\n{traceback.format_exc()}"
507
  st.error("Video processing failed or timed out. See Last Error.")
@@ -515,7 +532,7 @@ if generate_now and not st.session_state.get("busy"):
515
  prompt_text = (analysis_prompt.strip() or default_prompt).strip()
516
  out = ""
517
  model_used = model_id
518
- max_tokens = 1024
519
  est_tokens = max_tokens
520
 
521
  # Try Agent first, fallback to Responses API
@@ -532,7 +549,7 @@ if generate_now and not st.session_state.get("busy"):
532
  if not agent_text:
533
  try:
534
  if isinstance(agent_response, dict):
535
- for k in ("content", "outputText", "text"):
536
  if k in agent_response and agent_response[k]:
537
  agent_text = agent_response[k]
538
  break
@@ -551,7 +568,7 @@ if generate_now and not st.session_state.get("busy"):
551
  if not out:
552
  try:
553
  with st.spinner("Generating description via Responses API..."):
554
- out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens)
555
  except Exception as e:
556
  tb = traceback.format_exc()
557
  st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
 
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
  import concurrent.futures
11
+ import json
12
 
13
  import yt_dlp
14
  import ffmpeg
 
17
 
18
  load_dotenv()
19
 
20
+ # Optional phi integration (Agent + Gemini wrapper)
21
  try:
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
 
27
  Agent = Gemini = DuckDuckGo = None
28
  HAS_PHI = False
29
 
30
+ # google.generativeai SDK
31
  try:
32
  import google.generativeai as genai
33
  from google.generativeai import upload_file, get_file # type: ignore
 
41
  DATA_DIR = Path("./data")
42
  DATA_DIR.mkdir(exist_ok=True)
43
 
44
+ # ---- Session defaults ----
45
  st.session_state.setdefault("videos", "")
46
  st.session_state.setdefault("loop_video", False)
47
  st.session_state.setdefault("uploaded_file", None)
 
55
  st.session_state.setdefault("last_model", "")
56
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
57
  st.session_state.setdefault("last_url_value", "")
58
+ st.session_state.setdefault("processing_timeout", 900) # increased default to 15m
59
+ st.session_state.setdefault("generation_timeout", 300) # for Responses generate
60
 
61
+ # ---- Helpers ----
62
  def sanitize_filename(path_str: str):
63
  name = Path(path_str).name
64
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
129
  pass
130
  return True
131
 
132
+ # ---- Agent management ----
133
  _agent = None
134
  def maybe_create_agent(model_id: str):
135
  global _agent
 
161
  except Exception:
162
  pass
163
 
164
+ # Reset when URL changes
165
  current_url = st.session_state.get("url", "")
166
  if current_url != st.session_state.get("last_url_value"):
167
  clear_all_video_state()
168
  st.session_state["last_url_value"] = current_url
169
 
170
+ # ---- Sidebar UI ----
171
  st.sidebar.header("Video Input")
172
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
173
 
 
180
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
181
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
182
 
 
183
  settings_exp.number_input(
184
+ "Processing timeout (s)", min_value=60, max_value=3600,
185
+ value=st.session_state.get("processing_timeout", 900), step=30,
 
 
 
186
  key="processing_timeout",
187
  )
188
+ settings_exp.number_input(
189
+ "Generation timeout (s)", min_value=30, max_value=1800,
190
+ value=st.session_state.get("generation_timeout", 300), step=10,
191
+ key="generation_timeout",
192
+ )
193
 
194
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
195
  settings_exp.caption(f"Using API key from: **{key_source}**")
 
196
  if not get_effective_api_key():
197
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
198
 
 
203
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
204
  ]
205
 
206
+ # ---- Upload & processing helpers ----
207
  def upload_video_sdk(filepath: str):
208
  key = get_effective_api_key()
209
  if not key:
 
215
 
216
  def wait_for_processed(file_obj, timeout: int = None):
217
  """
218
+ Poll get_file until file is no longer PROCESSING.
219
+ Retries get_file on transient errors with exponential backoff.
 
 
220
  """
221
  if timeout is None:
222
+ timeout = st.session_state.get("processing_timeout", 900)
223
  if not HAS_GENAI or get_file is None:
224
  return file_obj
225
  start = time.time()
 
227
  if not name:
228
  return file_obj
229
  backoff = 1.0
 
230
  while True:
231
  try:
232
  obj = get_file(name)
233
  except Exception as e:
 
234
  if time.time() - start > timeout:
235
  raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
236
  time.sleep(backoff)
 
266
  return text
267
 
268
  def compress_video_if_large(local_path: str, threshold_mb: int = 50):
 
 
 
 
 
269
  try:
270
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
271
  except Exception as e:
 
285
  st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
286
  return local_path, False
287
 
288
+ # ---- Robust Responses API caller adapted for varying model versions ----
289
+ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
290
  key = get_effective_api_key()
291
  if not key:
292
  raise RuntimeError("No API key provided")
 
296
  fname = file_name_or_id(processed)
297
  if not fname:
298
  raise RuntimeError("Uploaded file missing name/id")
299
+
300
  system_msg = {"role": "system", "content": prompt_text}
301
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
302
+
303
+ # Some model versions and SDK releases expect messages, some older ones expect input with files.
304
+ call_variants = [
305
+ {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
306
+ {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
307
+ ]
308
+
309
+ last_exc = None
310
+ start = time.time()
311
+ backoff = 1.0
312
+ while True:
313
+ for payload in call_variants:
314
+ try:
315
+ response = genai.responses.generate(model=model_used, **payload)
316
+ # If successful, normalize below
317
+ return _normalize_genai_response(response)
318
+ except Exception as e:
319
+ last_exc = e
320
+ # If it's a transient server error, let outer retry/backoff handle it
321
+ # Quick heuristic: inspect message for INTERNAL/UNAVAILABLE/DeadlineExceeded
322
+ msg = str(e).lower()
323
+ if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
324
+ # will retry below
325
+ pass
326
+ else:
327
+ # If it's a clear invalid-argument or permission error, bubble up immediately
328
+ raise
329
+ if time.time() - start > timeout:
330
+ raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
331
+ time.sleep(backoff)
332
+ backoff = min(backoff * 2, 8.0)
333
+
334
+ def _normalize_genai_response(response):
335
+ # Accept dict or object shapes. Extract text pieces robustly and join.
336
  outputs = []
337
  if response is None:
338
+ return ""
339
+
340
+ # If it's an object with attributes
341
+ if not isinstance(response, dict):
342
+ try:
343
+ response = json.loads(str(response))
344
+ except Exception:
345
+ # fallback to attribute access
346
+ pass
347
+
348
+ # Strategy: check common keys
349
+ candidate_lists = []
350
+ for key in ("output", "candidates", "items", "responses", "choices"):
351
+ val = response.get(key) if isinstance(response, dict) else None
352
+ if isinstance(val, list) and val:
353
+ candidate_lists.append(val)
354
+ if not candidate_lists:
355
+ # fallback: any list value
356
+ if isinstance(response, dict):
357
  for v in response.values():
358
  if isinstance(v, list) and v:
359
+ candidate_lists.append(v)
360
  break
 
 
 
 
 
 
 
 
 
361
 
362
  text_pieces = []
363
+ for lst in candidate_lists:
364
+ for item in lst:
365
+ if not item:
366
+ continue
367
+ if isinstance(item, dict):
368
+ # common text keys
369
+ for k in ("content", "text", "message", "output_text", "output"):
370
+ t = item.get(k)
371
+ if t:
372
+ text_pieces.append(str(t).strip())
373
+ break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  else:
375
+ # nested forms
376
+ if "content" in item and isinstance(item["content"], list):
377
+ for part in item["content"]:
378
+ if isinstance(part, dict):
379
+ t = part.get("text") or part.get("content")
380
+ if t:
381
+ text_pieces.append(str(t).strip())
382
+ elif isinstance(part, str):
383
+ text_pieces.append(part.strip())
384
+ elif isinstance(item, str):
385
+ text_pieces.append(item.strip())
386
+ else:
387
+ # try attribute access
388
+ try:
389
+ t = getattr(item, "text", None) or getattr(item, "content", None)
390
+ if t:
391
+ text_pieces.append(str(t).strip())
392
+ except Exception:
393
+ pass
394
 
395
+ # If still empty, try top-level text fields
396
+ if not text_pieces and isinstance(response, dict):
397
+ for k in ("text", "message", "output_text"):
398
+ v = response.get(k)
399
+ if v:
400
+ text_pieces.append(str(v).strip())
401
+ break
 
402
 
403
+ # deduplicate preserving order
404
  seen = set()
405
  filtered = []
406
  for t in text_pieces:
 
409
  if t and t not in seen:
410
  filtered.append(t)
411
  seen.add(t)
412
+ return "\n\n".join(filtered).strip()
413
 
414
+ # ---- Layout ----
415
  col1, col2 = st.columns([1, 3])
416
  with col1:
417
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
 
461
  except Exception:
462
  pass
463
 
464
+ # ---- Main generation flow ----
465
  if generate_now and not st.session_state.get("busy"):
466
  if not st.session_state.get("videos"):
467
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
501
  upload_path, compressed = compress_video_if_large(local_path)
502
 
503
  with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
504
+ # Provide an upload progress bar UI while calling upload_file.
505
+ progress_placeholder = st.empty()
506
+ progress_bar = None
507
  try:
508
  uploaded = upload_video_sdk(upload_path)
509
  except Exception as e:
 
512
  raise
513
 
514
  try:
515
+ # Show a more informative processing progress area
516
+ processing_placeholder = st.empty()
517
+ processing_bar = processing_placeholder.progress(0)
518
+ start_wait = time.time()
519
+ processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
520
+ processing_bar.progress(100)
521
+ processing_placeholder.success("Processing complete")
522
  except Exception as e:
523
  st.session_state["last_error"] = f"Processing failed/wait timeout: {e}\n\nTraceback:\n{traceback.format_exc()}"
524
  st.error("Video processing failed or timed out. See Last Error.")
 
532
  prompt_text = (analysis_prompt.strip() or default_prompt).strip()
533
  out = ""
534
  model_used = model_id
535
+ max_tokens = 2048 if "2.5" in model_used else 1024
536
  est_tokens = max_tokens
537
 
538
  # Try Agent first, fallback to Responses API
 
549
  if not agent_text:
550
  try:
551
  if isinstance(agent_response, dict):
552
+ for k in ("content", "outputText", "text", "message"):
553
  if k in agent_response and agent_response[k]:
554
  agent_text = agent_response[k]
555
  break
 
568
  if not out:
569
  try:
570
  with st.spinner("Generating description via Responses API..."):
571
+ out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
572
  except Exception as e:
573
  tb = traceback.format_exc()
574
  st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"