CB committed on
Commit
8fea353
·
verified ·
1 Parent(s): 369c934

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +86 -26
streamlit_app.py CHANGED
@@ -8,9 +8,10 @@ from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
  import json
 
11
 
12
  import yt_dlp
13
- import ffmpeg
14
  import streamlit as st
15
  from dotenv import load_dotenv
16
 
@@ -21,6 +22,7 @@ try:
21
  from phi.agent import Agent
22
  from phi.model.google import Gemini
23
  from phi.tools.duckduckgo import DuckDuckGo
 
24
  HAS_PHI = True
25
  except Exception:
26
  Agent = Gemini = DuckDuckGo = None
@@ -30,12 +32,15 @@ except Exception:
30
  try:
31
  import google.generativeai as genai
32
  from google.generativeai import upload_file, get_file
 
33
  HAS_GENAI = True
34
  except Exception:
35
  genai = None
36
  upload_file = get_file = None
37
  HAS_GENAI = False
38
 
 
 
39
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
40
  DATA_DIR = Path("./data")
41
  DATA_DIR.mkdir(exist_ok=True)
@@ -92,11 +97,19 @@ def convert_video_to_mp4(video_path: str) -> str:
92
  target_path = str(Path(video_path).with_suffix(".mp4"))
93
  if os.path.exists(target_path):
94
  return target_path
95
- ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
96
  try:
97
- os.remove(video_path)
98
- except Exception:
99
- pass
 
 
 
 
 
 
 
 
 
100
  return target_path
101
 
102
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
@@ -104,8 +117,12 @@ def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str
104
  ffmpeg.input(input_path).output(
105
  target_path, vcodec="libx264", crf=crf, preset=preset
106
  ).run(overwrite_output=True, quiet=True)
107
- return target_path
 
 
 
108
  except Exception:
 
109
  return input_path
110
 
111
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
@@ -114,24 +131,46 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
114
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
115
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
116
  if video_password:
 
117
  ydl_opts["videopassword"] = video_password
118
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
119
  info = ydl.extract_info(url, download=True)
120
- video_id = info.get("id") if isinstance(info, dict) else None
121
- if video_id:
122
- matches = glob(os.path.join(save_dir, f"{video_id}.*"))
123
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  all_files = glob(os.path.join(save_dir, "*"))
125
- matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
126
- if not matches:
127
- raise FileNotFoundError("Downloaded video not found")
128
- return convert_video_to_mp4(matches[0])
 
 
 
 
 
 
 
129
 
130
  def file_name_or_id(file_obj):
131
  if file_obj is None:
132
  return None
133
  if isinstance(file_obj, dict):
134
  return file_obj.get("name") or file_obj.get("id")
 
135
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
136
 
137
  def get_effective_api_key():
@@ -144,7 +183,7 @@ def configure_genai_if_needed():
144
  try:
145
  genai.configure(api_key=key)
146
  except Exception:
147
- pass
148
  return True
149
 
150
  # ---- Agent management (reuse) ----
@@ -162,6 +201,7 @@ def maybe_create_agent(model_id: str):
162
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
163
  st.session_state["last_model"] = model_id
164
  except Exception:
 
165
  _agent = None
166
  return _agent
167
 
@@ -177,7 +217,7 @@ def clear_all_video_state():
177
  try:
178
  os.remove(f)
179
  except Exception:
180
- pass
181
 
182
  # Reset when URL changes
183
  current_url = st.session_state.get("url", "")
@@ -195,6 +235,7 @@ if model_choice == "custom":
195
  model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
196
  model_selected = model_input.strip() or DEFAULT_MODEL
197
  else:
 
198
  st.session_state["model_input"] = model_choice
199
  model_selected = model_choice
200
 
@@ -238,6 +279,7 @@ def upload_video_sdk(filepath: str):
238
  if not HAS_GENAI or upload_file is None:
239
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
240
  genai.configure(api_key=key)
 
241
  return upload_file(filepath)
242
 
243
  def wait_for_processed(file_obj, timeout: int = None):
@@ -298,10 +340,14 @@ def compress_video_if_large(local_path: str, threshold_mb: int = 200):
298
  if file_size_mb <= threshold_mb:
299
  return local_path, False
300
 
301
- compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
 
 
 
 
302
  try:
303
  result = compress_video(local_path, compressed_path, crf=28, preset="fast")
304
- if result and os.path.exists(result):
305
  return result, True
306
  return local_path, False
307
  except Exception as e:
@@ -339,10 +385,12 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
339
  except Exception as e:
340
  last_exc = e
341
  msg = str(e).lower()
 
342
  if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
343
- pass
344
- else:
345
- raise
 
346
  if time.time() - start > timeout:
347
  raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
348
  time.sleep(backoff)
@@ -423,6 +471,14 @@ def safe_traceback(max_chars=2000):
423
  tb = traceback.format_exc()
424
  return tb if len(tb) <= max_chars else tb[:max_chars] + "\n...[truncated]"
425
 
 
 
 
 
 
 
 
 
426
  # ---- Layout ----
427
  col1, col2 = st.columns([1, 3])
428
  with col1:
@@ -443,6 +499,7 @@ if st.sidebar.button("Load Video", use_container_width=True):
443
  except Exception:
444
  st.session_state["file_hash"] = None
445
  except Exception as e:
 
446
  st.sidebar.error(f"Failed to load video: {e}")
447
 
448
  if st.session_state["videos"]:
@@ -488,7 +545,7 @@ if generate_now and not st.session_state.get("busy"):
488
  if HAS_GENAI and genai is not None:
489
  genai.configure(api_key=key_to_use)
490
  except Exception:
491
- pass
492
 
493
  model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
494
  if st.session_state.get("last_model") != model_id:
@@ -519,7 +576,8 @@ if generate_now and not st.session_state.get("busy"):
519
  try:
520
  uploaded = upload_video_sdk(upload_path)
521
  except Exception as e:
522
- st.session_state["last_error"] = f"Upload failed: {e}\n\nTraceback:\n{safe_traceback()}"
 
523
  st.error("Upload failed. See Last Error for details.")
524
  raise
525
 
@@ -535,7 +593,8 @@ if generate_now and not st.session_state.get("busy"):
535
  processing_bar.progress(pct)
536
  processing_placeholder.success("Processing complete")
537
  except Exception as e:
538
- st.session_state["last_error"] = f"Processing failed/wait timeout: {e}\n\nTraceback:\n{safe_traceback()}"
 
539
  st.error("Video processing failed or timed out. See Last Error.")
540
  raise
541
 
@@ -586,7 +645,7 @@ if generate_now and not st.session_state.get("busy"):
586
  out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
587
  except Exception as e:
588
  tb = traceback.format_exc()
589
- st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{safe_traceback()}"
590
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
591
  out = ""
592
 
@@ -611,7 +670,8 @@ if generate_now and not st.session_state.get("busy"):
611
 
612
  except Exception as e:
613
  tb = traceback.format_exc()
614
- st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{safe_traceback()}"
 
615
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
616
  finally:
617
  st.session_state["busy"] = False
 
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
  import json
11
+ import logging
12
 
13
  import yt_dlp
14
+ import ffmpeg # ffmpeg-python
15
  import streamlit as st
16
  from dotenv import load_dotenv
17
 
 
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
24
  from phi.tools.duckduckgo import DuckDuckGo
25
+
26
  HAS_PHI = True
27
  except Exception:
28
  Agent = Gemini = DuckDuckGo = None
 
32
  try:
33
  import google.generativeai as genai
34
  from google.generativeai import upload_file, get_file
35
+
36
  HAS_GENAI = True
37
  except Exception:
38
  genai = None
39
  upload_file = get_file = None
40
  HAS_GENAI = False
41
 
42
+ logging.basicConfig(level=logging.INFO)
43
+
44
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
45
  DATA_DIR = Path("./data")
46
  DATA_DIR.mkdir(exist_ok=True)
 
97
  target_path = str(Path(video_path).with_suffix(".mp4"))
98
  if os.path.exists(target_path):
99
  return target_path
 
100
  try:
101
+ ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
102
+ except Exception as e:
103
+ logging.exception("ffmpeg conversion failed")
104
+ # If conversion fails, do not delete original; re-raise for caller to handle if needed
105
+ raise
106
+ # Only remove source if target exists and is non-empty
107
+ if os.path.exists(target_path) and os.path.getsize(target_path) > 0:
108
+ try:
109
+ if str(Path(video_path).resolve()) != str(Path(target_path).resolve()):
110
+ os.remove(video_path)
111
+ except Exception:
112
+ logging.exception("Failed to remove original video after conversion")
113
  return target_path
114
 
115
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
 
117
  ffmpeg.input(input_path).output(
118
  target_path, vcodec="libx264", crf=crf, preset=preset
119
  ).run(overwrite_output=True, quiet=True)
120
+ if os.path.exists(target_path) and os.path.getsize(target_path) > 0:
121
+ return target_path
122
+ logging.warning("Compression completed but target missing or empty; returning input path")
123
+ return input_path
124
  except Exception:
125
+ logging.exception("Video compression failed")
126
  return input_path
127
 
128
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
 
131
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
132
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
133
  if video_password:
134
+ # yt-dlp accepts 'videopassword' in options for password-protected videos
135
  ydl_opts["videopassword"] = video_password
136
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
137
  info = ydl.extract_info(url, download=True)
138
+ # info may be a dict for single video or playlist; prefer single entry if present
139
+ video_candidates = []
140
+ if isinstance(info, dict):
141
+ # playlist -> entries list
142
+ entries = info.get("entries")
143
+ if entries:
144
+ # get last-downloaded entry (entries may be nested); map to filesystem files by ids
145
+ for e in entries:
146
+ if isinstance(e, dict) and e.get("id"):
147
+ video_candidates.append(str(Path(save_dir) / f"{e['id']}.mp4"))
148
+ else:
149
+ vid = info.get("id")
150
+ ext = info.get("ext") or "mp4"
151
+ if vid:
152
+ video_candidates.append(str(Path(save_dir) / f"{vid}.{ext}"))
153
+ # fallback: pick most recent file in dir
154
+ if not video_candidates:
155
  all_files = glob(os.path.join(save_dir, "*"))
156
+ if not all_files:
157
+ raise FileNotFoundError("Downloaded video not found")
158
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)
159
+ chosen = matches[0]
160
+ else:
161
+ # prefer existing files among candidates; pick first that exists, else fall back to newest
162
+ existing = [p for p in video_candidates if os.path.exists(p)]
163
+ chosen = existing[0] if existing else (sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[0])
164
+ # Ensure mp4 target
165
+ final = convert_video_to_mp4(chosen)
166
+ return final
167
 
168
  def file_name_or_id(file_obj):
169
  if file_obj is None:
170
  return None
171
  if isinstance(file_obj, dict):
172
  return file_obj.get("name") or file_obj.get("id")
173
+ # common SDK wrappers may expose 'name', 'id', 'fileId'
174
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
175
 
176
  def get_effective_api_key():
 
183
  try:
184
  genai.configure(api_key=key)
185
  except Exception:
186
+ logging.exception("genai.configure failed")
187
  return True
188
 
189
  # ---- Agent management (reuse) ----
 
201
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
202
  st.session_state["last_model"] = model_id
203
  except Exception:
204
+ logging.exception("Failed to create PHI Agent")
205
  _agent = None
206
  return _agent
207
 
 
217
  try:
218
  os.remove(f)
219
  except Exception:
220
+ logging.exception("Failed to remove data file during clear_all_video_state")
221
 
222
  # Reset when URL changes
223
  current_url = st.session_state.get("url", "")
 
235
  model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
236
  model_selected = model_input.strip() or DEFAULT_MODEL
237
  else:
238
+ # keep model_input in session_state for later reads
239
  st.session_state["model_input"] = model_choice
240
  model_selected = model_choice
241
 
 
279
  if not HAS_GENAI or upload_file is None:
280
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
281
  genai.configure(api_key=key)
282
+ # upload_file may return object with id or name, keep as-is
283
  return upload_file(filepath)
284
 
285
  def wait_for_processed(file_obj, timeout: int = None):
 
340
  if file_size_mb <= threshold_mb:
341
  return local_path, False
342
 
343
+ # build compressed path reliably
344
+ p = Path(local_path)
345
+ compressed_name = f"{p.stem}_compressed.mp4"
346
+ compressed_path = str(p.with_name(compressed_name))
347
+
348
  try:
349
  result = compress_video(local_path, compressed_path, crf=28, preset="fast")
350
+ if result and os.path.exists(result) and os.path.getsize(result) > 0:
351
  return result, True
352
  return local_path, False
353
  except Exception as e:
 
385
  except Exception as e:
386
  last_exc = e
387
  msg = str(e).lower()
388
+ # retry for transient/server errors
389
  if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
390
+ logging.warning("Transient error from Responses API, will retry: %s", e)
391
+ continue
392
+ logging.exception("Non-retryable Responses API error")
393
+ raise
394
  if time.time() - start > timeout:
395
  raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
396
  time.sleep(backoff)
 
471
  tb = traceback.format_exc()
472
  return tb if len(tb) <= max_chars else tb[:max_chars] + "\n...[truncated]"
473
 
474
def scrub_api_keys(s: str) -> str:
    """Return ``s`` with the effective API key replaced by a redaction marker.

    Falsy input (``None`` or an empty string) is returned unchanged without
    looking up the key. The text is also returned unchanged when no key is
    configured or the key does not occur in it.
    """
    if s:
        secret = get_effective_api_key()
        if secret and secret in s:
            s = s.replace(secret, "[REDACTED_API_KEY]")
    return s
481
+
482
  # ---- Layout ----
483
  col1, col2 = st.columns([1, 3])
484
  with col1:
 
499
  except Exception:
500
  st.session_state["file_hash"] = None
501
  except Exception as e:
502
+ logging.exception("Failed to load video")
503
  st.sidebar.error(f"Failed to load video: {e}")
504
 
505
  if st.session_state["videos"]:
 
545
  if HAS_GENAI and genai is not None:
546
  genai.configure(api_key=key_to_use)
547
  except Exception:
548
+ logging.exception("genai.configure failed at start")
549
 
550
  model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
551
  if st.session_state.get("last_model") != model_id:
 
576
  try:
577
  uploaded = upload_video_sdk(upload_path)
578
  except Exception as e:
579
+ err = scrub_api_keys(f"Upload failed: {e}\n\nTraceback:\n{safe_traceback()}")
580
+ st.session_state["last_error"] = err
581
  st.error("Upload failed. See Last Error for details.")
582
  raise
583
 
 
593
  processing_bar.progress(pct)
594
  processing_placeholder.success("Processing complete")
595
  except Exception as e:
596
+ err = scrub_api_keys(f"Processing failed/wait timeout: {e}\n\nTraceback:\n{safe_traceback()}")
597
+ st.session_state["last_error"] = err
598
  st.error("Video processing failed or timed out. See Last Error.")
599
  raise
600
 
 
645
  out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
646
  except Exception as e:
647
  tb = traceback.format_exc()
648
+ st.session_state["last_error"] = scrub_api_keys(f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{safe_traceback()}")
649
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
650
  out = ""
651
 
 
670
 
671
  except Exception as e:
672
  tb = traceback.format_exc()
673
+ err = scrub_api_keys(f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{safe_traceback()}")
674
+ st.session_state["last_error"] = err
675
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
676
  finally:
677
  st.session_state["busy"] = False