CB committed on
Commit
7a6ab5d
·
verified ·
1 Parent(s): 0802fcb

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +55 -78
streamlit_app.py CHANGED
@@ -1,24 +1,16 @@
1
  # streamlit_app.py
2
  """
3
  Streamlit app for video captioning / analysis using Google GenAI Responses API.
4
- Removed phi-agent support. Uses google.generativeai SDK (Responses).
5
- Requires GOOGLE_API_KEY in environment or entered in UI.
6
 
7
  Features:
8
  - Download video via yt-dlp
9
- - Optional compression for files > 200 MB (configurable)
10
  - Upload video via google.generativeai.upload_file and wait for processing via get_file
11
- - Generate analysis via Responses.generate (or Responses.create legacy compatibility)
12
- - Basic UI for model selection, prompts, timeouts, and status/progress reporting
13
- """
14
-
15
- import logging
16
- import google.generativeai as genai
17
- logger = logging.getLogger("video_ai")
18
- logger.info("genai attrs: %s", [k for k in dir(genai) if not k.startswith('_')])
19
- logger.info("has genai.responses: %s, has genai.Responses: %s, has NewClient: %s",
20
- hasattr(genai, "responses"), hasattr(genai, "Responses"), hasattr(genai, "NewClient"))
21
 
 
 
22
  import os
23
  import time
24
  import string
@@ -33,13 +25,15 @@ import yt_dlp
33
  import ffmpeg
34
  import streamlit as st
35
  from dotenv import load_dotenv
 
36
 
37
- # Google GenAI SDK
38
  try:
39
- import google.generativeai as genai
 
40
  genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
41
- upload_file = getattr(genai, "upload_file", None)
42
- get_file = getattr(genai, "get_file", None)
43
  HAS_GENAI = True
44
  except Exception:
45
  genai = None
@@ -54,6 +48,13 @@ load_dotenv()
54
  logging.basicConfig(level=logging.INFO)
55
  logger = logging.getLogger("video_ai")
56
 
 
 
 
 
 
 
 
57
  # App config
58
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
59
  DATA_DIR = Path("./data")
@@ -76,7 +77,7 @@ st.session_state.setdefault("last_url_value", "")
76
  st.session_state.setdefault("processing_timeout", 900)
77
  st.session_state.setdefault("generation_timeout", 300)
78
  st.session_state.setdefault("preferred_model", "gemini-2.5-flash-lite")
79
- st.session_state.setdefault("compression_threshold_mb", 200) # new threshold per plan
80
 
81
  MODEL_OPTIONS = [
82
  "gemini-2.5-flash",
@@ -110,19 +111,12 @@ def convert_video_to_mp4(video_path: str) -> str:
110
  return target_path
111
 
112
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast", bitrate: str = None):
113
- """
114
- Compress video using ffmpeg; tune via crf or bitrate.
115
- Returns target_path on success, else original input_path.
116
- """
117
  try:
118
  out = ffmpeg.input(input_path)
119
- params = {"vcodec": "libx264", "crf": crf, "preset": preset}
120
  if bitrate:
121
- params["video_bitrate"] = bitrate
122
- # ffmpeg-python uses keyword 'b' for bitrate if passed via output string; using bitrate via args below
123
- stream = out.output(target_path, **{"vcodec": "libx264", "preset": preset}, video_bitrate=bitrate)
124
  else:
125
- stream = out.output(target_path, **params)
126
  stream.run(overwrite_output=True, quiet=True)
127
  if os.path.exists(target_path):
128
  return target_path
@@ -171,25 +165,19 @@ def configure_genai_if_needed():
171
  logger.exception("Failed to configure genai")
172
  return True
173
 
174
- # Upload & processing helpers (using google.generativeai SDK functions upload_file/get_file)
175
- def upload_video_sdk(filepath: str, progress_callback=None):
176
- """
177
- Upload a local file using google.generativeai.upload_file.
178
- Assumes genai.configure(api_key=...) was called.
179
- """
180
  key = get_effective_api_key()
181
  if not key:
182
  raise RuntimeError("No API key provided")
183
  if not HAS_GENAI or upload_file is None:
184
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
185
- # SDK upload_file typically takes path and returns file object
186
  try:
187
  if genai is not None and hasattr(genai, "configure"):
188
  genai.configure(api_key=key)
189
  except Exception:
190
  pass
191
 
192
- # call upload_file and return its result
193
  try:
194
  return upload_file(filepath)
195
  except Exception as e:
@@ -197,9 +185,6 @@ def upload_video_sdk(filepath: str, progress_callback=None):
197
  raise
198
 
199
  def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
200
- """
201
- Poll get_file(name_or_id) until processing state changes away from 'PROCESSING' or timeout.
202
- """
203
  if timeout is None:
204
  timeout = st.session_state.get("processing_timeout", 900)
205
  if not HAS_GENAI or get_file is None:
@@ -302,7 +287,7 @@ def _normalize_genai_response(response):
302
  seen.add(t)
303
  return "\n\n".join(filtered).strip()
304
 
305
- # Generation via Responses API (supports modern and legacy patterns)
306
  def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300, progress_callback=None):
307
  key = get_effective_api_key()
308
  if not key:
@@ -317,14 +302,14 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
317
 
318
  system_msg = {"role": "system", "content": prompt_text}
319
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
320
- call_variants = []
321
 
322
- # preferred modern call
323
- call_variants.append({"method": "responses.generate", "payload": {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}], "max_output_tokens": max_tokens}})
324
- # alternate modern payload shape
325
- call_variants.append({"method": "responses.generate_alt", "payload": {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}], "max_output_tokens": max_tokens}})
326
- # legacy
327
- call_variants.append({"method": "legacy_responses_create", "payload": {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}})
 
328
 
329
  def is_transient_error(e_text: str):
330
  txt = str(e_text).lower()
@@ -335,25 +320,29 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
335
  backoff = 1.0
336
  attempts = 0
337
  while True:
338
- for attempt_payload in call_variants:
339
  attempts += 1
340
- method = attempt_payload["method"]
341
- payload = attempt_payload["payload"]
342
  try:
343
  if progress_callback:
344
- progress_callback("starting", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method})
 
 
345
  if genai_responses is not None and hasattr(genai_responses, "generate"):
346
- response = genai_responses.generate(**payload)
347
- text = _normalize_genai_response(response)
348
  if progress_callback:
349
- progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method})
350
  return text
 
 
351
  if hasattr(genai, "Responses") and hasattr(genai.Responses, "create"):
352
- response = genai.Responses.create(**payload) # type: ignore
353
- text = _normalize_genai_response(response)
354
  if progress_callback:
355
- progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method})
356
  return text
 
 
357
  if hasattr(genai, "GenerativeModel"):
358
  try:
359
  model_obj = genai.GenerativeModel(model_name=model_used)
@@ -366,18 +355,19 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
366
  progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": "GenerativeModel.chat"})
367
  return text
368
  except Exception:
369
- pass
 
 
370
  raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
371
  except Exception as e:
372
  last_exc = e
373
  msg = str(e)
374
- logger.warning("Responses.generate error (model=%s attempt=%s method=%s): %s", model_used, attempts, method, msg)
375
  if not is_transient_error(msg):
376
- if "No supported response generation method" in msg or "has no attribute" in msg or "module 'google.generativeai' has no attribute" in msg:
377
  raise RuntimeError(
378
  "Installed google-generativeai package does not expose a compatible Responses API. "
379
- "Please upgrade to a recent release or install the Google GenAI SDK. "
380
- "Run: pip install --upgrade google-generativeai"
381
  ) from e
382
  raise
383
  if time.time() - start > timeout:
@@ -386,7 +376,6 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
386
  backoff = min(backoff * 2, 8.0)
387
 
388
  # Prompt echo removal
389
- from difflib import SequenceMatcher
390
  def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
391
  if not prompt or not text:
392
  return text
@@ -406,10 +395,12 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
406
  return b_full[len(ph):].lstrip(" \n:-")
407
  return text
408
 
 
409
  # UI
 
410
  current_url = st.session_state.get("url", "")
411
  if current_url != st.session_state.get("last_url_value"):
412
- # clear per-plan
413
  st.session_state["videos"] = ""
414
  st.session_state["last_loaded_path"] = ""
415
  st.session_state["uploaded_file"] = None
@@ -453,7 +444,6 @@ settings_exp.number_input(
453
  key="generation_timeout",
454
  )
455
 
456
- # Compression threshold control (per plan: 200 MB)
457
  settings_exp.number_input(
458
  "Compression threshold (MB)", min_value=10, max_value=2000,
459
  value=st.session_state.get("compression_threshold_mb", 200), step=10,
@@ -466,14 +456,6 @@ settings_exp.caption(f"Using API key from: **{key_source}**")
466
  if not get_effective_api_key():
467
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
468
 
469
- # Safety settings placeholder (kept minimal)
470
- safety_settings = [
471
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
472
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
473
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
474
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
475
- ]
476
-
477
  # Buttons / UI layout
478
  col1, col2 = st.columns([1, 3])
479
  with col1:
@@ -507,7 +489,6 @@ if st.session_state["videos"]:
507
  st.session_state["loop_video"] = loop_checkbox
508
 
509
  if st.button("Clear Video(s)"):
510
- # minimal clear
511
  st.session_state["videos"] = ""
512
  st.session_state["last_loaded_path"] = ""
513
  st.session_state["uploaded_file"] = None
@@ -550,7 +531,7 @@ if generate_now and not st.session_state.get("busy"):
550
  try:
551
  st.session_state["busy"] = True
552
  try:
553
- if HAS_GENAI and genai is not None:
554
  genai.configure(api_key=key_to_use)
555
  except Exception:
556
  logger.exception("genai configure failed")
@@ -558,7 +539,6 @@ if generate_now and not st.session_state.get("busy"):
558
  model_id = model_input_value or st.session_state.get("preferred_model") or "gemini-2.5-flash-lite"
559
  if st.session_state.get("last_model") != model_id:
560
  st.session_state["last_model"] = ""
561
- # no phi agent creation per plan
562
 
563
  processed = st.session_state.get("processed_file")
564
  current_path = st.session_state.get("videos")
@@ -576,7 +556,6 @@ if generate_now and not st.session_state.get("busy"):
576
  raise RuntimeError("google.generativeai SDK not available; install it.")
577
  local_path = current_path
578
 
579
- # Decide whether to compress based on threshold (per plan ≤ threshold upload unchanged)
580
  try:
581
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
582
  except Exception:
@@ -586,7 +565,6 @@ if generate_now and not st.session_state.get("busy"):
586
  upload_path = local_path
587
  threshold_mb = st.session_state.get("compression_threshold_mb", 200)
588
  if file_size_mb is not None and file_size_mb > threshold_mb:
589
- # compress with conservative settings; allow user to tune via constants if desired
590
  compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
591
  with st.spinner("Compressing video before upload..."):
592
  upload_path = compress_video(local_path, compressed_path, crf=28, preset="fast")
@@ -629,7 +607,6 @@ if generate_now and not st.session_state.get("busy"):
629
  max_tokens = 2048 if "2.5" in model_used else 1024
630
  est_tokens = max_tokens
631
 
632
- # Generate via Responses API
633
  try:
634
  gen_progress_placeholder = st.empty()
635
  gen_status = gen_progress_placeholder.text("Starting generation...")
 
1
  # streamlit_app.py
2
  """
3
  Streamlit app for video captioning / analysis using Google GenAI Responses API.
 
 
4
 
5
  Features:
6
  - Download video via yt-dlp
7
+ - Optional compression for files > compression_threshold_mb
8
  - Upload video via google.generativeai.upload_file and wait for processing via get_file
9
+ - Generate analysis via Responses API supporting multiple SDK versions and fallbacks
10
+ - Minimal UI for model selection, prompts, timeouts, and status/progress reporting
 
 
 
 
 
 
 
 
11
 
12
+ Requirements: see requirements.txt provided by user.
13
+ """
14
  import os
15
  import time
16
  import string
 
25
  import ffmpeg
26
  import streamlit as st
27
  from dotenv import load_dotenv
28
+ from difflib import SequenceMatcher
29
 
30
+ # Google GenAI SDK detection (support multiple SDK shapes)
31
  try:
32
+ import google.generativeai as genai # type: ignore
33
+ # Newer SDKs expose genai.responses; older/other expose different members
34
  genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
35
+ upload_file = getattr(genai, "upload_file", None) or getattr(genai, "upload_file", None)
36
+ get_file = getattr(genai, "get_file", None) or getattr(genai, "get_file", None)
37
  HAS_GENAI = True
38
  except Exception:
39
  genai = None
 
48
  logging.basicConfig(level=logging.INFO)
49
  logger = logging.getLogger("video_ai")
50
 
51
+ logger.info("genai attrs: %s", sorted(dir(genai)) if genai is not None else "None")
52
+ logger.info("has genai.responses: %s, has genai.Responses: %s, has upload_file: %s, has get_file: %s",
53
+ bool(getattr(genai, "responses", None)),
54
+ bool(getattr(genai, "Responses", None)),
55
+ bool(upload_file),
56
+ bool(get_file))
57
+
58
  # App config
59
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
60
  DATA_DIR = Path("./data")
 
77
  st.session_state.setdefault("processing_timeout", 900)
78
  st.session_state.setdefault("generation_timeout", 300)
79
  st.session_state.setdefault("preferred_model", "gemini-2.5-flash-lite")
80
+ st.session_state.setdefault("compression_threshold_mb", 200)
81
 
82
  MODEL_OPTIONS = [
83
  "gemini-2.5-flash",
 
111
  return target_path
112
 
113
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast", bitrate: str = None):
 
 
 
 
114
  try:
115
  out = ffmpeg.input(input_path)
 
116
  if bitrate:
117
+ stream = out.output(target_path, vcodec="libx264", preset=preset, video_bitrate=bitrate)
 
 
118
  else:
119
+ stream = out.output(target_path, vcodec="libx264", crf=crf, preset=preset)
120
  stream.run(overwrite_output=True, quiet=True)
121
  if os.path.exists(target_path):
122
  return target_path
 
165
  logger.exception("Failed to configure genai")
166
  return True
167
 
168
+ # Upload & processing helpers
169
+ def upload_video_sdk(filepath: str):
 
 
 
 
170
  key = get_effective_api_key()
171
  if not key:
172
  raise RuntimeError("No API key provided")
173
  if not HAS_GENAI or upload_file is None:
174
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
 
175
  try:
176
  if genai is not None and hasattr(genai, "configure"):
177
  genai.configure(api_key=key)
178
  except Exception:
179
  pass
180
 
 
181
  try:
182
  return upload_file(filepath)
183
  except Exception as e:
 
185
  raise
186
 
187
  def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
 
 
 
188
  if timeout is None:
189
  timeout = st.session_state.get("processing_timeout", 900)
190
  if not HAS_GENAI or get_file is None:
 
287
  seen.add(t)
288
  return "\n\n".join(filtered).strip()
289
 
290
+ # Generation via Responses API (modern + legacy fallbacks)
291
  def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300, progress_callback=None):
292
  key = get_effective_api_key()
293
  if not key:
 
302
 
303
  system_msg = {"role": "system", "content": prompt_text}
304
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
 
305
 
306
+ call_variants = []
307
+ # modern responses.generate
308
+ call_variants.append(("generate", {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}], "max_output_tokens": max_tokens}))
309
+ # alternate modern shape
310
+ call_variants.append(("generate_alt", {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}], "max_output_tokens": max_tokens}))
311
+ # legacy create
312
+ call_variants.append(("legacy_create", {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}))
313
 
314
  def is_transient_error(e_text: str):
315
  txt = str(e_text).lower()
 
320
  backoff = 1.0
321
  attempts = 0
322
  while True:
323
+ for method_name, payload in call_variants:
324
  attempts += 1
 
 
325
  try:
326
  if progress_callback:
327
+ progress_callback("starting", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
328
+
329
+ # Preferred modern path: genai.responses.generate (or genai_responses.generate)
330
  if genai_responses is not None and hasattr(genai_responses, "generate"):
331
+ resp = genai_responses.generate(**payload)
332
+ text = _normalize_genai_response(resp)
333
  if progress_callback:
334
+ progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
335
  return text
336
+
337
+ # Older path: genai.Responses.create
338
  if hasattr(genai, "Responses") and hasattr(genai.Responses, "create"):
339
+ resp = genai.Responses.create(**payload) # type: ignore
340
+ text = _normalize_genai_response(resp)
341
  if progress_callback:
342
+ progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
343
  return text
344
+
345
+ # Fallback: GenerativeModel chat
346
  if hasattr(genai, "GenerativeModel"):
347
  try:
348
  model_obj = genai.GenerativeModel(model_name=model_used)
 
355
  progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": "GenerativeModel.chat"})
356
  return text
357
  except Exception:
358
+ # ignore and try next variant/fallback
359
+ logger.exception("GenerativeModel.chat fallback failed")
360
+
361
  raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
362
  except Exception as e:
363
  last_exc = e
364
  msg = str(e)
365
+ logger.warning("Responses.generate error (model=%s attempt=%s method=%s): %s", model_used, attempts, method_name, msg)
366
  if not is_transient_error(msg):
367
+ if "No supported response generation method" in msg or "has no attribute" in msg:
368
  raise RuntimeError(
369
  "Installed google-generativeai package does not expose a compatible Responses API. "
370
+ "Please upgrade to a recent release: pip install --upgrade google-generativeai"
 
371
  ) from e
372
  raise
373
  if time.time() - start > timeout:
 
376
  backoff = min(backoff * 2, 8.0)
377
 
378
  # Prompt echo removal
 
379
  def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
380
  if not prompt or not text:
381
  return text
 
395
  return b_full[len(ph):].lstrip(" \n:-")
396
  return text
397
 
398
+ # -----------------------
399
  # UI
400
+ # -----------------------
401
  current_url = st.session_state.get("url", "")
402
  if current_url != st.session_state.get("last_url_value"):
403
+ # clear per new URL
404
  st.session_state["videos"] = ""
405
  st.session_state["last_loaded_path"] = ""
406
  st.session_state["uploaded_file"] = None
 
444
  key="generation_timeout",
445
  )
446
 
 
447
  settings_exp.number_input(
448
  "Compression threshold (MB)", min_value=10, max_value=2000,
449
  value=st.session_state.get("compression_threshold_mb", 200), step=10,
 
456
  if not get_effective_api_key():
457
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
458
 
 
 
 
 
 
 
 
 
459
  # Buttons / UI layout
460
  col1, col2 = st.columns([1, 3])
461
  with col1:
 
489
  st.session_state["loop_video"] = loop_checkbox
490
 
491
  if st.button("Clear Video(s)"):
 
492
  st.session_state["videos"] = ""
493
  st.session_state["last_loaded_path"] = ""
494
  st.session_state["uploaded_file"] = None
 
531
  try:
532
  st.session_state["busy"] = True
533
  try:
534
+ if HAS_GENAI and genai is not None and hasattr(genai, "configure"):
535
  genai.configure(api_key=key_to_use)
536
  except Exception:
537
  logger.exception("genai configure failed")
 
539
  model_id = model_input_value or st.session_state.get("preferred_model") or "gemini-2.5-flash-lite"
540
  if st.session_state.get("last_model") != model_id:
541
  st.session_state["last_model"] = ""
 
542
 
543
  processed = st.session_state.get("processed_file")
544
  current_path = st.session_state.get("videos")
 
556
  raise RuntimeError("google.generativeai SDK not available; install it.")
557
  local_path = current_path
558
 
 
559
  try:
560
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
561
  except Exception:
 
565
  upload_path = local_path
566
  threshold_mb = st.session_state.get("compression_threshold_mb", 200)
567
  if file_size_mb is not None and file_size_mb > threshold_mb:
 
568
  compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
569
  with st.spinner("Compressing video before upload..."):
570
  upload_path = compress_video(local_path, compressed_path, crf=28, preset="fast")
 
607
  max_tokens = 2048 if "2.5" in model_used else 1024
608
  est_tokens = max_tokens
609
 
 
610
  try:
611
  gen_progress_placeholder = st.empty()
612
  gen_status = gen_progress_placeholder.text("Starting generation...")