CB committed on
Commit
9101836
·
verified ·
1 Parent(s): 7a6ab5d

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +35 -58
streamlit_app.py CHANGED
@@ -1,16 +1,4 @@
1
  # streamlit_app.py
2
- """
3
- Streamlit app for video captioning / analysis using Google GenAI Responses API.
4
-
5
- Features:
6
- - Download video via yt-dlp
7
- - Optional compression for files > compression_threshold_mb
8
- - Upload video via google.generativeai.upload_file and wait for processing via get_file
9
- - Generate analysis via Responses API supporting multiple SDK versions and fallbacks
10
- - Minimal UI for model selection, prompts, timeouts, and status/progress reporting
11
-
12
- Requirements: see requirements.txt provided by user.
13
- """
14
  import os
15
  import time
16
  import string
@@ -27,13 +15,12 @@ import streamlit as st
27
  from dotenv import load_dotenv
28
  from difflib import SequenceMatcher
29
 
30
- # Google GenAI SDK detection (support multiple SDK shapes)
31
  try:
32
  import google.generativeai as genai # type: ignore
33
- # Newer SDKs expose genai.responses; older/other expose different members
34
  genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
35
- upload_file = getattr(genai, "upload_file", None) or getattr(genai, "upload_file", None)
36
- get_file = getattr(genai, "get_file", None) or getattr(genai, "get_file", None)
37
  HAS_GENAI = True
38
  except Exception:
39
  genai = None
@@ -44,16 +31,10 @@ except Exception:
44
 
45
  load_dotenv()
46
 
47
- # Logging
48
  logging.basicConfig(level=logging.INFO)
49
  logger = logging.getLogger("video_ai")
50
-
51
- logger.info("genai attrs: %s", sorted(dir(genai)) if genai is not None else "None")
52
- logger.info("has genai.responses: %s, has genai.Responses: %s, has upload_file: %s, has get_file: %s",
53
- bool(getattr(genai, "responses", None)),
54
- bool(getattr(genai, "Responses", None)),
55
- bool(upload_file),
56
- bool(get_file))
57
 
58
  # App config
59
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
@@ -72,11 +53,10 @@ st.session_state.setdefault("last_error", "")
72
  st.session_state.setdefault("file_hash", None)
73
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
74
  st.session_state.setdefault("last_model", "")
75
- st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
76
  st.session_state.setdefault("last_url_value", "")
77
  st.session_state.setdefault("processing_timeout", 900)
78
  st.session_state.setdefault("generation_timeout", 300)
79
- st.session_state.setdefault("preferred_model", "gemini-2.5-flash-lite")
80
  st.session_state.setdefault("compression_threshold_mb", 200)
81
 
82
  MODEL_OPTIONS = [
@@ -118,9 +98,7 @@ def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str
118
  else:
119
  stream = out.output(target_path, vcodec="libx264", crf=crf, preset=preset)
120
  stream.run(overwrite_output=True, quiet=True)
121
- if os.path.exists(target_path):
122
- return target_path
123
- return input_path
124
  except Exception:
125
  logger.exception("Compression failed")
126
  return input_path
@@ -165,7 +143,7 @@ def configure_genai_if_needed():
165
  logger.exception("Failed to configure genai")
166
  return True
167
 
168
- # Upload & processing helpers
169
  def upload_video_sdk(filepath: str):
170
  key = get_effective_api_key()
171
  if not key:
@@ -177,10 +155,9 @@ def upload_video_sdk(filepath: str):
177
  genai.configure(api_key=key)
178
  except Exception:
179
  pass
180
-
181
  try:
182
  return upload_file(filepath)
183
- except Exception as e:
184
  logger.exception("Upload failed")
185
  raise
186
 
@@ -222,7 +199,7 @@ def wait_for_processed(file_obj, timeout: int = None, progress_callback=None):
222
  time.sleep(backoff)
223
  backoff = min(backoff * 2, 8.0)
224
 
225
- # Response normalization
226
  def _normalize_genai_response(response):
227
  if response is None:
228
  return ""
@@ -287,7 +264,7 @@ def _normalize_genai_response(response):
287
  seen.add(t)
288
  return "\n\n".join(filtered).strip()
289
 
290
- # Generation via Responses API (modern + legacy fallbacks)
291
  def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300, progress_callback=None):
292
  key = get_effective_api_key()
293
  if not key:
@@ -303,13 +280,11 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
303
  system_msg = {"role": "system", "content": prompt_text}
304
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
305
 
306
- call_variants = []
307
- # modern responses.generate
308
- call_variants.append(("generate", {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}], "max_output_tokens": max_tokens}))
309
- # alternate modern shape
310
- call_variants.append(("generate_alt", {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}], "max_output_tokens": max_tokens}))
311
- # legacy create
312
- call_variants.append(("legacy_create", {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}))
313
 
314
  def is_transient_error(e_text: str):
315
  txt = str(e_text).lower()
@@ -326,12 +301,12 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
326
  if progress_callback:
327
  progress_callback("starting", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
328
 
329
- # Preferred modern path: genai.responses.generate (or genai_responses.generate)
330
  if genai_responses is not None and hasattr(genai_responses, "generate"):
331
  resp = genai_responses.generate(**payload)
332
  text = _normalize_genai_response(resp)
333
  if progress_callback:
334
- progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
335
  return text
336
 
337
  # Older path: genai.Responses.create
@@ -339,30 +314,36 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
339
  resp = genai.Responses.create(**payload) # type: ignore
340
  text = _normalize_genai_response(resp)
341
  if progress_callback:
342
- progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
343
  return text
344
 
345
- # Fallback: GenerativeModel chat
346
  if hasattr(genai, "GenerativeModel"):
347
  try:
348
  model_obj = genai.GenerativeModel(model_name=model_used)
349
  if hasattr(model_obj, "start_chat"):
350
  chat = model_obj.start_chat()
351
- resp = chat.send_message(prompt_text, timeout=timeout)
 
 
 
 
 
 
 
352
  text = getattr(resp, "text", None) or str(resp)
353
  text = text if text else _normalize_genai_response(resp)
354
  if progress_callback:
355
- progress_callback("done", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": "GenerativeModel.chat"})
356
  return text
357
  except Exception:
358
- # ignore and try next variant/fallback
359
  logger.exception("GenerativeModel.chat fallback failed")
360
 
361
  raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
362
  except Exception as e:
363
  last_exc = e
364
  msg = str(e)
365
- logger.warning("Responses.generate error (model=%s attempt=%s method=%s): %s", model_used, attempts, method_name, msg)
366
  if not is_transient_error(msg):
367
  if "No supported response generation method" in msg or "has no attribute" in msg:
368
  raise RuntimeError(
@@ -375,7 +356,7 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
375
  time.sleep(backoff)
376
  backoff = min(backoff * 2, 8.0)
377
 
378
- # Prompt echo removal
379
  def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
380
  if not prompt or not text:
381
  return text
@@ -395,12 +376,9 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
395
  return b_full[len(ph):].lstrip(" \n:-")
396
  return text
397
 
398
- # -----------------------
399
- # UI
400
- # -----------------------
401
  current_url = st.session_state.get("url", "")
402
  if current_url != st.session_state.get("last_url_value"):
403
- # clear per new URL
404
  st.session_state["videos"] = ""
405
  st.session_state["last_loaded_path"] = ""
406
  st.session_state["uploaded_file"] = None
@@ -419,10 +397,10 @@ st.sidebar.header("Video Input")
419
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
420
 
421
  settings_exp = st.sidebar.expander("Settings", expanded=False)
422
- chosen = settings_exp.selectbox("Gemini model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(st.session_state.get("preferred_model", "gemini-2.5-flash-lite")))
423
  custom_model = ""
424
  if chosen == "custom":
425
- custom_model = settings_exp.text_input("Custom model name", value=st.session_state.get("preferred_model", "gemini-2.5-flash-lite"))
426
  model_input_value = (custom_model.strip() if chosen == "custom" else chosen).strip()
427
 
428
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
@@ -456,7 +434,6 @@ settings_exp.caption(f"Using API key from: **{key_source}**")
456
  if not get_effective_api_key():
457
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
458
 
459
- # Buttons / UI layout
460
  col1, col2 = st.columns([1, 3])
461
  with col1:
462
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
@@ -536,7 +513,7 @@ if generate_now and not st.session_state.get("busy"):
536
  except Exception:
537
  logger.exception("genai configure failed")
538
 
539
- model_id = model_input_value or st.session_state.get("preferred_model") or "gemini-2.5-flash-lite"
540
  if st.session_state.get("last_model") != model_id:
541
  st.session_state["last_model"] = ""
542
 
 
1
  # streamlit_app.py
 
 
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  import time
4
  import string
 
15
  from dotenv import load_dotenv
16
  from difflib import SequenceMatcher
17
 
18
+ # Try import google.generativeai, support multiple SDK shapes
19
  try:
20
  import google.generativeai as genai # type: ignore
 
21
  genai_responses = getattr(genai, "responses", None) or getattr(genai, "Responses", None)
22
+ upload_file = getattr(genai, "upload_file", None)
23
+ get_file = getattr(genai, "get_file", None)
24
  HAS_GENAI = True
25
  except Exception:
26
  genai = None
 
31
 
32
  load_dotenv()
33
 
34
+ # Logging (minimal)
35
  logging.basicConfig(level=logging.INFO)
36
  logger = logging.getLogger("video_ai")
37
+ logger.propagate = False
 
 
 
 
 
 
38
 
39
  # App config
40
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
 
53
  st.session_state.setdefault("file_hash", None)
54
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
55
  st.session_state.setdefault("last_model", "")
 
56
  st.session_state.setdefault("last_url_value", "")
57
  st.session_state.setdefault("processing_timeout", 900)
58
  st.session_state.setdefault("generation_timeout", 300)
59
+ st.session_state.setdefault("preferred_model", "gemini-2.0-flash-lite")
60
  st.session_state.setdefault("compression_threshold_mb", 200)
61
 
62
  MODEL_OPTIONS = [
 
98
  else:
99
  stream = out.output(target_path, vcodec="libx264", crf=crf, preset=preset)
100
  stream.run(overwrite_output=True, quiet=True)
101
+ return target_path if os.path.exists(target_path) else input_path
 
 
102
  except Exception:
103
  logger.exception("Compression failed")
104
  return input_path
 
143
  logger.exception("Failed to configure genai")
144
  return True
145
 
146
+ # Upload & processing
147
  def upload_video_sdk(filepath: str):
148
  key = get_effective_api_key()
149
  if not key:
 
155
  genai.configure(api_key=key)
156
  except Exception:
157
  pass
 
158
  try:
159
  return upload_file(filepath)
160
+ except Exception:
161
  logger.exception("Upload failed")
162
  raise
163
 
 
199
  time.sleep(backoff)
200
  backoff = min(backoff * 2, 8.0)
201
 
202
+ # Normalize responses into text
203
  def _normalize_genai_response(response):
204
  if response is None:
205
  return ""
 
264
  seen.add(t)
265
  return "\n\n".join(filtered).strip()
266
 
267
+ # Generation (supports various SDK shapes)
268
  def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300, progress_callback=None):
269
  key = get_effective_api_key()
270
  if not key:
 
280
  system_msg = {"role": "system", "content": prompt_text}
281
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
282
 
283
+ call_variants = [
284
+ ("responses.generate", {"model": model_used, "messages": [system_msg, user_msg], "files": [{"name": fname}], "max_output_tokens": max_tokens}),
285
+ ("responses.generate_alt", {"model": model_used, "input": [{"text": prompt_text, "files": [{"name": fname}]}], "max_output_tokens": max_tokens}),
286
+ ("legacy_create", {"model": model_used, "input": prompt_text, "file": fname, "max_output_tokens": max_tokens}),
287
+ ]
 
 
288
 
289
  def is_transient_error(e_text: str):
290
  txt = str(e_text).lower()
 
301
  if progress_callback:
302
  progress_callback("starting", int(time.time() - start), {"model": model_used, "attempt": attempts, "method": method_name})
303
 
304
+ # Preferred modern: genai.responses.generate or genai_responses.generate
305
  if genai_responses is not None and hasattr(genai_responses, "generate"):
306
  resp = genai_responses.generate(**payload)
307
  text = _normalize_genai_response(resp)
308
  if progress_callback:
309
+ progress_callback("done", int(time.time() - start), {"method": method_name})
310
  return text
311
 
312
  # Older path: genai.Responses.create
 
314
  resp = genai.Responses.create(**payload) # type: ignore
315
  text = _normalize_genai_response(resp)
316
  if progress_callback:
317
+ progress_callback("done", int(time.time() - start), {"method": method_name})
318
  return text
319
 
320
+ # Fallback: GenerativeModel API (ChatSession). This SDK's ChatSession.send_message may not accept timeout kw.
321
  if hasattr(genai, "GenerativeModel"):
322
  try:
323
  model_obj = genai.GenerativeModel(model_name=model_used)
324
  if hasattr(model_obj, "start_chat"):
325
  chat = model_obj.start_chat()
326
+ # Some SDKs' send_message signature differs; call without timeout kw when necessary.
327
+ send = getattr(chat, "send_message", None)
328
+ if send is None:
329
+ raise RuntimeError("ChatSession has no send_message")
330
+ try:
331
+ resp = send(prompt_text, timeout=timeout) # try with timeout
332
+ except TypeError:
333
+ resp = send(prompt_text) # fallback without timeout
334
  text = getattr(resp, "text", None) or str(resp)
335
  text = text if text else _normalize_genai_response(resp)
336
  if progress_callback:
337
+ progress_callback("done", int(time.time() - start), {"method": "GenerativeModel.chat"})
338
  return text
339
  except Exception:
 
340
  logger.exception("GenerativeModel.chat fallback failed")
341
 
342
  raise RuntimeError("No supported response generation method available in installed google-generativeai package.")
343
  except Exception as e:
344
  last_exc = e
345
  msg = str(e)
346
+ logger.warning("Generation error (model=%s attempt=%s method=%s): %s", model_used, attempts, method_name, msg)
347
  if not is_transient_error(msg):
348
  if "No supported response generation method" in msg or "has no attribute" in msg:
349
  raise RuntimeError(
 
356
  time.sleep(backoff)
357
  backoff = min(backoff * 2, 8.0)
358
 
359
+ # Trim prompt echoes
360
  def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
361
  if not prompt or not text:
362
  return text
 
376
  return b_full[len(ph):].lstrip(" \n:-")
377
  return text
378
 
379
+ # UI: reset per new URL value
 
 
380
  current_url = st.session_state.get("url", "")
381
  if current_url != st.session_state.get("last_url_value"):
 
382
  st.session_state["videos"] = ""
383
  st.session_state["last_loaded_path"] = ""
384
  st.session_state["uploaded_file"] = None
 
397
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
398
 
399
  settings_exp = st.sidebar.expander("Settings", expanded=False)
400
+ chosen = settings_exp.selectbox("Gemini model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(st.session_state.get("preferred_model", "gemini-2.0-flash-lite")))
401
  custom_model = ""
402
  if chosen == "custom":
403
+ custom_model = settings_exp.text_input("Custom model name", value=st.session_state.get("preferred_model", "gemini-2.0-flash-lite"))
404
  model_input_value = (custom_model.strip() if chosen == "custom" else chosen).strip()
405
 
406
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
 
434
  if not get_effective_api_key():
435
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
436
 
 
437
  col1, col2 = st.columns([1, 3])
438
  with col1:
439
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
 
513
  except Exception:
514
  logger.exception("genai configure failed")
515
 
516
+ model_id = model_input_value or st.session_state.get("preferred_model") or "gemini-2.0-flash-lite"
517
  if st.session_state.get("last_model") != model_id:
518
  st.session_state["last_model"] = ""
519