CB committed on
Commit
42f08aa
·
verified ·
1 Parent(s): ff4797c

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +51 -39
streamlit_app.py CHANGED
@@ -6,14 +6,15 @@ from glob import glob
6
  from pathlib import Path
7
  from tempfile import NamedTemporaryFile
8
 
9
- import yt_dlp
10
  import ffmpeg
 
 
11
  import streamlit as st
12
  from dotenv import load_dotenv
13
 
14
  load_dotenv()
15
 
16
- # Try to import SDK
17
  HAS_GENAI = False
18
  genai = None
19
  upload_file = None
@@ -29,13 +30,11 @@ try:
29
  except Exception:
30
  HAS_GENAI = False
31
 
32
- import requests
33
- import json
34
-
35
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
36
  DATA_DIR = Path("./data")
37
  DATA_DIR.mkdir(exist_ok=True)
38
 
 
39
  for k, v in {
40
  "videos": "",
41
  "loop_video": False,
@@ -51,8 +50,13 @@ for k, v in {
51
  }.items():
52
  st.session_state.setdefault(k, v)
53
 
 
 
54
def sanitize_filename(path_str: str):
    """Return a filesystem-friendly version of the base name of *path_str*.

    The base name is lowercased, a fixed set of punctuation/special
    characters is stripped out, and any remaining spaces are replaced
    with underscores.
    """
    forbidden = "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*"
    cleaned = Path(path_str).name.lower()
    cleaned = cleaned.translate(str.maketrans("", "", forbidden))
    return cleaned.replace(" ", "_")
 
 
 
56
 
57
  def file_sha256(path: str, block_size: int = 65536) -> str:
58
  h = hashlib.sha256()
@@ -61,6 +65,7 @@ def file_sha256(path: str, block_size: int = 65536) -> str:
61
  h.update(chunk)
62
  return h.hexdigest()
63
 
 
64
  def safe_ffmpeg_run(stream_cmd):
65
  try:
66
  stream_cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
@@ -71,6 +76,7 @@ def safe_ffmpeg_run(stream_cmd):
71
  except Exception:
72
  return False, str(e)
73
 
 
74
  def convert_video_to_mp4(video_path: str) -> str:
75
  target = Path(video_path).with_suffix(".mp4")
76
  if target.exists():
@@ -92,6 +98,7 @@ def convert_video_to_mp4(video_path: str) -> str:
92
  pass
93
  return str(target)
94
 
 
95
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
96
  tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
97
  tmp.close()
@@ -105,6 +112,7 @@ def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str
105
  os.replace(tmp.name, target_path)
106
  return target_path
107
 
 
108
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
109
  if not url:
110
  raise ValueError("No URL provided")
@@ -123,6 +131,7 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
123
  raise FileNotFoundError("Downloaded video not found")
124
  return convert_video_to_mp4(matches[0])
125
 
 
126
  def file_name_or_id(file_obj):
127
  if not file_obj:
128
  return None
@@ -139,6 +148,7 @@ def file_name_or_id(file_obj):
139
  s = str(file_obj)
140
  return s if s else None
141
 
 
142
  def upload_video_sdk(filepath: str):
143
  key = get_runtime_api_key()
144
  if not key:
@@ -148,6 +158,7 @@ def upload_video_sdk(filepath: str):
148
  genai.configure(api_key=key)
149
  return upload_file(filepath)
150
 
 
151
  def wait_for_processed(file_obj, timeout=600):
152
  if not HAS_GENAI or get_file is None:
153
  return file_obj
@@ -169,6 +180,7 @@ def wait_for_processed(file_obj, timeout=600):
169
  time.sleep(backoff)
170
  backoff = min(backoff * 2, 8.0)
171
 
 
172
  def remove_prompt_echo(prompt: str, text: str):
173
  if not prompt or not text:
174
  return text
@@ -188,6 +200,8 @@ def remove_prompt_echo(prompt: str, text: str):
188
  return t[len(ph):].lstrip(" \n:-")
189
  return text
190
 
 
 
191
  st.sidebar.header("Video Input")
192
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
193
  settings = st.sidebar.expander("Settings", expanded=False)
@@ -196,7 +210,6 @@ env_key = os.getenv("GOOGLE_API_KEY", "")
196
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
197
  model_input = settings.text_input("Model (short name)", "text-bison@001")
198
  model_id = model_input.strip() or "text-bison@001"
199
- model_arg = model_id
200
 
201
  default_prompt = (
202
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
@@ -206,7 +219,7 @@ default_prompt = (
206
  "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
207
  )
208
 
209
- analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=300)
210
  settings.text_input("Video Password (if needed)", key="video-password", type="password")
211
  settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
212
  settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
@@ -257,7 +270,7 @@ if st.session_state["videos"]:
257
  pass
258
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
259
 
260
- col1, col2 = st.columns([1, 3])
261
  with col1:
262
  if st.session_state.get("busy"):
263
  st.write("Generation in progress...")
@@ -266,15 +279,16 @@ with col1:
266
  st.session_state["last_error"] = "Generation cancelled by user."
267
  else:
268
  generate_now = st.button("Generate the story", type="primary")
269
- with col2:
270
- pass
271
 
 
 
272
def get_runtime_api_key():
    """Resolve the Google API key to use at runtime.

    The key typed into the sidebar (``API_KEY_INPUT``) takes priority;
    otherwise fall back to the ``GOOGLE_API_KEY`` environment variable.
    Returns ``None`` when neither source provides a non-blank value.
    """
    entered = API_KEY_INPUT.strip() if API_KEY_INPUT else ""
    if entered:
        return entered
    env_value = os.getenv("GOOGLE_API_KEY", "").strip()
    return env_value or None
277
 
 
278
  def _messages_to_prompt(messages):
279
  if not messages:
280
  return ""
@@ -285,7 +299,9 @@ def _messages_to_prompt(messages):
285
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
286
  return "\n".join(parts)
287
 
 
288
  def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens: int):
 
289
  url = "https://generativelanguage.googleapis.com/v1/responses"
290
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
291
  payload = {
@@ -295,14 +311,14 @@ def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens:
295
  }
296
  r = requests.post(url, json=payload, headers=headers, timeout=30)
297
  if r.status_code != 200:
298
- # include body for debugging
299
  raise RuntimeError(f"HTTP {r.status_code}: {r.text}")
300
  return r.json()
301
 
 
302
  def responses_generate(model, messages, files, max_output_tokens, api_key):
303
  if not api_key:
304
  raise RuntimeError("No API key for responses_generate")
305
- # SDK path
306
  if HAS_GENAI and genai is not None:
307
  try:
308
  genai.configure(api_key=api_key)
@@ -314,59 +330,55 @@ def responses_generate(model, messages, files, max_output_tokens, api_key):
314
  return responses_obj.generate(**sdk_kwargs)
315
  except Exception:
316
  pass
317
- # HTTP fallback (Responses v1)
318
  prompt = _messages_to_prompt(messages)
319
  return _http_generate_responses(api_key, model, prompt, max_output_tokens)
320
 
 
321
def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
    """Invoke the Responses API for one prompt, retrying once after a 1s pause.

    Builds the message list from the system and user messages, optionally
    attaches the uploaded file reference, and re-raises the exception if
    the second attempt also fails.
    """
    messages = [system_msg, user_msg]
    attachments = [{"name": fname}] if fname else None
    for attempt in range(2):
        try:
            return responses_generate(model_used, messages, attachments, max_tokens, api_key=get_runtime_api_key())
        except Exception:
            if attempt < 1:
                # first failure: brief pause, then one retry
                time.sleep(1.0)
            else:
                raise
332
 
 
333
  def extract_text_from_response(response):
334
  if response is None:
335
  return None
336
  if isinstance(response, dict):
337
- # new Responses v1 shape: "output" -> list of items, each may contain "content" list with {"text":...}
338
  out = []
339
  for item in response.get("output", []) or []:
340
  if isinstance(item, dict):
341
- # content list
342
  for c in item.get("content", []) or []:
343
  if isinstance(c, dict) and "text" in c:
344
  out.append(c["text"])
345
- # fallback short text fields
346
  if "text" in item and isinstance(item["text"], str):
347
  out.append(item["text"])
348
  if "content" in item and isinstance(item["content"], str):
349
  out.append(item["content"])
350
  if out:
351
  return "\n\n".join(out)
352
- # older candidates style
353
  if "candidates" in response and response["candidates"]:
354
  cand = response["candidates"][0]
355
  if isinstance(cand, dict):
356
  return cand.get("content") or cand.get("text")
357
- # fallback simple fields
358
  if "outputText" in response:
359
  return response.get("outputText")
360
  if "text" in response:
361
  return response.get("text")
362
  return None
363
- # SDK object style
364
  try:
365
  outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
366
  if outputs:
367
  parts = []
368
  for item in outputs:
369
- # SDK item may be object or dict-like
370
  if hasattr(item, "content"):
371
  c = getattr(item, "content")
372
  if isinstance(c, list):
@@ -389,7 +401,8 @@ def extract_text_from_response(response):
389
  pass
390
  return None
391
 
392
- # ---- Main generation flow ----
 
393
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
394
  if not st.session_state.get("videos"):
395
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -414,6 +427,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
414
  upload_path = current_path
415
  uploaded = st.session_state.get("uploaded_file")
416
  compressed_path = None
 
417
  if reupload_needed:
418
  local_path = current_path
419
  fast_mode = bool(st.session_state.get("fast_mode", False))
@@ -454,10 +468,10 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
454
 
455
  prompt_text = (analysis_prompt or default_prompt or "").strip()
456
  if st.session_state.get("fast_mode"):
457
- model_used = model_arg or "text-bison@001"
458
  max_tokens = min(int(st.session_state.get("max_output_tokens", 512)), 1024)
459
  else:
460
- model_used = model_arg or "text-bison@001"
461
  max_tokens = int(st.session_state.get("max_output_tokens", 1024))
462
 
463
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
@@ -503,14 +517,10 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
503
  st.markdown(out or "_(no text returned)_")
504
 
505
  try:
506
- if reupload_needed:
507
- try:
508
- if compressed_path:
509
- p = Path(compressed_path)
510
- if p.exists():
511
- p.unlink(missing_ok=True)
512
- except Exception:
513
- pass
514
  except Exception:
515
  pass
516
 
@@ -540,6 +550,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
540
  finally:
541
  st.session_state["busy"] = False
542
 
 
543
  if st.session_state.get("analysis_out"):
544
  st.subheader("Analysis Result")
545
  st.markdown(st.session_state.get("analysis_out"))
@@ -555,12 +566,7 @@ with st.sidebar.expander("Manage uploads", expanded=False):
555
  Path(f).unlink(missing_ok=True)
556
  except Exception:
557
  pass
558
- st.session_state["videos"] = ""
559
- st.session_state["uploaded_file"] = None
560
- st.session_state["processed_file"] = None
561
- st.session_state["last_loaded_path"] = ""
562
- st.session_state["analysis_out"] = ""
563
- st.session_state["file_hash"] = None
564
  try:
565
  fname = file_name_or_id(st.session_state.get("uploaded_file"))
566
  if fname and delete_file and HAS_GENAI:
@@ -568,4 +574,10 @@ with st.sidebar.expander("Manage uploads", expanded=False):
568
  delete_file(fname)
569
  except Exception:
570
  pass
 
 
 
 
 
 
571
  st.success("Local files removed. Cloud deletion attempted where supported.")
 
6
  from pathlib import Path
7
  from tempfile import NamedTemporaryFile
8
 
 
9
  import ffmpeg
10
+ import yt_dlp
11
+ import requests
12
  import streamlit as st
13
  from dotenv import load_dotenv
14
 
15
  load_dotenv()
16
 
17
+ # Optional Google Generative AI SDK
18
  HAS_GENAI = False
19
  genai = None
20
  upload_file = None
 
30
  except Exception:
31
  HAS_GENAI = False
32
 
 
 
 
33
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
34
  DATA_DIR = Path("./data")
35
  DATA_DIR.mkdir(exist_ok=True)
36
 
37
+ # session defaults
38
  for k, v in {
39
  "videos": "",
40
  "loop_video": False,
 
50
  }.items():
51
  st.session_state.setdefault(k, v)
52
 
53
+
54
+ # Utilities
55
def sanitize_filename(path_str: str):
    """Sanitize a path's base name: lowercase, drop special characters, spaces -> underscores."""
    table = str.maketrans("", "", "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*")
    return Path(path_str).name.lower().translate(table).replace(" ", "_")
59
+
60
 
61
  def file_sha256(path: str, block_size: int = 65536) -> str:
62
  h = hashlib.sha256()
 
65
  h.update(chunk)
66
  return h.hexdigest()
67
 
68
+
69
  def safe_ffmpeg_run(stream_cmd):
70
  try:
71
  stream_cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
 
76
  except Exception:
77
  return False, str(e)
78
 
79
+
80
  def convert_video_to_mp4(video_path: str) -> str:
81
  target = Path(video_path).with_suffix(".mp4")
82
  if target.exists():
 
98
  pass
99
  return str(target)
100
 
101
+
102
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
103
  tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
104
  tmp.close()
 
112
  os.replace(tmp.name, target_path)
113
  return target_path
114
 
115
+
116
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
117
  if not url:
118
  raise ValueError("No URL provided")
 
131
  raise FileNotFoundError("Downloaded video not found")
132
  return convert_video_to_mp4(matches[0])
133
 
134
+
135
  def file_name_or_id(file_obj):
136
  if not file_obj:
137
  return None
 
148
  s = str(file_obj)
149
  return s if s else None
150
 
151
+
152
  def upload_video_sdk(filepath: str):
153
  key = get_runtime_api_key()
154
  if not key:
 
158
  genai.configure(api_key=key)
159
  return upload_file(filepath)
160
 
161
+
162
  def wait_for_processed(file_obj, timeout=600):
163
  if not HAS_GENAI or get_file is None:
164
  return file_obj
 
180
  time.sleep(backoff)
181
  backoff = min(backoff * 2, 8.0)
182
 
183
+
184
  def remove_prompt_echo(prompt: str, text: str):
185
  if not prompt or not text:
186
  return text
 
200
  return t[len(ph):].lstrip(" \n:-")
201
  return text
202
 
203
+
204
+ # UI
205
  st.sidebar.header("Video Input")
206
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
207
  settings = st.sidebar.expander("Settings", expanded=False)
 
210
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
211
  model_input = settings.text_input("Model (short name)", "text-bison@001")
212
  model_id = model_input.strip() or "text-bison@001"
 
213
 
214
  default_prompt = (
215
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
 
219
  "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
220
  )
221
 
222
+ analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=240)
223
  settings.text_input("Video Password (if needed)", key="video-password", type="password")
224
  settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
225
  settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
 
270
  pass
271
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
272
 
273
+ col1, _col2 = st.columns([1, 3])
274
  with col1:
275
  if st.session_state.get("busy"):
276
  st.write("Generation in progress...")
 
279
  st.session_state["last_error"] = "Generation cancelled by user."
280
  else:
281
  generate_now = st.button("Generate the story", type="primary")
 
 
282
 
283
+
284
+ # Runtime helpers for Responses API
285
def get_runtime_api_key():
    """Return the API key for this session: sidebar input wins over the GOOGLE_API_KEY env var; None if neither is set."""
    if API_KEY_INPUT and API_KEY_INPUT.strip():
        return API_KEY_INPUT.strip()
    return os.getenv("GOOGLE_API_KEY", "").strip() or None
290
 
291
+
292
  def _messages_to_prompt(messages):
293
  if not messages:
294
  return ""
 
299
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
300
  return "\n".join(parts)
301
 
302
+
303
  def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens: int):
304
+ # Use Responses v1 endpoint (works with modern Google GenAI HTTP API)
305
  url = "https://generativelanguage.googleapis.com/v1/responses"
306
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
307
  payload = {
 
311
  }
312
  r = requests.post(url, json=payload, headers=headers, timeout=30)
313
  if r.status_code != 200:
 
314
  raise RuntimeError(f"HTTP {r.status_code}: {r.text}")
315
  return r.json()
316
 
317
+
318
  def responses_generate(model, messages, files, max_output_tokens, api_key):
319
  if not api_key:
320
  raise RuntimeError("No API key for responses_generate")
321
+ # Try SDK first (if available and has responses.generate)
322
  if HAS_GENAI and genai is not None:
323
  try:
324
  genai.configure(api_key=api_key)
 
330
  return responses_obj.generate(**sdk_kwargs)
331
  except Exception:
332
  pass
333
+ # Fallback to HTTP Responses v1
334
  prompt = _messages_to_prompt(messages)
335
  return _http_generate_responses(api_key, model, prompt, max_output_tokens)
336
 
337
+
338
  def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
339
  messages = [system_msg, user_msg]
340
  files = [{"name": fname}] if fname else None
341
  for attempt in range(2):
342
  try:
343
  return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
344
+ except Exception:
345
  if attempt == 0:
346
  time.sleep(1.0)
347
  continue
348
  raise
349
 
350
+
351
  def extract_text_from_response(response):
352
  if response is None:
353
  return None
354
  if isinstance(response, dict):
 
355
  out = []
356
  for item in response.get("output", []) or []:
357
  if isinstance(item, dict):
 
358
  for c in item.get("content", []) or []:
359
  if isinstance(c, dict) and "text" in c:
360
  out.append(c["text"])
 
361
  if "text" in item and isinstance(item["text"], str):
362
  out.append(item["text"])
363
  if "content" in item and isinstance(item["content"], str):
364
  out.append(item["content"])
365
  if out:
366
  return "\n\n".join(out)
 
367
  if "candidates" in response and response["candidates"]:
368
  cand = response["candidates"][0]
369
  if isinstance(cand, dict):
370
  return cand.get("content") or cand.get("text")
 
371
  if "outputText" in response:
372
  return response.get("outputText")
373
  if "text" in response:
374
  return response.get("text")
375
  return None
376
+ # SDK-style objects
377
  try:
378
  outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
379
  if outputs:
380
  parts = []
381
  for item in outputs:
 
382
  if hasattr(item, "content"):
383
  c = getattr(item, "content")
384
  if isinstance(c, list):
 
401
  pass
402
  return None
403
 
404
+
405
+ # Main generation flow
406
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
407
  if not st.session_state.get("videos"):
408
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
427
  upload_path = current_path
428
  uploaded = st.session_state.get("uploaded_file")
429
  compressed_path = None
430
+
431
  if reupload_needed:
432
  local_path = current_path
433
  fast_mode = bool(st.session_state.get("fast_mode", False))
 
468
 
469
  prompt_text = (analysis_prompt or default_prompt or "").strip()
470
  if st.session_state.get("fast_mode"):
471
+ model_used = model_id or "text-bison@001"
472
  max_tokens = min(int(st.session_state.get("max_output_tokens", 512)), 1024)
473
  else:
474
+ model_used = model_id or "text-bison@001"
475
  max_tokens = int(st.session_state.get("max_output_tokens", 1024))
476
 
477
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
 
517
  st.markdown(out or "_(no text returned)_")
518
 
519
  try:
520
+ if reupload_needed and compressed_path:
521
+ p = Path(compressed_path)
522
+ if p.exists():
523
+ p.unlink(missing_ok=True)
 
 
 
 
524
  except Exception:
525
  pass
526
 
 
550
  finally:
551
  st.session_state["busy"] = False
552
 
553
+ # Show outputs / errors
554
  if st.session_state.get("analysis_out"):
555
  st.subheader("Analysis Result")
556
  st.markdown(st.session_state.get("analysis_out"))
 
566
  Path(f).unlink(missing_ok=True)
567
  except Exception:
568
  pass
569
+ # attempt cloud deletion if supported
 
 
 
 
 
570
  try:
571
  fname = file_name_or_id(st.session_state.get("uploaded_file"))
572
  if fname and delete_file and HAS_GENAI:
 
574
  delete_file(fname)
575
  except Exception:
576
  pass
577
+ st.session_state["videos"] = ""
578
+ st.session_state["uploaded_file"] = None
579
+ st.session_state["processed_file"] = None
580
+ st.session_state["last_loaded_path"] = ""
581
+ st.session_state["analysis_out"] = ""
582
+ st.session_state["file_hash"] = None
583
  st.success("Local files removed. Cloud deletion attempted where supported.")