CB committed on
Commit
8085632
·
verified ·
1 Parent(s): 33d86c2

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +104 -163
streamlit_app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import time
3
  import hashlib
@@ -12,6 +13,25 @@ from dotenv import load_dotenv
12
 
13
  load_dotenv()
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
16
  DATA_DIR = Path("./data")
17
  DATA_DIR.mkdir(exist_ok=True)
@@ -106,60 +126,19 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
106
  def file_name_or_id(file_obj):
107
  if not file_obj:
108
  return None
 
109
  if isinstance(file_obj, dict):
110
  for key in ("name", "id", "fileId", "file_id", "uri", "url"):
111
  val = file_obj.get(key)
112
  if val:
113
- s = str(val)
114
- if s.startswith("http://") or s.startswith("https://"):
115
- tail = s.rstrip("/").split("/")[-1]
116
- return tail if tail.startswith("files/") else f"files/{tail}"
117
- if s.startswith("files/"):
118
- return s
119
- if "/" not in s and 6 <= len(s) <= 128:
120
- return f"files/{s}"
121
- return s
122
- uri = file_obj.get("uri") or file_obj.get("url")
123
- if uri:
124
- tail = str(uri).rstrip("/").split("/")[-1]
125
- return tail if tail.startswith("files/") else f"files/{tail}"
126
  return None
127
  for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
128
  val = getattr(file_obj, attr, None)
129
  if val:
130
- s = str(val)
131
- if s.startswith("http://") or s.startswith("https://"):
132
- tail = s.rstrip("/").split("/")[-1]
133
- return tail if tail.startswith("files/") else f"files/{tail}"
134
- if s.startswith("files/"):
135
- return s
136
- if "/" not in s and 6 <= len(s) <= 128:
137
- return f"files/{s}"
138
- return s
139
  s = str(file_obj)
140
- if "http://" in s or "https://" in s:
141
- tail = s.rstrip("/").split("/")[-1]
142
- return tail if tail.startswith("files/") else f"files/{tail}"
143
- if "files/" in s:
144
- idx = s.find("files/")
145
- return s[idx:] if s[idx:].startswith("files/") else f"files/{s[idx+6:]}"
146
- return None
147
-
148
- HAS_GENAI = False
149
- genai = None
150
- upload_file = None
151
- get_file = None
152
- delete_file = None
153
- if os.getenv("GOOGLE_API_KEY"):
154
- try:
155
- import google.generativeai as genai_mod
156
- genai = genai_mod
157
- upload_file = getattr(genai_mod, "upload_file", None)
158
- get_file = getattr(genai_mod, "get_file", None)
159
- delete_file = getattr(genai_mod, "delete_file", None)
160
- HAS_GENAI = True
161
- except Exception:
162
- HAS_GENAI = False
163
 
164
  def upload_video_sdk(filepath: str):
165
  key = get_runtime_api_key()
@@ -216,9 +195,12 @@ settings = st.sidebar.expander("Settings", expanded=False)
216
 
217
  env_key = os.getenv("GOOGLE_API_KEY", "")
218
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
219
- model_input = settings.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
220
- model_id = model_input.strip() or "gemini-2.0-flash-lite"
 
 
221
  model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
 
222
 
223
  default_prompt = (
224
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
@@ -297,17 +279,15 @@ def get_runtime_api_key():
297
  return key
298
  return os.getenv("GOOGLE_API_KEY", "").strip() or None
299
 
300
- # --- patched responses / generate compatibility layer ---
301
- import json
302
- import requests
303
-
304
- def _normalize_model_for_url(model: str) -> str:
305
  if not model:
306
- return "gemini-2.0"
307
- return model.split("/", 1)[-1] if model.startswith("models/") else model
 
 
308
 
309
- def _build_prompt_from_messages(messages):
310
- # messages expected as list of {"role":..., "content":...}
311
  if not messages:
312
  return ""
313
  parts = []
@@ -317,145 +297,103 @@ def _build_prompt_from_messages(messages):
317
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
318
  return "\n".join(parts)
319
 
320
- def _parse_http_generate_response(rjson):
321
- # Attempt to extract text from various generate shapes
322
- if not rjson:
323
- return None
324
- # common new GL formats: {'candidates':[{'content': '...'}]} or {'output': [{'content': ...}]}
325
- if isinstance(rjson, dict):
326
- # try 'candidates'
327
- if "candidates" in rjson and isinstance(rjson["candidates"], list) and rjson["candidates"]:
328
- cand = rjson["candidates"][0]
329
- return cand.get("content") or cand.get("text") or rjson.get("text")
330
- # try 'output' array with 'content' items
331
- out = rjson.get("output")
332
- if isinstance(out, list) and out:
333
- texts = []
334
- for item in out:
335
- if isinstance(item, dict):
336
- c = item.get("content") or item.get("contents") or item.get("text")
337
- if isinstance(c, str):
338
- texts.append(c)
339
- elif isinstance(c, list):
340
- for sub in c:
341
- if isinstance(sub, dict):
342
- t = sub.get("text") or sub.get("content")
343
- if t:
344
- texts.append(t)
345
- if texts:
346
- return "\n\n".join(texts)
347
- # fallback to top-level text
348
- if "text" in rjson and isinstance(rjson["text"], str):
349
- return rjson["text"]
350
- return None
351
-
352
- def responses_generate(model, messages, files, max_output_tokens, api_key):
353
- if not api_key:
354
- raise RuntimeError("No API key for responses_generate")
355
- sdk_err = None
356
-
357
- # try SDK responses.generate (preferred)
358
- if HAS_GENAI and genai is not None:
359
- try:
360
- genai.configure(api_key=api_key)
361
- responses_obj = getattr(genai, "responses", None)
362
- if responses_obj is not None and hasattr(responses_obj, "generate"):
363
- # SDK expects messages and files in their SDK-specific shapes
364
- sdk_kwargs = {"model": model, "messages": messages, "max_output_tokens": int(max_output_tokens or 512)}
365
- if files:
366
- sdk_kwargs["files"] = files
367
- return responses_obj.generate(**sdk_kwargs)
368
- except Exception as e:
369
- sdk_err = str(e)
370
-
371
- # HTTP fallback to Generative Language "generate" endpoints.
372
  host = "https://generativelanguage.googleapis.com"
373
- norm_model = _normalize_model_for_url(model)
374
  candidates = [
375
- f"{host}/v1/models/{norm_model}:generate",
376
- f"{host}/v1beta3/models/{norm_model}:generate",
377
- f"{host}/v1beta2/models/{norm_model}:generate",
378
  ]
379
-
380
- prompt_text = _build_prompt_from_messages(messages)
381
- payload = {"prompt": {"text": prompt_text}, "maxOutputTokens": int(max_output_tokens or 512)}
382
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
383
  last_exc = None
384
-
385
  for url in candidates:
386
  try:
387
- r = requests.post(url, json=payload, headers=headers, timeout=15)
388
  if r.status_code == 200:
389
  try:
390
  return r.json()
391
  except Exception:
392
  return {"text": r.text}
393
- # if 404, try next; collect last
394
  last_exc = RuntimeError(f"HTTP {r.status_code}: {r.text}")
395
  except Exception as e:
396
  last_exc = e
 
397
 
398
- diag = {"sdk_error": sdk_err, "http_error": str(last_exc), "tried_urls": candidates}
399
- raise RuntimeError(f"genai.responses not available and HTTP fallback failed: {diag}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
  def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
402
- # messages as [system_msg, user_msg]
403
  messages = [system_msg, user_msg]
404
  files = [{"name": fname}] if fname else None
405
  for attempt in range(2):
406
  try:
407
  return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
408
- except Exception:
409
  if attempt == 0:
410
  time.sleep(1.0)
411
  continue
412
  raise
413
 
414
- # Helper to extract text from either SDK response object or HTTP dict
415
  def extract_text_from_response(response):
416
- # SDK may return an object with .output, .candidates, or .text
417
- # HTTP returns a dict with various shapes
418
- # If it's an object (not dict), try attribute access
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  try:
420
- if response is None:
421
- return None
422
- if isinstance(response, dict):
423
- # HTTP-style
424
- text = _parse_http_generate_response(response)
425
- if text:
426
- return text
427
- # try 'output' field shaped differently
428
- outputs = response.get("output") or response.get("candidates")
429
- if outputs:
430
- pieces = []
431
- for o in outputs:
432
- if isinstance(o, dict):
433
- t = o.get("content") or o.get("text")
434
- if isinstance(t, str):
435
- pieces.append(t)
436
- if pieces:
437
- return "\n\n".join(pieces)
438
- return response.get("text") or None
439
- else:
440
- # object-like SDK response
441
- outputs = getattr(response, "output", None) or getattr(response, "candidates", None) or None
442
- if outputs:
443
- pieces = []
444
- for item in outputs:
445
- # each item may have 'content' or 'text'
446
- txt = getattr(item, "content", None) or getattr(item, "text", None) or (item.get("content") if isinstance(item, dict) else None)
447
- if txt:
448
- pieces.append(txt)
449
- if pieces:
450
- return "\n\n".join(pieces)
451
- # try top-level text
452
- txt = getattr(response, "text", None)
453
- if txt:
454
- return txt
455
  except Exception:
456
  pass
457
  return None
458
- # --- end patched section ---
 
459
 
460
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
461
  if not st.session_state.get("videos"):
@@ -517,12 +455,13 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
517
 
518
  prompt_text = (analysis_prompt or default_prompt).strip()
519
  if st.session_state.get("fast_mode"):
520
- model_used = model_arg or "gemini-2.0-flash-lite"
521
  max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
522
  else:
523
  model_used = model_arg
524
  max_tokens = st.session_state.get("max_output_tokens", 1024)
525
 
 
526
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
527
  user_msg = {"role": "user", "content": prompt_text}
528
 
@@ -531,6 +470,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
531
 
532
  out = extract_text_from_response(response)
533
 
 
534
  meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
535
  output_tokens = 0
536
  try:
@@ -541,6 +481,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
541
  except Exception:
542
  output_tokens = 0
543
 
 
544
  if (not out or output_tokens == 0) and model_used:
545
  retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
546
  try:
 
1
+ # streamlit_app.py
2
  import os
3
  import time
4
  import hashlib
 
13
 
14
  load_dotenv()
15
 
16
+ # Optional SDK import; we try to use it when available.
17
+ HAS_GENAI = False
18
+ genai = None
19
+ upload_file = None
20
+ get_file = None
21
+ delete_file = None
22
+ try:
23
+ import google.generativeai as genai_mod # type: ignore
24
+ genai = genai_mod
25
+ upload_file = getattr(genai_mod, "upload_file", None)
26
+ get_file = getattr(genai_mod, "get_file", None)
27
+ delete_file = getattr(genai_mod, "delete_file", None)
28
+ HAS_GENAI = True
29
+ except Exception:
30
+ HAS_GENAI = False
31
+
32
+ import requests
33
+ import json
34
+
35
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
36
  DATA_DIR = Path("./data")
37
  DATA_DIR.mkdir(exist_ok=True)
 
126
  def file_name_or_id(file_obj):
127
  if not file_obj:
128
  return None
129
+ # simple handling for dict or object - return a plausible id/name string
130
  if isinstance(file_obj, dict):
131
  for key in ("name", "id", "fileId", "file_id", "uri", "url"):
132
  val = file_obj.get(key)
133
  if val:
134
+ return str(val)
 
 
 
 
 
 
 
 
 
 
 
 
135
  return None
136
  for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
137
  val = getattr(file_obj, attr, None)
138
  if val:
139
+ return str(val)
 
 
 
 
 
 
 
 
140
  s = str(file_obj)
141
+ return s if s else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
  def upload_video_sdk(filepath: str):
144
  key = get_runtime_api_key()
 
195
 
196
  env_key = os.getenv("GOOGLE_API_KEY", "")
197
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
198
+ # Default model changed to text-bison@001 (broadly available). Replace if you have another.
199
+ model_input = settings.text_input("Model (short name)", "text-bison@001")
200
+ model_id = model_input.strip() or "text-bison@001"
201
+ # model_arg used with SDK; model_for_url used for HTTP
202
  model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
203
+ model_for_url_default = model_arg.split("/", 1)[0] if "@" not in model_arg else model_arg # keep @ if present
204
 
205
  default_prompt = (
206
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
 
279
  return key
280
  return os.getenv("GOOGLE_API_KEY", "").strip() or None
281
 
282
+ # ---- Simplified SDK-first + HTTP-fallback layer ----
283
+ def _normalize_model_for_http(model: str) -> str:
 
 
 
284
  if not model:
285
+ return "text-bison@001"
286
+ # if user provided "models/..." strip prefix
287
+ m = model.split("/", 1)[-1] if model.startswith("models/") else model
288
+ return m
289
 
290
+ def _messages_to_prompt(messages):
 
291
  if not messages:
292
  return ""
293
  parts = []
 
297
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
298
  return "\n".join(parts)
299
 
300
def _http_generate(api_key: str, model: str, prompt: str, max_tokens: int):
    """POST ``prompt`` to the Generative Language ``:generate`` endpoints.

    The v1, v1beta3 and v1beta2 URLs for the normalized model name are tried
    in that order. The first HTTP 200 reply is returned as parsed JSON, or as
    ``{"text": <raw body>}`` when the body is not valid JSON.

    Raises:
        RuntimeError: when no endpoint answered 200; the message carries the
            last failure and the list of URLs tried.

    NOTE(review): the key is sent as an ``Authorization: Bearer`` header, and
    the paths end in ``:generate``. Confirm against the current API reference
    that both the auth scheme and the method name match what the service
    expects for API keys.
    """
    base = "https://generativelanguage.googleapis.com"
    model_name = _normalize_model_for_http(model)
    endpoints = [
        f"{base}/{version}/models/{model_name}:generate"
        for version in ("v1", "v1beta3", "v1beta2")
    ]
    body = {"prompt": {"text": prompt}, "maxOutputTokens": int(max_tokens or 512)}
    request_headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    failure = None
    for endpoint in endpoints:
        try:
            reply = requests.post(endpoint, json=body, headers=request_headers, timeout=20)
            if reply.status_code != 200:
                # Remember the failure and move on to the next API version.
                failure = RuntimeError(f"HTTP {reply.status_code}: {reply.text}")
                continue
            try:
                return reply.json()
            except Exception:
                return {"text": reply.text}
        except Exception as exc:
            failure = exc
    raise RuntimeError(f"HTTP generate failed: {failure}; tried: {endpoints}")
323
 
324
def responses_generate(model, messages, files, max_output_tokens, api_key):
    """Generate a response via the SDK when possible, else via plain HTTP.

    Args:
        model: model name passed to the SDK and to the HTTP helper.
        messages: list of message dicts; flattened to one prompt for HTTP.
        files: optional list of file descriptors, forwarded to the SDK only.
        max_output_tokens: output token cap (the SDK path defaults to 512
            when falsy).
        api_key: API key; required.

    Returns:
        Whatever ``genai.responses.generate`` returns, or the value returned
        by ``_http_generate``.

    Raises:
        RuntimeError: when ``api_key`` is missing, or when the HTTP fallback
            fails. If the SDK attempt also failed, its error is included in
            the message so the caller can see both causes.
    """
    if not api_key:
        raise RuntimeError("No API key for responses_generate")

    sdk_error = None
    if HAS_GENAI and genai is not None:
        try:
            genai.configure(api_key=api_key)
            responses_obj = getattr(genai, "responses", None)
            if responses_obj is not None and hasattr(responses_obj, "generate"):
                sdk_kwargs = {
                    "model": model,
                    "messages": messages,
                    "max_output_tokens": int(max_output_tokens or 512),
                }
                if files:
                    sdk_kwargs["files"] = files
                return responses_obj.generate(**sdk_kwargs)
        except Exception as exc:
            # Best effort: keep the error for diagnostics and try HTTP next.
            sdk_error = exc

    prompt = _messages_to_prompt(messages)
    try:
        return _http_generate(api_key, model, prompt, max_output_tokens)
    except RuntimeError as http_error:
        if sdk_error is None:
            raise
        # Surface the SDK failure too instead of silently discarding it.
        raise RuntimeError(
            f"SDK call failed ({sdk_error!r}) and {http_error}"
        ) from http_error
343
 
344
  def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
 
345
  messages = [system_msg, user_msg]
346
  files = [{"name": fname}] if fname else None
347
  for attempt in range(2):
348
  try:
349
  return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
350
+ except Exception as e:
351
  if attempt == 0:
352
  time.sleep(1.0)
353
  continue
354
  raise
355
 
 
356
  def extract_text_from_response(response):
357
+ if response is None:
358
+ return None
359
+ # dict-like (HTTP)
360
+ if isinstance(response, dict):
361
+ # try common shapes
362
+ if "candidates" in response and isinstance(response["candidates"], list) and response["candidates"]:
363
+ cand = response["candidates"][0]
364
+ return cand.get("content") or cand.get("text") or response.get("text")
365
+ if "output" in response and isinstance(response["output"], list):
366
+ pieces = []
367
+ for item in response["output"]:
368
+ if isinstance(item, dict):
369
+ c = item.get("content") or item.get("text")
370
+ if isinstance(c, str):
371
+ pieces.append(c)
372
+ if pieces:
373
+ return "\n\n".join(pieces)
374
+ if "text" in response and isinstance(response["text"], str):
375
+ return response["text"]
376
+ # fallback: join any candidate-like entries
377
+ return None
378
+ # object-like (SDK)
379
  try:
380
+ outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
381
+ if outputs:
382
+ pieces = []
383
+ for item in outputs:
384
+ txt = getattr(item, "content", None) or getattr(item, "text", None)
385
+ if txt:
386
+ pieces.append(txt)
387
+ if pieces:
388
+ return "\n\n".join(pieces)
389
+ txt = getattr(response, "text", None)
390
+ if txt:
391
+ return txt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
  except Exception:
393
  pass
394
  return None
395
+
396
+ # ---- end compatibility layer ----
397
 
398
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
399
  if not st.session_state.get("videos"):
 
455
 
456
  prompt_text = (analysis_prompt or default_prompt).strip()
457
  if st.session_state.get("fast_mode"):
458
+ model_used = model_arg or "text-bison@001"
459
  max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
460
  else:
461
  model_used = model_arg
462
  max_tokens = st.session_state.get("max_output_tokens", 1024)
463
 
464
+ # Ensure model_used is a short name (SDK accepts it; HTTP will normalize)
465
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
466
  user_msg = {"role": "user", "content": prompt_text}
467
 
 
470
 
471
  out = extract_text_from_response(response)
472
 
473
+ # Try to read token info if present
474
  meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
475
  output_tokens = 0
476
  try:
 
481
  except Exception:
482
  output_tokens = 0
483
 
484
+ # Retry strategies if no output
485
  if (not out or output_tokens == 0) and model_used:
486
  retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
487
  try: