CB committed on
Commit
d5c14ed
·
verified ·
1 Parent(s): 03539b5

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +50 -61
streamlit_app.py CHANGED
@@ -13,7 +13,7 @@ from dotenv import load_dotenv
13
 
14
  load_dotenv()
15
 
16
- # Optional SDK import; we try to use it when available.
17
  HAS_GENAI = False
18
  genai = None
19
  upload_file = None
@@ -126,7 +126,6 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
126
  def file_name_or_id(file_obj):
127
  if not file_obj:
128
  return None
129
- # simple handling for dict or object - return a plausible id/name string
130
  if isinstance(file_obj, dict):
131
  for key in ("name", "id", "fileId", "file_id", "uri", "url"):
132
  val = file_obj.get(key)
@@ -195,12 +194,9 @@ settings = st.sidebar.expander("Settings", expanded=False)
195
 
196
  env_key = os.getenv("GOOGLE_API_KEY", "")
197
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
198
- # Default model changed to text-bison@001 (broadly available). Replace if you have another.
199
  model_input = settings.text_input("Model (short name)", "text-bison@001")
200
  model_id = model_input.strip() or "text-bison@001"
201
- # model_arg used with SDK; keep as short name like "text-bison@001"
202
  model_arg = model_id
203
- model_for_url_default = model_arg
204
 
205
  default_prompt = (
206
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
@@ -279,20 +275,7 @@ def get_runtime_api_key():
279
  return key
280
  return os.getenv("GOOGLE_API_KEY", "").strip() or None
281
 
282
- # ---- Simplified SDK-first + HTTP-fallback layer ----
283
- def _normalize_model_for_http(model: str) -> str:
284
- """
285
- Return a short model name appropriate for the HTTP path,
286
- e.g. "text-bison@001" (no "models/" prefix).
287
- """
288
- if not model:
289
- return "text-bison@001"
290
- m = model.strip()
291
- # If user mistakenly provided "models/..." strip the prefix for HTTP path
292
- if m.startswith("models/"):
293
- return m.split("/", 1)[1]
294
- return m
295
-
296
  def _messages_to_prompt(messages):
297
  if not messages:
298
  return ""
@@ -303,34 +286,23 @@ def _messages_to_prompt(messages):
303
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
304
  return "\n".join(parts)
305
 
306
- def _http_generate(api_key: str, model: str, prompt: str, max_tokens: int):
307
- host = "https://generativelanguage.googleapis.com"
308
- norm = _normalize_model_for_http(model)
309
- candidates = [
310
- f"{host}/v1/models/{norm}:generate",
311
- f"{host}/v1beta3/models/{norm}:generate",
312
- f"{host}/v1beta2/models/{norm}:generate",
313
- ]
314
- payload = {"prompt": {"text": prompt}, "maxOutputTokens": int(max_tokens or 512)}
315
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
316
- last_exc = None
317
- for url in candidates:
318
- try:
319
- r = requests.post(url, json=payload, headers=headers, timeout=20)
320
- if r.status_code == 200:
321
- try:
322
- return r.json()
323
- except Exception:
324
- return {"text": r.text}
325
- last_exc = RuntimeError(f"HTTP {r.status_code}: {r.text}")
326
- except Exception as e:
327
- last_exc = e
328
- raise RuntimeError(f"HTTP generate failed: {last_exc}; tried: {candidates}")
329
 
330
  def responses_generate(model, messages, files, max_output_tokens, api_key):
331
  if not api_key:
332
  raise RuntimeError("No API key for responses_generate")
333
- # Try SDK responses.generate when available and working
334
  if HAS_GENAI and genai is not None:
335
  try:
336
  genai.configure(api_key=api_key)
@@ -342,9 +314,9 @@ def responses_generate(model, messages, files, max_output_tokens, api_key):
342
  return responses_obj.generate(**sdk_kwargs)
343
  except Exception:
344
  pass
345
- # HTTP fallback
346
  prompt = _messages_to_prompt(messages)
347
- return _http_generate(api_key, model, prompt, max_output_tokens)
348
 
349
  def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
350
  messages = [system_msg, user_msg]
@@ -361,35 +333,53 @@ def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
361
  def extract_text_from_response(response):
362
  if response is None:
363
  return None
 
364
  if isinstance(response, dict):
365
- if "candidates" in response and isinstance(response["candidates"], list) and response["candidates"]:
366
- cand = response["candidates"][0]
367
- if isinstance(cand, dict):
368
- return cand.get("content") or cand.get("text") or response.get("text")
369
  if "output" in response and isinstance(response["output"], list):
370
  pieces = []
371
  for item in response["output"]:
372
  if isinstance(item, dict):
373
- c = item.get("content") or item.get("text")
374
- if isinstance(c, str):
375
- pieces.append(c)
 
 
 
 
 
 
376
  if pieces:
377
  return "\n\n".join(pieces)
378
- if "text" in response and isinstance(response["text"], str):
379
- return response["text"]
 
 
 
380
  if "outputText" in response and isinstance(response["outputText"], str):
381
  return response["outputText"]
382
- for k in ("result", "generated_text", "description"):
383
- if k in response and isinstance(response[k], str):
384
- return response[k]
385
- return None
 
386
  try:
387
  outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
388
  if outputs:
389
  pieces = []
390
  for item in outputs:
391
- txt = getattr(item, "content", None) or getattr(item, "text", None)
392
- if txt:
 
 
 
 
 
 
 
 
 
 
393
  pieces.append(txt)
394
  if pieces:
395
  return "\n\n".join(pieces)
@@ -400,8 +390,7 @@ def extract_text_from_response(response):
400
  pass
401
  return None
402
 
403
- # ---- end compatibility layer ----
404
-
405
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
406
  if not st.session_state.get("videos"):
407
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
13
 
14
  load_dotenv()
15
 
16
+ # Try SDK import
17
  HAS_GENAI = False
18
  genai = None
19
  upload_file = None
 
126
  def file_name_or_id(file_obj):
127
  if not file_obj:
128
  return None
 
129
  if isinstance(file_obj, dict):
130
  for key in ("name", "id", "fileId", "file_id", "uri", "url"):
131
  val = file_obj.get(key)
 
194
 
195
  env_key = os.getenv("GOOGLE_API_KEY", "")
196
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
 
197
  model_input = settings.text_input("Model (short name)", "text-bison@001")
198
  model_id = model_input.strip() or "text-bison@001"
 
199
  model_arg = model_id
 
200
 
201
  default_prompt = (
202
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
 
275
  return key
276
  return os.getenv("GOOGLE_API_KEY", "").strip() or None
277
 
278
+ # --- Compatibility layer: SDK-first, HTTP fallback using /v1/responses ---
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  def _messages_to_prompt(messages):
280
  if not messages:
281
  return ""
 
286
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
287
  return "\n".join(parts)
288
 
289
+ def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens: int):
290
+ url = "https://generativelanguage.googleapis.com/v1/responses"
 
 
 
 
 
 
 
291
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
292
+ payload = {
293
+ "model": model or "text-bison@001",
294
+ "input": prompt,
295
+ "maxOutputTokens": int(max_tokens or 512),
296
+ }
297
+ r = requests.post(url, json=payload, headers=headers, timeout=30)
298
+ if r.status_code != 200:
299
+ raise RuntimeError(f"HTTP {r.status_code}: {r.text}")
300
+ return r.json()
 
 
 
 
301
 
302
  def responses_generate(model, messages, files, max_output_tokens, api_key):
303
  if not api_key:
304
  raise RuntimeError("No API key for responses_generate")
305
+ # SDK path
306
  if HAS_GENAI and genai is not None:
307
  try:
308
  genai.configure(api_key=api_key)
 
314
  return responses_obj.generate(**sdk_kwargs)
315
  except Exception:
316
  pass
317
+ # HTTP fallback (Responses API)
318
  prompt = _messages_to_prompt(messages)
319
+ return _http_generate_responses(api_key, model, prompt, max_output_tokens)
320
 
321
  def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
322
  messages = [system_msg, user_msg]
 
333
  def extract_text_from_response(response):
334
  if response is None:
335
  return None
336
+ # dict-style
337
  if isinstance(response, dict):
338
+ # Responses v1: look for "output" or "candidates" or "outputText"
 
 
 
339
  if "output" in response and isinstance(response["output"], list):
340
  pieces = []
341
  for item in response["output"]:
342
  if isinstance(item, dict):
343
+ # new Responses API nested content sometimes in "content" list with dicts
344
+ if "content" in item and isinstance(item["content"], list):
345
+ for c in item["content"]:
346
+ if isinstance(c, dict) and "text" in c:
347
+ pieces.append(c["text"])
348
+ else:
349
+ c = item.get("content") or item.get("text")
350
+ if isinstance(c, str):
351
+ pieces.append(c)
352
  if pieces:
353
  return "\n\n".join(pieces)
354
+ # older style candidates
355
+ if "candidates" in response and isinstance(response["candidates"], list) and response["candidates"]:
356
+ cand = response["candidates"][0]
357
+ if isinstance(cand, dict):
358
+ return cand.get("content") or cand.get("text") or response.get("text")
359
  if "outputText" in response and isinstance(response["outputText"], str):
360
  return response["outputText"]
361
+ if "text" in response and isinstance(response["text"], str):
362
+ return response["text"]
363
+ # fallback: stringified body
364
+ return json.dumps(response)[:16000]
365
+ # object-style (SDK)
366
  try:
367
  outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
368
  if outputs:
369
  pieces = []
370
  for item in outputs:
371
+ txt = None
372
+ if hasattr(item, "content"):
373
+ txt = getattr(item, "content")
374
+ if isinstance(txt, list):
375
+ # SDK content lists may contain dicts with 'text'
376
+ for c in txt:
377
+ if isinstance(c, dict) and "text" in c:
378
+ pieces.append(c["text"])
379
+ elif isinstance(txt, str):
380
+ pieces.append(txt)
381
+ txt = txt or getattr(item, "text", None)
382
+ if isinstance(txt, str):
383
  pieces.append(txt)
384
  if pieces:
385
  return "\n\n".join(pieces)
 
390
  pass
391
  return None
392
 
393
+ # ---- Main generation flow ----
 
394
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
395
  if not st.session_state.get("videos"):
396
  st.error("No video loaded. Use 'Load Video' in the sidebar.")