Spaces:
Build error
Build error
CB committed on
Update streamlit_app.py
Browse files - streamlit_app.py +64 -23
streamlit_app.py
CHANGED
|
@@ -50,6 +50,7 @@ st.session_state.setdefault("fast_mode", False)
|
|
| 50 |
st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
|
| 51 |
st.session_state.setdefault("last_model", "")
|
| 52 |
st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
|
|
|
|
| 53 |
|
| 54 |
def sanitize_filename(path_str: str):
|
| 55 |
name = Path(path_str).name
|
|
@@ -93,7 +94,8 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
|
|
| 93 |
if video_id:
|
| 94 |
matches = glob(os.path.join(save_dir, f"{video_id}.*"))
|
| 95 |
else:
|
| 96 |
-
|
|
|
|
| 97 |
if not matches:
|
| 98 |
raise FileNotFoundError("Downloaded video not found")
|
| 99 |
return convert_video_to_mp4(matches[0])
|
|
@@ -115,7 +117,6 @@ def configure_genai_if_needed():
|
|
| 115 |
try:
|
| 116 |
genai.configure(api_key=key)
|
| 117 |
except Exception:
|
| 118 |
-
# ignore here; callers will handle failures
|
| 119 |
pass
|
| 120 |
return True
|
| 121 |
|
|
@@ -150,8 +151,7 @@ def clear_all_video_state():
|
|
| 150 |
except Exception:
|
| 151 |
pass
|
| 152 |
|
| 153 |
-
|
| 154 |
-
st.session_state["last_url_value"] = st.session_state.get("url", "")
|
| 155 |
current_url = st.session_state.get("url", "")
|
| 156 |
if current_url != st.session_state.get("last_url_value"):
|
| 157 |
clear_all_video_state()
|
|
@@ -161,12 +161,10 @@ st.sidebar.header("Video Input")
|
|
| 161 |
st.sidebar.text_input("Video URL", key="url", placeholder="https://")
|
| 162 |
|
| 163 |
settings_exp = st.sidebar.expander("Settings", expanded=False)
|
| 164 |
-
model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.
|
| 165 |
-
# session API key widget (session-first, fallback to .env)
|
| 166 |
settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
|
| 167 |
default_prompt = (
|
| 168 |
-
"Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
|
| 169 |
-
"Keep language professional and avoid anatomically explicit or sensitive detail. Include a list of observations and any timestamps for notable events."
|
| 170 |
)
|
| 171 |
analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
|
| 172 |
settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
|
|
@@ -193,7 +191,6 @@ def upload_video_sdk(filepath: str):
|
|
| 193 |
if not HAS_GENAI or upload_file is None:
|
| 194 |
raise RuntimeError("google.generativeai SDK not available; cannot upload")
|
| 195 |
genai.configure(api_key=key)
|
| 196 |
-
# upload_file may stream; wrap to update session progress if supported
|
| 197 |
return upload_file(filepath)
|
| 198 |
|
| 199 |
def wait_for_processed(file_obj, timeout=180):
|
|
@@ -247,7 +244,10 @@ if st.sidebar.button("Load Video", use_container_width=True):
|
|
| 247 |
st.session_state["last_loaded_path"] = path
|
| 248 |
st.session_state.pop("uploaded_file", None)
|
| 249 |
st.session_state.pop("processed_file", None)
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
| 251 |
except Exception as e:
|
| 252 |
st.sidebar.error(f"Failed to load video: {e}")
|
| 253 |
|
|
@@ -271,16 +271,16 @@ if st.session_state["videos"]:
|
|
| 271 |
st.sidebar.error("Failed to prepare download")
|
| 272 |
|
| 273 |
st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
|
| 274 |
-
# show file size and compression suggestion
|
| 275 |
try:
|
| 276 |
file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
|
| 277 |
st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
|
| 278 |
if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
|
| 279 |
st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
|
| 280 |
-
|
| 281 |
except Exception:
|
| 282 |
pass
|
| 283 |
|
|
|
|
| 284 |
if generate_now and not st.session_state.get("busy"):
|
| 285 |
if not st.session_state.get("videos"):
|
| 286 |
st.error("No video loaded. Use 'Load Video' in the sidebar.")
|
|
@@ -293,14 +293,14 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 293 |
st.session_state["busy"] = True
|
| 294 |
# ensure genai is configured now
|
| 295 |
try:
|
| 296 |
-
genai
|
|
|
|
| 297 |
except Exception:
|
| 298 |
pass
|
| 299 |
|
| 300 |
# recreate/clear agent if key or model changed
|
| 301 |
-
model_id = (st.session_state.get("model_input") or "gemini-2.
|
| 302 |
if st.session_state.get("last_model") != model_id:
|
| 303 |
-
# clear cached agent to rebuild with new model/key
|
| 304 |
st.session_state["last_model"] = ""
|
| 305 |
maybe_create_agent(model_id)
|
| 306 |
|
|
@@ -346,7 +346,7 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 346 |
|
| 347 |
out = ""
|
| 348 |
if st.session_state.get("fast_mode"):
|
| 349 |
-
model_used = model_id if model_id else "gemini-2.
|
| 350 |
max_tokens = 512
|
| 351 |
else:
|
| 352 |
model_used = model_id
|
|
@@ -360,6 +360,9 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 360 |
agent = maybe_create_agent(model_used)
|
| 361 |
if agent:
|
| 362 |
with st.spinner("Generating description via Agent..."):
|
|
|
|
|
|
|
|
|
|
| 363 |
response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
|
| 364 |
out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
|
| 365 |
else:
|
|
@@ -371,6 +374,7 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 371 |
raise RuntimeError("Uploaded file missing name/id")
|
| 372 |
system_msg = {"role": "system", "content": prompt_text}
|
| 373 |
user_msg = {"role": "user", "content": "Please summarize the attached video."}
|
|
|
|
| 374 |
try:
|
| 375 |
response = genai.responses.generate(
|
| 376 |
model=model_used,
|
|
@@ -387,23 +391,59 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 387 |
max_output_tokens=max_tokens,
|
| 388 |
)
|
| 389 |
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
text_pieces = []
|
| 395 |
-
for item in outputs
|
| 396 |
-
|
| 397 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
|
| 399 |
if ctype in ("output_text", "text") or ctype is None:
|
| 400 |
txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
|
| 401 |
if txt:
|
| 402 |
text_pieces.append(txt)
|
|
|
|
|
|
|
| 403 |
if not text_pieces:
|
| 404 |
top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
|
| 405 |
if top_text:
|
| 406 |
text_pieces.append(top_text)
|
|
|
|
|
|
|
| 407 |
seen = set()
|
| 408 |
filtered = []
|
| 409 |
for t in text_pieces:
|
|
@@ -412,6 +452,7 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 412 |
seen.add(t)
|
| 413 |
out = "\n\n".join(filtered)
|
| 414 |
|
|
|
|
| 415 |
if out:
|
| 416 |
out = remove_prompt_echo(prompt_text, out)
|
| 417 |
p = prompt_text
|
|
@@ -428,7 +469,7 @@ if generate_now and not st.session_state.get("busy"):
|
|
| 428 |
st.session_state["analysis_out"] = out
|
| 429 |
st.session_state["last_error"] = ""
|
| 430 |
st.subheader("Analysis Result")
|
| 431 |
-
st.markdown(out)
|
| 432 |
st.caption(est_cost_caption)
|
| 433 |
except Exception as e:
|
| 434 |
st.session_state["last_error"] = str(e)
|
|
|
|
| 50 |
st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
|
| 51 |
st.session_state.setdefault("last_model", "")
|
| 52 |
st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
|
| 53 |
+
st.session_state.setdefault("last_url_value", "")
|
| 54 |
|
| 55 |
def sanitize_filename(path_str: str):
|
| 56 |
name = Path(path_str).name
|
|
|
|
| 94 |
if video_id:
|
| 95 |
matches = glob(os.path.join(save_dir, f"{video_id}.*"))
|
| 96 |
else:
|
| 97 |
+
all_files = glob(os.path.join(save_dir, "*"))
|
| 98 |
+
matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
|
| 99 |
if not matches:
|
| 100 |
raise FileNotFoundError("Downloaded video not found")
|
| 101 |
return convert_video_to_mp4(matches[0])
|
|
|
|
| 117 |
try:
|
| 118 |
genai.configure(api_key=key)
|
| 119 |
except Exception:
|
|
|
|
| 120 |
pass
|
| 121 |
return True
|
| 122 |
|
|
|
|
| 151 |
except Exception:
|
| 152 |
pass
|
| 153 |
|
| 154 |
+
# track url changes
|
|
|
|
| 155 |
current_url = st.session_state.get("url", "")
|
| 156 |
if current_url != st.session_state.get("last_url_value"):
|
| 157 |
clear_all_video_state()
|
|
|
|
| 161 |
st.sidebar.text_input("Video URL", key="url", placeholder="https://")
|
| 162 |
|
| 163 |
settings_exp = st.sidebar.expander("Settings", expanded=False)
|
| 164 |
+
model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
|
|
|
|
| 165 |
settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
|
| 166 |
default_prompt = (
|
| 167 |
+
"Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
|
|
|
|
| 168 |
)
|
| 169 |
analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
|
| 170 |
settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
|
|
|
|
| 191 |
if not HAS_GENAI or upload_file is None:
|
| 192 |
raise RuntimeError("google.generativeai SDK not available; cannot upload")
|
| 193 |
genai.configure(api_key=key)
|
|
|
|
| 194 |
return upload_file(filepath)
|
| 195 |
|
| 196 |
def wait_for_processed(file_obj, timeout=180):
|
|
|
|
| 244 |
st.session_state["last_loaded_path"] = path
|
| 245 |
st.session_state.pop("uploaded_file", None)
|
| 246 |
st.session_state.pop("processed_file", None)
|
| 247 |
+
try:
|
| 248 |
+
st.session_state["file_hash"] = file_sha256(path)
|
| 249 |
+
except Exception:
|
| 250 |
+
st.session_state["file_hash"] = None
|
| 251 |
except Exception as e:
|
| 252 |
st.sidebar.error(f"Failed to load video: {e}")
|
| 253 |
|
|
|
|
| 271 |
st.sidebar.error("Failed to prepare download")
|
| 272 |
|
| 273 |
st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
|
|
|
|
| 274 |
try:
|
| 275 |
file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
|
| 276 |
st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
|
| 277 |
if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
|
| 278 |
st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
|
| 279 |
+
# do not forcibly change user's fast_mode here; just recommend
|
| 280 |
except Exception:
|
| 281 |
pass
|
| 282 |
|
| 283 |
+
# --- Generation flow ---
|
| 284 |
if generate_now and not st.session_state.get("busy"):
|
| 285 |
if not st.session_state.get("videos"):
|
| 286 |
st.error("No video loaded. Use 'Load Video' in the sidebar.")
|
|
|
|
| 293 |
st.session_state["busy"] = True
|
| 294 |
# ensure genai is configured now
|
| 295 |
try:
|
| 296 |
+
if HAS_GENAI and genai is not None:
|
| 297 |
+
genai.configure(api_key=key_to_use)
|
| 298 |
except Exception:
|
| 299 |
pass
|
| 300 |
|
| 301 |
# recreate/clear agent if key or model changed
|
| 302 |
+
model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
|
| 303 |
if st.session_state.get("last_model") != model_id:
|
|
|
|
| 304 |
st.session_state["last_model"] = ""
|
| 305 |
maybe_create_agent(model_id)
|
| 306 |
|
|
|
|
| 346 |
|
| 347 |
out = ""
|
| 348 |
if st.session_state.get("fast_mode"):
|
| 349 |
+
model_used = model_id if model_id else "gemini-2.5-flash-lite"
|
| 350 |
max_tokens = 512
|
| 351 |
else:
|
| 352 |
model_used = model_id
|
|
|
|
| 360 |
agent = maybe_create_agent(model_used)
|
| 361 |
if agent:
|
| 362 |
with st.spinner("Generating description via Agent..."):
|
| 363 |
+
# guard processed for agent: must be non-none
|
| 364 |
+
if not processed:
|
| 365 |
+
raise RuntimeError("Processed file missing for agent generation")
|
| 366 |
response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
|
| 367 |
out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
|
| 368 |
else:
|
|
|
|
| 374 |
raise RuntimeError("Uploaded file missing name/id")
|
| 375 |
system_msg = {"role": "system", "content": prompt_text}
|
| 376 |
user_msg = {"role": "user", "content": "Please summarize the attached video."}
|
| 377 |
+
# call responses.generate, handling both signature variants and return shapes
|
| 378 |
try:
|
| 379 |
response = genai.responses.generate(
|
| 380 |
model=model_used,
|
|
|
|
| 391 |
max_output_tokens=max_tokens,
|
| 392 |
)
|
| 393 |
|
| 394 |
+
# Normalize outputs to a list of items with .content or dict form
|
| 395 |
+
outputs = []
|
| 396 |
+
if response is None:
|
| 397 |
+
outputs = []
|
| 398 |
+
else:
|
| 399 |
+
outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
|
| 400 |
+
# Newer API may use 'candidates' or 'items' -- check safely
|
| 401 |
+
if not outputs:
|
| 402 |
+
# check common alternative keys
|
| 403 |
+
if isinstance(response, dict):
|
| 404 |
+
if "candidates" in response and isinstance(response["candidates"], list):
|
| 405 |
+
outputs = response["candidates"]
|
| 406 |
+
elif "items" in response and isinstance(response["items"], list):
|
| 407 |
+
outputs = response["items"]
|
| 408 |
+
elif "output" in response and isinstance(response["output"], list):
|
| 409 |
+
outputs = response["output"]
|
| 410 |
+
else:
|
| 411 |
+
# attempt attribute access fallbacks
|
| 412 |
+
outputs = getattr(response, "candidates", None) or getattr(response, "items", None) or getattr(response, "output", None) or []
|
| 413 |
+
# ensure outputs is a list
|
| 414 |
+
if outputs is None:
|
| 415 |
+
outputs = []
|
| 416 |
|
| 417 |
text_pieces = []
|
| 418 |
+
for item in outputs:
|
| 419 |
+
# item may be dict or object
|
| 420 |
+
contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None)
|
| 421 |
+
# some shapes put text directly on item
|
| 422 |
+
if contents is None:
|
| 423 |
+
# fallback to checking item text or string
|
| 424 |
+
txt_direct = getattr(item, "text", None) or (item.get("text") if isinstance(item, dict) else None)
|
| 425 |
+
if txt_direct:
|
| 426 |
+
text_pieces.append(txt_direct)
|
| 427 |
+
continue
|
| 428 |
+
# contents might be list or single dict/object
|
| 429 |
+
if isinstance(contents, (list, tuple)):
|
| 430 |
+
content_iter = contents
|
| 431 |
+
else:
|
| 432 |
+
content_iter = [contents]
|
| 433 |
+
for c in content_iter:
|
| 434 |
ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
|
| 435 |
if ctype in ("output_text", "text") or ctype is None:
|
| 436 |
txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
|
| 437 |
if txt:
|
| 438 |
text_pieces.append(txt)
|
| 439 |
+
|
| 440 |
+
# final fallback: top-level text fields
|
| 441 |
if not text_pieces:
|
| 442 |
top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
|
| 443 |
if top_text:
|
| 444 |
text_pieces.append(top_text)
|
| 445 |
+
|
| 446 |
+
# dedupe while preserving order
|
| 447 |
seen = set()
|
| 448 |
filtered = []
|
| 449 |
for t in text_pieces:
|
|
|
|
| 452 |
seen.add(t)
|
| 453 |
out = "\n\n".join(filtered)
|
| 454 |
|
| 455 |
+
# post-process output to remove prompt echo or placeholders
|
| 456 |
if out:
|
| 457 |
out = remove_prompt_echo(prompt_text, out)
|
| 458 |
p = prompt_text
|
|
|
|
| 469 |
st.session_state["analysis_out"] = out
|
| 470 |
st.session_state["last_error"] = ""
|
| 471 |
st.subheader("Analysis Result")
|
| 472 |
+
st.markdown(out if out else "No analysis returned.")
|
| 473 |
st.caption(est_cost_caption)
|
| 474 |
except Exception as e:
|
| 475 |
st.session_state["last_error"] = str(e)
|