CB committed on
Commit
eb05733
·
verified ·
1 Parent(s): 18c6ab8

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +64 -23
streamlit_app.py CHANGED
@@ -50,6 +50,7 @@ st.session_state.setdefault("fast_mode", False)
50
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
51
  st.session_state.setdefault("last_model", "")
52
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
 
53
 
54
  def sanitize_filename(path_str: str):
55
  name = Path(path_str).name
@@ -93,7 +94,8 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
93
  if video_id:
94
  matches = glob(os.path.join(save_dir, f"{video_id}.*"))
95
  else:
96
- matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
 
97
  if not matches:
98
  raise FileNotFoundError("Downloaded video not found")
99
  return convert_video_to_mp4(matches[0])
@@ -115,7 +117,6 @@ def configure_genai_if_needed():
115
  try:
116
  genai.configure(api_key=key)
117
  except Exception:
118
- # ignore here; callers will handle failures
119
  pass
120
  return True
121
 
@@ -150,8 +151,7 @@ def clear_all_video_state():
150
  except Exception:
151
  pass
152
 
153
- if "last_url_value" not in st.session_state:
154
- st.session_state["last_url_value"] = st.session_state.get("url", "")
155
  current_url = st.session_state.get("url", "")
156
  if current_url != st.session_state.get("last_url_value"):
157
  clear_all_video_state()
@@ -161,12 +161,10 @@ st.sidebar.header("Video Input")
161
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
162
 
163
  settings_exp = st.sidebar.expander("Settings", expanded=False)
164
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
165
- # session API key widget (session-first, fallback to .env)
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
  default_prompt = (
168
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
169
- "Keep language professional and avoid anatomically explicit or sensitive detail. Include a list of observations and any timestamps for notable events."
170
  )
171
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
172
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
@@ -193,7 +191,6 @@ def upload_video_sdk(filepath: str):
193
  if not HAS_GENAI or upload_file is None:
194
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
195
  genai.configure(api_key=key)
196
- # upload_file may stream; wrap to update session progress if supported
197
  return upload_file(filepath)
198
 
199
  def wait_for_processed(file_obj, timeout=180):
@@ -247,7 +244,10 @@ if st.sidebar.button("Load Video", use_container_width=True):
247
  st.session_state["last_loaded_path"] = path
248
  st.session_state.pop("uploaded_file", None)
249
  st.session_state.pop("processed_file", None)
250
- st.session_state["file_hash"] = file_sha256(path)
 
 
 
251
  except Exception as e:
252
  st.sidebar.error(f"Failed to load video: {e}")
253
 
@@ -271,16 +271,16 @@ if st.session_state["videos"]:
271
  st.sidebar.error("Failed to prepare download")
272
 
273
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
274
- # show file size and compression suggestion
275
  try:
276
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
277
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
278
  if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
279
  st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
280
- st.session_state["fast_mode"] = True
281
  except Exception:
282
  pass
283
 
 
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -293,14 +293,14 @@ if generate_now and not st.session_state.get("busy"):
293
  st.session_state["busy"] = True
294
  # ensure genai is configured now
295
  try:
296
- genai.configure(api_key=key_to_use)
 
297
  except Exception:
298
  pass
299
 
300
  # recreate/clear agent if key or model changed
301
- model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
302
  if st.session_state.get("last_model") != model_id:
303
- # clear cached agent to rebuild with new model/key
304
  st.session_state["last_model"] = ""
305
  maybe_create_agent(model_id)
306
 
@@ -346,7 +346,7 @@ if generate_now and not st.session_state.get("busy"):
346
 
347
  out = ""
348
  if st.session_state.get("fast_mode"):
349
- model_used = model_id if model_id else "gemini-2.0-flash-lite"
350
  max_tokens = 512
351
  else:
352
  model_used = model_id
@@ -360,6 +360,9 @@ if generate_now and not st.session_state.get("busy"):
360
  agent = maybe_create_agent(model_used)
361
  if agent:
362
  with st.spinner("Generating description via Agent..."):
 
 
 
363
  response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
364
  out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
365
  else:
@@ -371,6 +374,7 @@ if generate_now and not st.session_state.get("busy"):
371
  raise RuntimeError("Uploaded file missing name/id")
372
  system_msg = {"role": "system", "content": prompt_text}
373
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
 
374
  try:
375
  response = genai.responses.generate(
376
  model=model_used,
@@ -387,23 +391,59 @@ if generate_now and not st.session_state.get("busy"):
387
  max_output_tokens=max_tokens,
388
  )
389
 
390
- outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
391
- if not outputs and isinstance(response, dict):
392
- outputs = response.get("output", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
  text_pieces = []
395
- for item in outputs or []:
396
- contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None) or []
397
- for c in contents:
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
399
  if ctype in ("output_text", "text") or ctype is None:
400
  txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
401
  if txt:
402
  text_pieces.append(txt)
 
 
403
  if not text_pieces:
404
  top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
405
  if top_text:
406
  text_pieces.append(top_text)
 
 
407
  seen = set()
408
  filtered = []
409
  for t in text_pieces:
@@ -412,6 +452,7 @@ if generate_now and not st.session_state.get("busy"):
412
  seen.add(t)
413
  out = "\n\n".join(filtered)
414
 
 
415
  if out:
416
  out = remove_prompt_echo(prompt_text, out)
417
  p = prompt_text
@@ -428,7 +469,7 @@ if generate_now and not st.session_state.get("busy"):
428
  st.session_state["analysis_out"] = out
429
  st.session_state["last_error"] = ""
430
  st.subheader("Analysis Result")
431
- st.markdown(out)
432
  st.caption(est_cost_caption)
433
  except Exception as e:
434
  st.session_state["last_error"] = str(e)
 
50
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
51
  st.session_state.setdefault("last_model", "")
52
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
53
+ st.session_state.setdefault("last_url_value", "")
54
 
55
  def sanitize_filename(path_str: str):
56
  name = Path(path_str).name
 
94
  if video_id:
95
  matches = glob(os.path.join(save_dir, f"{video_id}.*"))
96
  else:
97
+ all_files = glob(os.path.join(save_dir, "*"))
98
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
99
  if not matches:
100
  raise FileNotFoundError("Downloaded video not found")
101
  return convert_video_to_mp4(matches[0])
 
117
  try:
118
  genai.configure(api_key=key)
119
  except Exception:
 
120
  pass
121
  return True
122
 
 
151
  except Exception:
152
  pass
153
 
154
+ # track url changes
 
155
  current_url = st.session_state.get("url", "")
156
  if current_url != st.session_state.get("last_url_value"):
157
  clear_all_video_state()
 
161
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
162
 
163
  settings_exp = st.sidebar.expander("Settings", expanded=False)
164
+ model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
 
165
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
166
  default_prompt = (
167
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
 
168
  )
169
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
170
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 
191
  if not HAS_GENAI or upload_file is None:
192
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
193
  genai.configure(api_key=key)
 
194
  return upload_file(filepath)
195
 
196
  def wait_for_processed(file_obj, timeout=180):
 
244
  st.session_state["last_loaded_path"] = path
245
  st.session_state.pop("uploaded_file", None)
246
  st.session_state.pop("processed_file", None)
247
+ try:
248
+ st.session_state["file_hash"] = file_sha256(path)
249
+ except Exception:
250
+ st.session_state["file_hash"] = None
251
  except Exception as e:
252
  st.sidebar.error(f"Failed to load video: {e}")
253
 
 
271
  st.sidebar.error("Failed to prepare download")
272
 
273
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
 
274
  try:
275
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
276
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
277
  if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
278
  st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
279
+ # do not forcibly change user's fast_mode here; just recommend
280
  except Exception:
281
  pass
282
 
283
+ # --- Generation flow ---
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
293
  st.session_state["busy"] = True
294
  # ensure genai is configured now
295
  try:
296
+ if HAS_GENAI and genai is not None:
297
+ genai.configure(api_key=key_to_use)
298
  except Exception:
299
  pass
300
 
301
  # recreate/clear agent if key or model changed
302
+ model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
303
  if st.session_state.get("last_model") != model_id:
 
304
  st.session_state["last_model"] = ""
305
  maybe_create_agent(model_id)
306
 
 
346
 
347
  out = ""
348
  if st.session_state.get("fast_mode"):
349
+ model_used = model_id if model_id else "gemini-2.5-flash-lite"
350
  max_tokens = 512
351
  else:
352
  model_used = model_id
 
360
  agent = maybe_create_agent(model_used)
361
  if agent:
362
  with st.spinner("Generating description via Agent..."):
363
+ # guard processed for agent: must be non-none
364
+ if not processed:
365
+ raise RuntimeError("Processed file missing for agent generation")
366
  response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367
  out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
368
  else:
 
374
  raise RuntimeError("Uploaded file missing name/id")
375
  system_msg = {"role": "system", "content": prompt_text}
376
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
377
+ # call responses.generate, handling both signature variants and return shapes
378
  try:
379
  response = genai.responses.generate(
380
  model=model_used,
 
391
  max_output_tokens=max_tokens,
392
  )
393
 
394
+ # Normalize outputs to a list of items with .content or dict form
395
+ outputs = []
396
+ if response is None:
397
+ outputs = []
398
+ else:
399
+ outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
400
+ # Newer API may use 'candidates' or 'items' -- check safely
401
+ if not outputs:
402
+ # check common alternative keys
403
+ if isinstance(response, dict):
404
+ if "candidates" in response and isinstance(response["candidates"], list):
405
+ outputs = response["candidates"]
406
+ elif "items" in response and isinstance(response["items"], list):
407
+ outputs = response["items"]
408
+ elif "output" in response and isinstance(response["output"], list):
409
+ outputs = response["output"]
410
+ else:
411
+ # attempt attribute access fallbacks
412
+ outputs = getattr(response, "candidates", None) or getattr(response, "items", None) or getattr(response, "output", None) or []
413
+ # ensure outputs is a list
414
+ if outputs is None:
415
+ outputs = []
416
 
417
  text_pieces = []
418
+ for item in outputs:
419
+ # item may be dict or object
420
+ contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None)
421
+ # some shapes put text directly on item
422
+ if contents is None:
423
+ # fallback to checking item text or string
424
+ txt_direct = getattr(item, "text", None) or (item.get("text") if isinstance(item, dict) else None)
425
+ if txt_direct:
426
+ text_pieces.append(txt_direct)
427
+ continue
428
+ # contents might be list or single dict/object
429
+ if isinstance(contents, (list, tuple)):
430
+ content_iter = contents
431
+ else:
432
+ content_iter = [contents]
433
+ for c in content_iter:
434
  ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
435
  if ctype in ("output_text", "text") or ctype is None:
436
  txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
437
  if txt:
438
  text_pieces.append(txt)
439
+
440
+ # final fallback: top-level text fields
441
  if not text_pieces:
442
  top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
443
  if top_text:
444
  text_pieces.append(top_text)
445
+
446
+ # dedupe while preserving order
447
  seen = set()
448
  filtered = []
449
  for t in text_pieces:
 
452
  seen.add(t)
453
  out = "\n\n".join(filtered)
454
 
455
+ # post-process output to remove prompt echo or placeholders
456
  if out:
457
  out = remove_prompt_echo(prompt_text, out)
458
  p = prompt_text
 
469
  st.session_state["analysis_out"] = out
470
  st.session_state["last_error"] = ""
471
  st.subheader("Analysis Result")
472
+ st.markdown(out if out else "No analysis returned.")
473
  st.caption(est_cost_caption)
474
  except Exception as e:
475
  st.session_state["last_error"] = str(e)