Spaces:

Hug0endob
/

Video-Analysis

Build error

App Files Community

CB committed on Sep 13, 2025

Commit

a43f860

verified ·

1 Parent(s): 0b25ca5

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +48 -95

streamlit_app.py CHANGED Viewed

@@ -15,7 +15,6 @@ from dotenv import load_dotenv
 load_dotenv()
-# Optional phi/GenAI imports — gracefully degrade if not present
 try:
     from phi.agent import Agent
     from phi.model.google import Gemini
@@ -38,7 +37,6 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
-# Session defaults
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", False)
 st.session_state.setdefault("uploaded_file", None)
@@ -51,12 +49,10 @@ st.session_state.setdefault("file_hash", None)
 st.session_state.setdefault("fast_mode", False)
 st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
 st.session_state.setdefault("last_model", "")
-st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
 st.session_state.setdefault("last_url_value", "")
 def sanitize_filename(path_str: str):
-    name = Path(path_str).name
-    return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 def file_sha256(path: str, block_size: int = 65536) -> str:
     h = hashlib.sha256()
@@ -107,27 +103,24 @@ def file_name_or_id(file_obj):
         return None
     if isinstance(file_obj, dict):
         return file_obj.get("name") or file_obj.get("id")
-    # object-like
-    for attr in ("name", "id", "fileId", "file_id", "file_id"):
         if hasattr(file_obj, attr):
             val = getattr(file_obj, attr)
             if val:
                 return val
-    # fallback to string
     return str(file_obj)
 def get_effective_api_key():
     return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
-def configure_genai_if_needed():
-    key = get_effective_api_key()
-    if not key:
         return False
     try:
         genai.configure(api_key=key)
     except Exception:
-        pass
-    return True
 _agent = None
 def maybe_create_agent(model_id: str):
@@ -160,7 +153,6 @@ def clear_all_video_state():
         except Exception:
             pass
-# track url changes
 current_url = st.session_state.get("url", "")
 if current_url != st.session_state.get("last_url_value"):
     clear_all_video_state()
@@ -170,16 +162,15 @@ st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
 settings_exp = st.sidebar.expander("Settings", expanded=False)
-model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
 settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
 default_prompt = (
     "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
 )
-analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
 settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
-# Show which key is active
 key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
 settings_exp.caption(f"Using API key from: **{key_source}**")
@@ -214,7 +205,6 @@ def wait_for_processed(file_obj, timeout=180):
         try:
             obj = get_file(name)
         except Exception:
-            # if the SDK fails, return original object
             return file_obj
         state = getattr(obj, "state", None)
         if not state or getattr(state, "name", None) != "PROCESSING":
@@ -295,7 +285,6 @@ if st.session_state["videos"]:
     except Exception:
         pass
-# --- Generation flow ---
 if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -306,11 +295,7 @@ if generate_now and not st.session_state.get("busy"):
         else:
             try:
                 st.session_state["busy"] = True
-                try:
-                    if HAS_GENAI and genai is not None:
-                        genai.configure(api_key=key_to_use)
-                except Exception:
-                    pass
                 model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
                 if st.session_state.get("last_model") != model_id:
@@ -355,59 +340,35 @@ if generate_now and not st.session_state.get("busy"):
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
-                prompt_text = (analysis_prompt.strip() or default_prompt).strip()
                 out = ""
-                if st.session_state.get("fast_mode"):
-                    model_used = model_id if model_id else "gemini-2.5-flash-lite"
-                    max_tokens = 512
-                else:
-                    model_used = model_id
-                    max_tokens = 1024
                 est_tokens = max_tokens
-                est_cost_caption = f"Est. max tokens: {est_tokens}"
-                # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
                 agent = maybe_create_agent(model_used)
-                debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
                 if agent:
                     debug_info["agent_attempted"] = True
                     try:
-                        with st.spinner("Generating description via Agent..."):
                             if not processed:
                                 raise RuntimeError("Processed file missing for agent generation")
-                            # call agent.run inside try/except to catch library IndexError
                             agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
-                            # Try to extract text from common attributes; be defensive
-                            agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
-                            if not agent_text:
-                                # try dict-like access
-                                try:
-                                    if isinstance(agent_response, dict):
-                                        # check common keys
-                                        for k in ("content", "outputText", "text"):
-                                            if k in agent_response and agent_response[k]:
-                                                agent_text = agent_response[k]
-                                                break
-                                except Exception:
-                                    pass
                             if agent_text and str(agent_text).strip():
                                 out = str(agent_text).strip()
                                 debug_info["agent_ok"] = True
-                                debug_info["agent_response_has_text"] = True
-                            else:
-                                # Agent returned but had no usable text; set a marker to fallback
-                                debug_info["agent_ok"] = False
                     except Exception as ae:
-                        # Save agent error and continue to fallback path instead of crashing
-                        debug_info["agent_error"] = f"{ae}"
-                        # include traceback for debugging
-                        debug_info["agent_traceback"] = traceback.format_exc()
-                        # Do not re-raise; we'll fallback to genai.responses.generate below
                 if not out:
-                    # Fallback to direct Responses API flow (robust multi-version support)
                     try:
                         if not HAS_GENAI or genai is None:
                             raise RuntimeError("Responses API not available; install google.generativeai SDK.")
@@ -415,12 +376,13 @@ if generate_now and not st.session_state.get("busy"):
                         fname = file_name_or_id(processed)
                         if not fname:
                             raise RuntimeError("Uploaded file missing name/id")
                         system_msg = {"role": "system", "content": prompt_text}
                         user_msg = {"role": "user", "content": "Please summarize the attached video."}
                         response = None
-                        # Try 1: new-style responses API (genai.responses.generate)
                         try:
                             if hasattr(genai, "responses") and hasattr(genai.responses, "generate"):
                                 response = genai.responses.generate(
@@ -432,8 +394,8 @@ if generate_now and not st.session_state.get("busy"):
                                 )
                         except Exception:
                             response = None
-                        # Try 2: model-based interface (GenerativeModel / model.generate_content)
                         if response is None:
                             try:
                                 if hasattr(genai, "GenerativeModel"):
@@ -444,8 +406,8 @@ if generate_now and not st.session_state.get("busy"):
                                         response = model_obj.generate([system_msg, user_msg], files=[{"name": fname}], max_output_tokens=max_tokens)
                             except Exception:
                                 response = None
-                        # Try 3: generic genai.generate / genai.create
                         if response is None:
                             try:
                                 if hasattr(genai, "generate"):
@@ -454,15 +416,12 @@ if generate_now and not st.session_state.get("busy"):
                                     response = genai.create(model=model_used, input=[{"text": prompt_text, "files": [{"name": fname}]}], max_output_tokens=max_tokens)
                             except Exception:
                                 response = None
                         if response is None:
-                            raise RuntimeError("No supported generate method found on google.generativeai; check SDK version.")
-                        # Defensive normalization of response -> outputs list
                         outputs = []
-                        if response is None:
-                            outputs = []
-                        elif isinstance(response, dict):
                             for key in ("output", "candidates", "items", "responses"):
                                 val = response.get(key)
                                 if isinstance(val, list) and val:
@@ -479,34 +438,31 @@ if generate_now and not st.session_state.get("busy"):
                                 if isinstance(val, list) and val:
                                     outputs = val
                                     break
                         if not isinstance(outputs, list):
                             outputs = list(outputs) if outputs else []
-                        # extract text pieces safely
                         text_pieces = []
                         for item in outputs:
                             if item is None:
                                 continue
-                            cand_contents = None
                             if isinstance(item, dict):
                                 for k in ("content", "text", "message", "output_text", "output"):
                                     if k in item and item[k]:
-                                        cand_contents = item[k]
                                         break
                             else:
                                 for k in ("content", "text", "message", "output", "output_text"):
-                                    cand_contents = getattr(item, k, None)
-                                    if cand_contents:
                                         break
-                            if isinstance(cand_contents, str):
-                                if cand_contents.strip():
-                                    text_pieces.append(cand_contents.strip())
                                 continue
-                            if isinstance(cand_contents, (list, tuple)):
-                                for c in cand_contents:
                                     if c is None:
                                         continue
                                     if isinstance(c, str):
@@ -520,7 +476,6 @@ if generate_now and not st.session_state.get("busy"):
                                     if t:
                                         text_pieces.append(str(t).strip())
                                 continue
                             direct = None
                             if isinstance(item, dict):
                                 direct = item.get("text") or item.get("output_text") or item.get("message")
@@ -528,7 +483,7 @@ if generate_now and not st.session_state.get("busy"):
                                 direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
                             if direct:
                                 text_pieces.append(str(direct).strip())
                         if not text_pieces:
                             top_text = None
                             if isinstance(response, dict):
@@ -537,8 +492,7 @@ if generate_now and not st.session_state.get("busy"):
                                 top_text = getattr(response, "text", None) or getattr(response, "message", None)
                             if top_text:
                                 text_pieces.append(str(top_text).strip())
-                        # dedupe preserving order
                         seen = set()
                         filtered = []
                         for t in text_pieces:
@@ -548,14 +502,13 @@ if generate_now and not st.session_state.get("busy"):
                                 filtered.append(t)
                                 seen.add(t)
                         out = "\n\n".join(filtered)
                     except Exception as e:
                         tb = traceback.format_exc()
                         st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
                         st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
                         out = ""
-                # post-process output
                 if out:
                     out = remove_prompt_echo(prompt_text, out)
                     p = prompt_text
@@ -577,7 +530,7 @@ if generate_now and not st.session_state.get("busy"):
             except Exception as e:
                 tb = traceback.format_exc()
-                st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{tb}"
                 st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
             finally:
                 st.session_state["busy"] = False

 load_dotenv()
 try:
     from phi.agent import Agent
     from phi.model.google import Gemini
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", False)
 st.session_state.setdefault("uploaded_file", None)
 st.session_state.setdefault("fast_mode", False)
 st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
 st.session_state.setdefault("last_model", "")
 st.session_state.setdefault("last_url_value", "")
 def sanitize_filename(path_str: str):
+    return Path(path_str).name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 def file_sha256(path: str, block_size: int = 65536) -> str:
     h = hashlib.sha256()
         return None
     if isinstance(file_obj, dict):
         return file_obj.get("name") or file_obj.get("id")
+    for attr in ("name", "id", "fileId", "file_id"):
         if hasattr(file_obj, attr):
             val = getattr(file_obj, attr)
             if val:
                 return val
     return str(file_obj)
 def get_effective_api_key():
     return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
+def maybe_configure_genai(key):
+    if not key or not HAS_GENAI:
         return False
     try:
         genai.configure(api_key=key)
+        return True
     except Exception:
+        return False
 _agent = None
 def maybe_create_agent(model_id: str):
         except Exception:
             pass
 current_url = st.session_state.get("url", "")
 if current_url != st.session_state.get("last_url_value"):
     clear_all_video_state()
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
 settings_exp = st.sidebar.expander("Settings", expanded=False)
+settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
 settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
 default_prompt = (
     "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
 )
+settings_exp.text_area("Enter analysis", value=default_prompt, height=140, key="analysis_prompt")
 settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
 key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
 settings_exp.caption(f"Using API key from: **{key_source}**")
         try:
             obj = get_file(name)
         except Exception:
             return file_obj
         state = getattr(obj, "state", None)
         if not state or getattr(state, "name", None) != "PROCESSING":
     except Exception:
         pass
 if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
         else:
             try:
                 st.session_state["busy"] = True
+                maybe_configure_genai(key_to_use)
                 model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
                 if st.session_state.get("last_model") != model_id:
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
+                prompt_text = (st.session_state.get("analysis_prompt", "").strip() or default_prompt).strip()
                 out = ""
+                model_used = model_id
+                max_tokens = 512 if st.session_state.get("fast_mode") else 1024
                 est_tokens = max_tokens
                 agent = maybe_create_agent(model_used)
+                debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None}
                 if agent:
                     debug_info["agent_attempted"] = True
                     try:
+                        with st.spinner("Generating via Agent..."):
                             if not processed:
                                 raise RuntimeError("Processed file missing for agent generation")
                             agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
+                            agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None)
+                            if not agent_text and isinstance(agent_response, dict):
+                                for k in ("content", "outputText", "text"):
+                                    if k in agent_response and agent_response[k]:
+                                        agent_text = agent_response[k]
+                                        break
                             if agent_text and str(agent_text).strip():
                                 out = str(agent_text).strip()
                                 debug_info["agent_ok"] = True
                     except Exception as ae:
+                        debug_info["agent_error"] = f"{ae}\n{traceback.format_exc()}"
                 if not out:
                     try:
                         if not HAS_GENAI or genai is None:
                             raise RuntimeError("Responses API not available; install google.generativeai SDK.")
                         fname = file_name_or_id(processed)
                         if not fname:
                             raise RuntimeError("Uploaded file missing name/id")
                         system_msg = {"role": "system", "content": prompt_text}
                         user_msg = {"role": "user", "content": "Please summarize the attached video."}
                         response = None
+                        # Attempt modern responses API
                         try:
                             if hasattr(genai, "responses") and hasattr(genai.responses, "generate"):
                                 response = genai.responses.generate(
                                 )
                         except Exception:
                             response = None
+                        # Attempt GenerativeModel / fallback interfaces
                         if response is None:
                             try:
                                 if hasattr(genai, "GenerativeModel"):
                                         response = model_obj.generate([system_msg, user_msg], files=[{"name": fname}], max_output_tokens=max_tokens)
                             except Exception:
                                 response = None
+                        # Attempt legacy generate/create
                         if response is None:
                             try:
                                 if hasattr(genai, "generate"):
                                     response = genai.create(model=model_used, input=[{"text": prompt_text, "files": [{"name": fname}]}], max_output_tokens=max_tokens)
                             except Exception:
                                 response = None
                         if response is None:
+                            raise RuntimeError("No supported generate method found on google.generativeai; check SDK version or model compatibility.")
                         outputs = []
+                        if isinstance(response, dict):
                             for key in ("output", "candidates", "items", "responses"):
                                 val = response.get(key)
                                 if isinstance(val, list) and val:
                                 if isinstance(val, list) and val:
                                     outputs = val
                                     break
                         if not isinstance(outputs, list):
                             outputs = list(outputs) if outputs else []
                         text_pieces = []
                         for item in outputs:
                             if item is None:
                                 continue
+                            cand = None
                             if isinstance(item, dict):
                                 for k in ("content", "text", "message", "output_text", "output"):
                                     if k in item and item[k]:
+                                        cand = item[k]
                                         break
                             else:
                                 for k in ("content", "text", "message", "output", "output_text"):
+                                    cand = getattr(item, k, None)
+                                    if cand:
                                         break
+                            if isinstance(cand, str):
+                                if cand.strip():
+                                    text_pieces.append(cand.strip())
                                 continue
+                            if isinstance(cand, (list, tuple)):
+                                for c in cand:
                                     if c is None:
                                         continue
                                     if isinstance(c, str):
                                     if t:
                                         text_pieces.append(str(t).strip())
                                 continue
                             direct = None
                             if isinstance(item, dict):
                                 direct = item.get("text") or item.get("output_text") or item.get("message")
                                 direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
                             if direct:
                                 text_pieces.append(str(direct).strip())
                         if not text_pieces:
                             top_text = None
                             if isinstance(response, dict):
                                 top_text = getattr(response, "text", None) or getattr(response, "message", None)
                             if top_text:
                                 text_pieces.append(str(top_text).strip())
                         seen = set()
                         filtered = []
                         for t in text_pieces:
                                 filtered.append(t)
                                 seen.add(t)
                         out = "\n\n".join(filtered)
                     except Exception as e:
                         tb = traceback.format_exc()
                         st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
                         st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
                         out = ""
                 if out:
                     out = remove_prompt_echo(prompt_text, out)
                     p = prompt_text
             except Exception as e:
                 tb = traceback.format_exc()
+                st.session_state["last_error"] = f"{e}\n\nTraceback:\n{tb}"
                 st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
             finally:
                 st.session_state["busy"] = False