CB committed on
Commit
1158077
·
verified ·
1 Parent(s): e61ff31

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +132 -115
streamlit_app.py CHANGED
@@ -8,6 +8,7 @@ from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
  import concurrent.futures
 
11
 
12
  import yt_dlp
13
  import ffmpeg
@@ -16,6 +17,7 @@ from dotenv import load_dotenv
16
 
17
  load_dotenv()
18
 
 
19
  try:
20
  from phi.agent import Agent
21
  from phi.model.google import Gemini
@@ -25,6 +27,7 @@ except Exception:
25
  Agent = Gemini = DuckDuckGo = None
26
  HAS_PHI = False
27
 
 
28
  try:
29
  import google.generativeai as genai
30
  from google.generativeai import upload_file, get_file # type: ignore
@@ -38,7 +41,7 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
38
  DATA_DIR = Path("./data")
39
  DATA_DIR.mkdir(exist_ok=True)
40
 
41
- # Session defaults
42
  st.session_state.setdefault("videos", "")
43
  st.session_state.setdefault("loop_video", False)
44
  st.session_state.setdefault("uploaded_file", None)
@@ -52,8 +55,10 @@ st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
52
  st.session_state.setdefault("last_model", "")
53
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
54
  st.session_state.setdefault("last_url_value", "")
55
- st.session_state.setdefault("processing_timeout", 600) # default 10 minutes
 
56
 
 
57
  def sanitize_filename(path_str: str):
58
  name = Path(path_str).name
59
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -124,6 +129,7 @@ def configure_genai_if_needed():
124
  pass
125
  return True
126
 
 
127
  _agent = None
128
  def maybe_create_agent(model_id: str):
129
  global _agent
@@ -155,12 +161,13 @@ def clear_all_video_state():
155
  except Exception:
156
  pass
157
 
158
- # track url changes
159
  current_url = st.session_state.get("url", "")
160
  if current_url != st.session_state.get("last_url_value"):
161
  clear_all_video_state()
162
  st.session_state["last_url_value"] = current_url
163
 
 
164
  st.sidebar.header("Video Input")
165
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
166
 
@@ -173,19 +180,19 @@ default_prompt = (
173
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
174
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
175
 
176
- # Expose processing timeout
177
  settings_exp.number_input(
178
- "Processing timeout (s)",
179
- min_value=60,
180
- max_value=3600,
181
- value=st.session_state.get("processing_timeout", 600),
182
- step=30,
183
  key="processing_timeout",
184
  )
 
 
 
 
 
185
 
186
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
187
  settings_exp.caption(f"Using API key from: **{key_source}**")
188
-
189
  if not get_effective_api_key():
190
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
191
 
@@ -196,6 +203,7 @@ safety_settings = [
196
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
197
  ]
198
 
 
199
  def upload_video_sdk(filepath: str):
200
  key = get_effective_api_key()
201
  if not key:
@@ -207,13 +215,11 @@ def upload_video_sdk(filepath: str):
207
 
208
  def wait_for_processed(file_obj, timeout: int = None):
209
  """
210
- Poll get_file until file is no longer in PROCESSING state.
211
- Uses st.session_state['processing_timeout'] if timeout is None.
212
- Retries on transient get_file errors with exponential backoff.
213
- Raises TimeoutError on timeout.
214
  """
215
  if timeout is None:
216
- timeout = st.session_state.get("processing_timeout", 600)
217
  if not HAS_GENAI or get_file is None:
218
  return file_obj
219
  start = time.time()
@@ -221,12 +227,10 @@ def wait_for_processed(file_obj, timeout: int = None):
221
  if not name:
222
  return file_obj
223
  backoff = 1.0
224
- last_exc = None
225
  while True:
226
  try:
227
  obj = get_file(name)
228
  except Exception as e:
229
- last_exc = e
230
  if time.time() - start > timeout:
231
  raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
232
  time.sleep(backoff)
@@ -262,11 +266,6 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
262
  return text
263
 
264
  def compress_video_if_large(local_path: str, threshold_mb: int = 50):
265
- """
266
- Returns (path_to_upload, compressed_flag).
267
- If compression fails or isn't needed returns (local_path, False).
268
- Logs errors to st.session_state['last_error'].
269
- """
270
  try:
271
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
272
  except Exception as e:
@@ -286,7 +285,8 @@ def compress_video_if_large(local_path: str, threshold_mb: int = 50):
286
  st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
287
  return local_path, False
288
 
289
- def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024):
 
290
  key = get_effective_api_key()
291
  if not key:
292
  raise RuntimeError("No API key provided")
@@ -296,104 +296,111 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
296
  fname = file_name_or_id(processed)
297
  if not fname:
298
  raise RuntimeError("Uploaded file missing name/id")
 
299
  system_msg = {"role": "system", "content": prompt_text}
300
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
301
- try:
302
- response = genai.responses.generate(
303
- model=model_used,
304
- messages=[system_msg, user_msg],
305
- files=[{"name": fname}],
306
- safety_settings=safety_settings,
307
- max_output_tokens=max_tokens,
308
- )
309
- except TypeError:
310
- response = genai.responses.generate(
311
- model=model_used,
312
- input=[{"text": prompt_text, "files": [{"name": fname}]}],
313
- safety_settings=safety_settings,
314
- max_output_tokens=max_tokens,
315
- )
316
-
317
- # Normalize outputs into text pieces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  outputs = []
319
  if response is None:
320
- outputs = []
321
- elif isinstance(response, dict):
322
- for key in ("output", "candidates", "items", "responses"):
323
- val = response.get(key)
324
- if isinstance(val, list) and val:
325
- outputs = val
326
- break
327
- if not outputs:
 
 
 
 
 
 
 
 
 
 
 
328
  for v in response.values():
329
  if isinstance(v, list) and v:
330
- outputs = v
331
  break
332
- else:
333
- for attr in ("output", "candidates", "items", "responses"):
334
- val = getattr(response, attr, None)
335
- if isinstance(val, list) and val:
336
- outputs = val
337
- break
338
-
339
- if not isinstance(outputs, list):
340
- outputs = list(outputs) if outputs else []
341
 
342
  text_pieces = []
343
- for item in outputs:
344
- if item is None:
345
- continue
346
- cand_contents = None
347
- if isinstance(item, dict):
348
- for k in ("content", "text", "message", "output_text", "output"):
349
- if k in item and item[k]:
350
- cand_contents = item[k]
351
- break
352
- else:
353
- for k in ("content", "text", "message", "output", "output_text"):
354
- cand_contents = getattr(item, k, None)
355
- if cand_contents:
356
- break
357
-
358
- if isinstance(cand_contents, str):
359
- if cand_contents.strip():
360
- text_pieces.append(cand_contents.strip())
361
- continue
362
-
363
- if isinstance(cand_contents, (list, tuple)):
364
- for c in cand_contents:
365
- if c is None:
366
- continue
367
- if isinstance(c, str):
368
- if c.strip():
369
- text_pieces.append(c.strip())
370
- continue
371
- if isinstance(c, dict):
372
- t = c.get("text") or c.get("content")
373
  else:
374
- t = getattr(c, "text", None) or getattr(c, "content", None)
375
- if t:
376
- text_pieces.append(str(t).strip())
377
- continue
378
-
379
- direct = None
380
- if isinstance(item, dict):
381
- direct = item.get("text") or item.get("output_text") or item.get("message")
382
- else:
383
- direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
384
- if direct:
385
- text_pieces.append(str(direct).strip())
 
 
 
 
 
 
 
386
 
387
- if not text_pieces:
388
- top_text = None
389
- if isinstance(response, dict):
390
- top_text = response.get("text") or response.get("message")
391
- else:
392
- top_text = getattr(response, "text", None) or getattr(response, "message", None)
393
- if top_text:
394
- text_pieces.append(str(top_text).strip())
395
 
396
- # Deduplicate, preserve order
397
  seen = set()
398
  filtered = []
399
  for t in text_pieces:
@@ -402,8 +409,9 @@ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max
402
  if t and t not in seen:
403
  filtered.append(t)
404
  seen.add(t)
405
- return "\n\n".join(filtered)
406
 
 
407
  col1, col2 = st.columns([1, 3])
408
  with col1:
409
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
@@ -453,7 +461,7 @@ if st.session_state["videos"]:
453
  except Exception:
454
  pass
455
 
456
- # --- Generation flow ---
457
  if generate_now and not st.session_state.get("busy"):
458
  if not st.session_state.get("videos"):
459
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -493,6 +501,9 @@ if generate_now and not st.session_state.get("busy"):
493
  upload_path, compressed = compress_video_if_large(local_path)
494
 
495
  with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
 
 
496
  try:
497
  uploaded = upload_video_sdk(upload_path)
498
  except Exception as e:
@@ -501,7 +512,13 @@ if generate_now and not st.session_state.get("busy"):
501
  raise
502
 
503
  try:
504
- processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 600))
 
 
 
 
 
 
505
  except Exception as e:
506
  st.session_state["last_error"] = f"Processing failed/wait timeout: {e}\n\nTraceback:\n{traceback.format_exc()}"
507
  st.error("Video processing failed or timed out. See Last Error.")
@@ -515,7 +532,7 @@ if generate_now and not st.session_state.get("busy"):
515
  prompt_text = (analysis_prompt.strip() or default_prompt).strip()
516
  out = ""
517
  model_used = model_id
518
- max_tokens = 1024
519
  est_tokens = max_tokens
520
 
521
  # Try Agent first, fallback to Responses API
@@ -532,7 +549,7 @@ if generate_now and not st.session_state.get("busy"):
532
  if not agent_text:
533
  try:
534
  if isinstance(agent_response, dict):
535
- for k in ("content", "outputText", "text"):
536
  if k in agent_response and agent_response[k]:
537
  agent_text = agent_response[k]
538
  break
@@ -551,7 +568,7 @@ if generate_now and not st.session_state.get("busy"):
551
  if not out:
552
  try:
553
  with st.spinner("Generating description via Responses API..."):
554
- out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens)
555
  except Exception as e:
556
  tb = traceback.format_exc()
557
  st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
 
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
  import concurrent.futures
11
+ import json
12
 
13
  import yt_dlp
14
  import ffmpeg
 
17
 
18
  load_dotenv()
19
 
20
+ # Optional phi integration (Agent + Gemini wrapper)
21
  try:
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
 
27
  Agent = Gemini = DuckDuckGo = None
28
  HAS_PHI = False
29
 
30
+ # google.generativeai SDK
31
  try:
32
  import google.generativeai as genai
33
  from google.generativeai import upload_file, get_file # type: ignore
 
41
  DATA_DIR = Path("./data")
42
  DATA_DIR.mkdir(exist_ok=True)
43
 
44
+ # ---- Session defaults ----
45
  st.session_state.setdefault("videos", "")
46
  st.session_state.setdefault("loop_video", False)
47
  st.session_state.setdefault("uploaded_file", None)
 
55
  st.session_state.setdefault("last_model", "")
56
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
57
  st.session_state.setdefault("last_url_value", "")
58
+ st.session_state.setdefault("processing_timeout", 900) # increased default to 15m
59
+ st.session_state.setdefault("generation_timeout", 300) # for Responses generate
60
 
61
+ # ---- Helpers ----
62
  def sanitize_filename(path_str: str):
63
  name = Path(path_str).name
64
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 
129
  pass
130
  return True
131
 
132
+ # ---- Agent management ----
133
  _agent = None
134
  def maybe_create_agent(model_id: str):
135
  global _agent
 
161
  except Exception:
162
  pass
163
 
164
+ # Reset when URL changes
165
  current_url = st.session_state.get("url", "")
166
  if current_url != st.session_state.get("last_url_value"):
167
  clear_all_video_state()
168
  st.session_state["last_url_value"] = current_url
169
 
170
+ # ---- Sidebar UI ----
171
  st.sidebar.header("Video Input")
172
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
173
 
 
180
  analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
181
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
182
 
 
183
  settings_exp.number_input(
184
+ "Processing timeout (s)", min_value=60, max_value=3600,
185
+ value=st.session_state.get("processing_timeout", 900), step=30,
 
 
 
186
  key="processing_timeout",
187
  )
188
+ settings_exp.number_input(
189
+ "Generation timeout (s)", min_value=30, max_value=1800,
190
+ value=st.session_state.get("generation_timeout", 300), step=10,
191
+ key="generation_timeout",
192
+ )
193
 
194
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
195
  settings_exp.caption(f"Using API key from: **{key_source}**")
 
196
  if not get_effective_api_key():
197
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
198
 
 
203
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
204
  ]
205
 
206
+ # ---- Upload & processing helpers ----
207
  def upload_video_sdk(filepath: str):
208
  key = get_effective_api_key()
209
  if not key:
 
215
 
216
  def wait_for_processed(file_obj, timeout: int = None):
217
  """
218
+ Poll get_file until file is no longer PROCESSING.
219
+ Retries get_file on transient errors with exponential backoff.
 
 
220
  """
221
  if timeout is None:
222
+ timeout = st.session_state.get("processing_timeout", 900)
223
  if not HAS_GENAI or get_file is None:
224
  return file_obj
225
  start = time.time()
 
227
  if not name:
228
  return file_obj
229
  backoff = 1.0
 
230
  while True:
231
  try:
232
  obj = get_file(name)
233
  except Exception as e:
 
234
  if time.time() - start > timeout:
235
  raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
236
  time.sleep(backoff)
 
266
  return text
267
 
268
  def compress_video_if_large(local_path: str, threshold_mb: int = 50):
 
 
 
 
 
269
  try:
270
  file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
271
  except Exception as e:
 
285
  st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
286
  return local_path, False
287
 
288
+ # ---- Robust Responses API caller adapted for varying model versions ----
289
+ def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
290
  key = get_effective_api_key()
291
  if not key:
292
  raise RuntimeError("No API key provided")
 
296
  fname = file_name_or_id(processed)
297
  if not fname:
298
  raise RuntimeError("Uploaded file missing name/id")
299
+
300
  system_msg = {"role": "system", "content": prompt_text}
301
  user_msg = {"role": "user", "content": "Please summarize the attached video."}
302
+
303
+ # Some model versions and SDK releases expect messages, some older ones expect input with files.
304
+ call_variants = [
305
+ {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
306
+ {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
307
+ ]
308
+
309
+ last_exc = None
310
+ start = time.time()
311
+ backoff = 1.0
312
+ while True:
313
+ for payload in call_variants:
314
+ try:
315
+ response = genai.responses.generate(model=model_used, **payload)
316
+ # If successful, normalize below
317
+ return _normalize_genai_response(response)
318
+ except Exception as e:
319
+ last_exc = e
320
+ # If it's a transient server error, let outer retry/backoff handle it
321
+ # Quick heuristic: inspect message for INTERNAL/UNAVAILABLE/DeadlineExceeded
322
+ msg = str(e).lower()
323
+ if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
324
+ # will retry below
325
+ pass
326
+ else:
327
+ # If it's a clear invalid-argument or permission error, bubble up immediately
328
+ raise
329
+ if time.time() - start > timeout:
330
+ raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
331
+ time.sleep(backoff)
332
+ backoff = min(backoff * 2, 8.0)
333
+
334
+ def _normalize_genai_response(response):
335
+ # Accept dict or object shapes. Extract text pieces robustly and join.
336
  outputs = []
337
  if response is None:
338
+ return ""
339
+
340
+ # If it's an object with attributes
341
+ if not isinstance(response, dict):
342
+ try:
343
+ response = json.loads(str(response))
344
+ except Exception:
345
+ # fallback to attribute access
346
+ pass
347
+
348
+ # Strategy: check common keys
349
+ candidate_lists = []
350
+ for key in ("output", "candidates", "items", "responses", "choices"):
351
+ val = response.get(key) if isinstance(response, dict) else None
352
+ if isinstance(val, list) and val:
353
+ candidate_lists.append(val)
354
+ if not candidate_lists:
355
+ # fallback: any list value
356
+ if isinstance(response, dict):
357
  for v in response.values():
358
  if isinstance(v, list) and v:
359
+ candidate_lists.append(v)
360
  break
 
 
 
 
 
 
 
 
 
361
 
362
  text_pieces = []
363
+ for lst in candidate_lists:
364
+ for item in lst:
365
+ if not item:
366
+ continue
367
+ if isinstance(item, dict):
368
+ # common text keys
369
+ for k in ("content", "text", "message", "output_text", "output"):
370
+ t = item.get(k)
371
+ if t:
372
+ text_pieces.append(str(t).strip())
373
+ break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  else:
375
+ # nested forms
376
+ if "content" in item and isinstance(item["content"], list):
377
+ for part in item["content"]:
378
+ if isinstance(part, dict):
379
+ t = part.get("text") or part.get("content")
380
+ if t:
381
+ text_pieces.append(str(t).strip())
382
+ elif isinstance(part, str):
383
+ text_pieces.append(part.strip())
384
+ elif isinstance(item, str):
385
+ text_pieces.append(item.strip())
386
+ else:
387
+ # try attribute access
388
+ try:
389
+ t = getattr(item, "text", None) or getattr(item, "content", None)
390
+ if t:
391
+ text_pieces.append(str(t).strip())
392
+ except Exception:
393
+ pass
394
 
395
+ # If still empty, try top-level text fields
396
+ if not text_pieces and isinstance(response, dict):
397
+ for k in ("text", "message", "output_text"):
398
+ v = response.get(k)
399
+ if v:
400
+ text_pieces.append(str(v).strip())
401
+ break
 
402
 
403
+ # deduplicate preserving order
404
  seen = set()
405
  filtered = []
406
  for t in text_pieces:
 
409
  if t and t not in seen:
410
  filtered.append(t)
411
  seen.add(t)
412
+ return "\n\n".join(filtered).strip()
413
 
414
+ # ---- Layout ----
415
  col1, col2 = st.columns([1, 3])
416
  with col1:
417
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
 
461
  except Exception:
462
  pass
463
 
464
+ # ---- Main generation flow ----
465
  if generate_now and not st.session_state.get("busy"):
466
  if not st.session_state.get("videos"):
467
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
501
  upload_path, compressed = compress_video_if_large(local_path)
502
 
503
  with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
504
+ # Provide an upload progress bar UI while calling upload_file.
505
+ progress_placeholder = st.empty()
506
+ progress_bar = None
507
  try:
508
  uploaded = upload_video_sdk(upload_path)
509
  except Exception as e:
 
512
  raise
513
 
514
  try:
515
+ # Show a more informative processing progress area
516
+ processing_placeholder = st.empty()
517
+ processing_bar = processing_placeholder.progress(0)
518
+ start_wait = time.time()
519
+ processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
520
+ processing_bar.progress(100)
521
+ processing_placeholder.success("Processing complete")
522
  except Exception as e:
523
  st.session_state["last_error"] = f"Processing failed/wait timeout: {e}\n\nTraceback:\n{traceback.format_exc()}"
524
  st.error("Video processing failed or timed out. See Last Error.")
 
532
  prompt_text = (analysis_prompt.strip() or default_prompt).strip()
533
  out = ""
534
  model_used = model_id
535
+ max_tokens = 2048 if "2.5" in model_used else 1024
536
  est_tokens = max_tokens
537
 
538
  # Try Agent first, fallback to Responses API
 
549
  if not agent_text:
550
  try:
551
  if isinstance(agent_response, dict):
552
+ for k in ("content", "outputText", "text", "message"):
553
  if k in agent_response and agent_response[k]:
554
  agent_text = agent_response[k]
555
  break
 
568
  if not out:
569
  try:
570
  with st.spinner("Generating description via Responses API..."):
571
+ out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
572
  except Exception as e:
573
  tb = traceback.format_exc()
574
  st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"