CB committed on
Commit
1ed3b89
·
verified ·
1 Parent(s): d9fde4e

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +334 -203
streamlit_app.py CHANGED
@@ -7,6 +7,8 @@ import traceback
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
 
 
10
 
11
  import yt_dlp
12
  import ffmpeg
@@ -15,29 +17,50 @@ from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
 
18
  try:
19
  from phi.agent import Agent
20
  from phi.model.google import Gemini
21
  from phi.tools.duckduckgo import DuckDuckGo
 
22
  HAS_PHI = True
23
  except Exception:
24
  Agent = Gemini = DuckDuckGo = None
25
  HAS_PHI = False
26
 
 
27
  try:
28
  import google.generativeai as genai
29
- from google.generativeai import upload_file, get_file # type: ignore
 
30
  HAS_GENAI = True
31
  except Exception:
32
  genai = None
33
  upload_file = get_file = None
34
  HAS_GENAI = False
35
 
 
 
36
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
37
  DATA_DIR = Path("./data")
38
  DATA_DIR.mkdir(exist_ok=True)
39
 
40
- # Session defaults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  st.session_state.setdefault("videos", "")
42
  st.session_state.setdefault("loop_video", False)
43
  st.session_state.setdefault("uploaded_file", None)
@@ -47,39 +70,59 @@ st.session_state.setdefault("last_loaded_path", "")
47
  st.session_state.setdefault("analysis_out", "")
48
  st.session_state.setdefault("last_error", "")
49
  st.session_state.setdefault("file_hash", None)
50
- st.session_state.setdefault("fast_mode", False)
51
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
52
  st.session_state.setdefault("last_model", "")
53
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
54
  st.session_state.setdefault("last_url_value", "")
 
 
 
55
 
 
56
  def sanitize_filename(path_str: str):
57
  name = Path(path_str).name
58
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
59
 
60
  def file_sha256(path: str, block_size: int = 65536) -> str:
61
- h = hashlib.sha256()
62
- with open(path, "rb") as f:
63
- for chunk in iter(lambda: f.read(block_size), b""):
64
- h.update(chunk)
65
- return h.hexdigest()
 
 
 
66
 
67
  def convert_video_to_mp4(video_path: str) -> str:
68
  target_path = str(Path(video_path).with_suffix(".mp4"))
69
  if os.path.exists(target_path):
70
  return target_path
71
- ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
72
  try:
73
- os.remove(video_path)
74
- except Exception:
75
- pass
 
 
 
 
 
 
 
 
 
76
  return target_path
77
 
78
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
79
  try:
80
- ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(overwrite_output=True, quiet=True)
81
- return target_path
 
 
 
 
 
82
  except Exception:
 
83
  return input_path
84
 
85
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
@@ -88,24 +131,46 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
88
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
89
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
90
  if video_password:
 
91
  ydl_opts["videopassword"] = video_password
92
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
93
  info = ydl.extract_info(url, download=True)
94
- video_id = info.get("id") if isinstance(info, dict) else None
95
- if video_id:
96
- matches = glob(os.path.join(save_dir, f"{video_id}.*"))
97
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  all_files = glob(os.path.join(save_dir, "*"))
99
- matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
100
- if not matches:
101
- raise FileNotFoundError("Downloaded video not found")
102
- return convert_video_to_mp4(matches[0])
 
 
 
 
 
 
 
103
 
104
  def file_name_or_id(file_obj):
105
  if file_obj is None:
106
  return None
107
  if isinstance(file_obj, dict):
108
  return file_obj.get("name") or file_obj.get("id")
 
109
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
110
 
111
  def get_effective_api_key():
@@ -118,9 +183,10 @@ def configure_genai_if_needed():
118
  try:
119
  genai.configure(api_key=key)
120
  except Exception:
121
- pass
122
  return True
123
 
 
124
  _agent = None
125
  def maybe_create_agent(model_id: str):
126
  global _agent
@@ -135,6 +201,7 @@ def maybe_create_agent(model_id: str):
135
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
136
  st.session_state["last_model"] = model_id
137
  except Exception:
 
138
  _agent = None
139
  return _agent
140
 
@@ -150,31 +217,50 @@ def clear_all_video_state():
150
  try:
151
  os.remove(f)
152
  except Exception:
153
- pass
154
 
155
- # track url changes
156
  current_url = st.session_state.get("url", "")
157
  if current_url != st.session_state.get("last_url_value"):
158
  clear_all_video_state()
159
  st.session_state["last_url_value"] = current_url
160
 
 
161
  st.sidebar.header("Video Input")
162
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
163
 
164
  settings_exp = st.sidebar.expander("Settings", expanded=False)
165
- model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
 
 
 
 
 
 
 
 
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
- default_prompt = (
168
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
169
- )
170
- analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
171
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
172
- settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
173
 
174
- # Show which key is active
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
176
  settings_exp.caption(f"Using API key from: **{key_source}**")
177
-
178
  if not get_effective_api_key():
179
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
180
 
@@ -185,6 +271,7 @@ safety_settings = [
185
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
186
  ]
187
 
 
188
  def upload_video_sdk(filepath: str):
189
  key = get_effective_api_key()
190
  if not key:
@@ -192,9 +279,12 @@ def upload_video_sdk(filepath: str):
192
  if not HAS_GENAI or upload_file is None:
193
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
194
  genai.configure(api_key=key)
 
195
  return upload_file(filepath)
196
 
197
- def wait_for_processed(file_obj, timeout=180):
 
 
198
  if not HAS_GENAI or get_file is None:
199
  return file_obj
200
  start = time.time()
@@ -203,12 +293,21 @@ def wait_for_processed(file_obj, timeout=180):
203
  return file_obj
204
  backoff = 1.0
205
  while True:
206
- obj = get_file(name)
 
 
 
 
 
 
 
 
207
  state = getattr(obj, "state", None)
208
  if not state or getattr(state, "name", None) != "PROCESSING":
209
  return obj
 
210
  if time.time() - start > timeout:
211
- raise TimeoutError("File processing timed out")
212
  time.sleep(backoff)
213
  backoff = min(backoff * 2, 8.0)
214
 
@@ -231,11 +330,161 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
231
  return b_full[len(ph):].lstrip(" \n:-")
232
  return text
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  col1, col2 = st.columns([1, 3])
235
  with col1:
236
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
237
  with col2:
238
- pass
239
 
240
  if st.sidebar.button("Load Video", use_container_width=True):
241
  try:
@@ -250,6 +499,7 @@ if st.sidebar.button("Load Video", use_container_width=True):
250
  except Exception:
251
  st.session_state["file_hash"] = None
252
  except Exception as e:
 
253
  st.sidebar.error(f"Failed to load video: {e}")
254
 
255
  if st.session_state["videos"]:
@@ -275,12 +525,12 @@ if st.session_state["videos"]:
275
  try:
276
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
277
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
278
- if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
279
- st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
280
  except Exception:
281
  pass
282
 
283
- # --- Generation flow ---
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -295,9 +545,9 @@ if generate_now and not st.session_state.get("busy"):
295
  if HAS_GENAI and genai is not None:
296
  genai.configure(api_key=key_to_use)
297
  except Exception:
298
- pass
299
 
300
- model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
301
  if st.session_state.get("last_model") != model_id:
302
  st.session_state["last_model"] = ""
303
  maybe_create_agent(model_id)
@@ -309,51 +559,57 @@ if generate_now and not st.session_state.get("busy"):
309
  except Exception:
310
  current_hash = None
311
 
 
312
  reupload_needed = True
313
- if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
 
 
314
  reupload_needed = False
315
 
316
  if reupload_needed:
317
  if not HAS_GENAI:
318
  raise RuntimeError("google.generativeai SDK not available; install it.")
319
  local_path = current_path
320
- fast_mode = st.session_state.get("fast_mode", False)
321
- upload_path = local_path
322
- try:
323
- file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
324
- except Exception:
325
- file_size_mb = 0
326
 
327
- if not fast_mode and file_size_mb > 50:
328
- compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
329
  try:
330
- preset = "veryfast" if fast_mode else "fast"
331
- upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
332
- except Exception:
333
- upload_path = local_path
 
 
334
 
335
- with st.spinner("Uploading video..."):
336
- uploaded = upload_video_sdk(upload_path)
337
- processed = wait_for_processed(uploaded, timeout=180)
338
- st.session_state["uploaded_file"] = uploaded
339
- st.session_state["processed_file"] = processed
340
- st.session_state["last_loaded_path"] = current_path
341
- st.session_state["file_hash"] = current_hash
 
 
 
 
 
 
 
 
 
342
 
343
- prompt_text = (analysis_prompt.strip() or default_prompt).strip()
 
 
 
344
 
 
345
  out = ""
346
- if st.session_state.get("fast_mode"):
347
- model_used = model_id if model_id else "gemini-2.0-flash-lite"
348
- max_tokens = 512
349
- else:
350
- model_used = model_id
351
- max_tokens = 1024
352
-
353
  est_tokens = max_tokens
354
- est_cost_caption = f"Est. max tokens: {est_tokens}"
355
 
356
- # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
357
  agent = maybe_create_agent(model_used)
358
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
359
  if agent:
@@ -362,16 +618,12 @@ if generate_now and not st.session_state.get("busy"):
362
  with st.spinner("Generating description via Agent..."):
363
  if not processed:
364
  raise RuntimeError("Processed file missing for agent generation")
365
- # call agent.run inside try/except to catch library IndexError
366
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367
- # Try to extract text from common attributes; be defensive
368
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
369
  if not agent_text:
370
- # try dict-like access
371
  try:
372
  if isinstance(agent_response, dict):
373
- # check common keys
374
- for k in ("content", "outputText", "text"):
375
  if k in agent_response and agent_response[k]:
376
  agent_text = agent_response[k]
377
  break
@@ -382,143 +634,21 @@ if generate_now and not st.session_state.get("busy"):
382
  debug_info["agent_ok"] = True
383
  debug_info["agent_response_has_text"] = True
384
  else:
385
- # Agent returned but had no usable text; set a marker to fallback
386
  debug_info["agent_ok"] = False
387
  except Exception as ae:
388
- # Save agent error and continue to fallback path instead of crashing
389
  debug_info["agent_error"] = f"{ae}"
390
- # include traceback for debugging
391
  debug_info["agent_traceback"] = traceback.format_exc()
392
- # Do not re-raise; we'll fallback to genai.responses.generate below
393
 
394
  if not out:
395
- # Fallback to direct Responses API flow
396
  try:
397
- if not HAS_GENAI or genai is None:
398
- raise RuntimeError("Responses API not available; install google.generativeai SDK.")
399
- genai.configure(api_key=key_to_use)
400
- fname = file_name_or_id(processed)
401
- if not fname:
402
- raise RuntimeError("Uploaded file missing name/id")
403
- system_msg = {"role": "system", "content": prompt_text}
404
- user_msg = {"role": "user", "content": "Please summarize the attached video."}
405
-
406
- try:
407
- response = genai.responses.generate(
408
- model=model_used,
409
- messages=[system_msg, user_msg],
410
- files=[{"name": fname}],
411
- safety_settings=safety_settings,
412
- max_output_tokens=max_tokens,
413
- )
414
- except TypeError:
415
- response = genai.responses.generate(
416
- model=model_used,
417
- input=[{"text": prompt_text, "files": [{"name": fname}]}],
418
- safety_settings=safety_settings,
419
- max_output_tokens=max_tokens,
420
- )
421
-
422
- # Defensive normalization of response -> outputs list
423
- outputs = []
424
- if response is None:
425
- outputs = []
426
- elif isinstance(response, dict):
427
- for key in ("output", "candidates", "items", "responses"):
428
- val = response.get(key)
429
- if isinstance(val, list) and val:
430
- outputs = val
431
- break
432
- if not outputs:
433
- for v in response.values():
434
- if isinstance(v, list) and v:
435
- outputs = v
436
- break
437
- else:
438
- for attr in ("output", "candidates", "items", "responses"):
439
- val = getattr(response, attr, None)
440
- if isinstance(val, list) and val:
441
- outputs = val
442
- break
443
-
444
- # ensure list
445
- if not isinstance(outputs, list):
446
- outputs = list(outputs) if outputs else []
447
-
448
- # extract text pieces safely
449
- text_pieces = []
450
- for item in outputs:
451
- if item is None:
452
- continue
453
- # item may be dict or object; attempt to find text-rich fields
454
- cand_contents = None
455
- if isinstance(item, dict):
456
- for k in ("content", "text", "message", "output_text", "output"):
457
- if k in item and item[k]:
458
- cand_contents = item[k]
459
- break
460
- else:
461
- for k in ("content", "text", "message", "output", "output_text"):
462
- cand_contents = getattr(item, k, None)
463
- if cand_contents:
464
- break
465
-
466
- if isinstance(cand_contents, str):
467
- if cand_contents.strip():
468
- text_pieces.append(cand_contents.strip())
469
- continue
470
-
471
- if isinstance(cand_contents, (list, tuple)):
472
- for c in cand_contents:
473
- if c is None:
474
- continue
475
- if isinstance(c, str):
476
- if c.strip():
477
- text_pieces.append(c.strip())
478
- continue
479
- if isinstance(c, dict):
480
- t = c.get("text") or c.get("content")
481
- else:
482
- t = getattr(c, "text", None) or getattr(c, "content", None)
483
- if t:
484
- text_pieces.append(str(t).strip())
485
- continue
486
-
487
- direct = None
488
- if isinstance(item, dict):
489
- direct = item.get("text") or item.get("output_text") or item.get("message")
490
- else:
491
- direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
492
- if direct:
493
- text_pieces.append(str(direct).strip())
494
-
495
- if not text_pieces:
496
- top_text = None
497
- if isinstance(response, dict):
498
- top_text = response.get("text") or response.get("message")
499
- else:
500
- top_text = getattr(response, "text", None) or getattr(response, "message", None)
501
- if top_text:
502
- text_pieces.append(str(top_text).strip())
503
-
504
- # dedupe preserving order
505
- seen = set()
506
- filtered = []
507
- for t in text_pieces:
508
- if not isinstance(t, str):
509
- continue
510
- if t and t not in seen:
511
- filtered.append(t)
512
- seen.add(t)
513
- out = "\n\n".join(filtered)
514
  except Exception as e:
515
- # Capture clear error to UI and include debug_info
516
  tb = traceback.format_exc()
517
- st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
518
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
519
  out = ""
520
 
521
- # post-process output
522
  if out:
523
  out = remove_prompt_echo(prompt_text, out)
524
  p = prompt_text
@@ -540,7 +670,8 @@ if generate_now and not st.session_state.get("busy"):
540
 
541
  except Exception as e:
542
  tb = traceback.format_exc()
543
- st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', debug_info)}\n\nTraceback:\n{tb}"
 
544
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
545
  finally:
546
  st.session_state["busy"] = False
 
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
+ import json
11
+ import logging
12
 
13
  import yt_dlp
14
  import ffmpeg
 
17
 
18
  load_dotenv()
19
 
20
+ # Optional PHI integration
21
  try:
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
24
  from phi.tools.duckduckgo import DuckDuckGo
25
+
26
  HAS_PHI = True
27
  except Exception:
28
  Agent = Gemini = DuckDuckGo = None
29
  HAS_PHI = False
30
 
31
+ # google.generativeai SDK
32
  try:
33
  import google.generativeai as genai
34
+ from google.generativeai import upload_file, get_file
35
+
36
  HAS_GENAI = True
37
  except Exception:
38
  genai = None
39
  upload_file = get_file = None
40
  HAS_GENAI = False
41
 
42
+ logging.basicConfig(level=logging.INFO)
43
+
44
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
45
  DATA_DIR = Path("./data")
46
  DATA_DIR.mkdir(exist_ok=True)
47
 
48
+ # ---- Defaults & constants ----
49
+ MODEL_OPTIONS = [
50
+ "gemini-2.5-flash",
51
+ "gemini-2.5-flash-lite",
52
+ "gemini-2.0-flash",
53
+ "gemini-2.0-flash-lite",
54
+ "custom",
55
+ ]
56
+ DEFAULT_MODEL = "gemini-2.0-flash-lite"
57
+ DEFAULT_PROMPT = (
58
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
59
+ "Keep language professional. Include a list of observations for notable events."
60
+ )
61
+
62
+ # ---- Session defaults ----
63
+ st.session_state.setdefault("url", "")
64
  st.session_state.setdefault("videos", "")
65
  st.session_state.setdefault("loop_video", False)
66
  st.session_state.setdefault("uploaded_file", None)
 
70
  st.session_state.setdefault("analysis_out", "")
71
  st.session_state.setdefault("last_error", "")
72
  st.session_state.setdefault("file_hash", None)
 
73
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
74
  st.session_state.setdefault("last_model", "")
75
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
76
  st.session_state.setdefault("last_url_value", "")
77
+ st.session_state.setdefault("processing_timeout", 900)
78
+ st.session_state.setdefault("generation_timeout", 300)
79
+ st.session_state.setdefault("compress_threshold_mb", 200)
80
 
81
+ # ---- Helpers ----
82
def sanitize_filename(path_str: str):
    """Return a filesystem-friendly version of the file name in *path_str*.

    Lowercases the name, strips all ASCII punctuation (including the
    extension dot), and replaces spaces with underscores.
    """
    base_name = Path(path_str).name.lower()
    punctuation_free = base_name.translate(str.maketrans("", "", string.punctuation))
    return punctuation_free.replace(" ", "_")
85
 
86
  def file_sha256(path: str, block_size: int = 65536) -> str:
87
+ try:
88
+ h = hashlib.sha256()
89
+ with open(path, "rb") as f:
90
+ for chunk in iter(lambda: f.read(block_size), b""):
91
+ h.update(chunk)
92
+ return h.hexdigest()
93
+ except Exception:
94
+ return None
95
 
96
def convert_video_to_mp4(video_path: str) -> str:
    """Convert *video_path* to an .mp4 next to it and return the new path.

    If the .mp4 already exists it is reused as-is. On a successful
    conversion the source file is deleted (unless it *is* the target);
    a failed ffmpeg run is logged and re-raised without touching the
    source.
    """
    source = Path(video_path)
    target = source.with_suffix(".mp4")
    target_path = str(target)

    if os.path.exists(target_path):
        # Already converted on an earlier run — reuse it.
        return target_path

    try:
        ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
    except Exception:
        logging.exception("ffmpeg conversion failed")
        # Keep the original file intact; let the caller decide what to do.
        raise

    converted_ok = os.path.exists(target_path) and os.path.getsize(target_path) > 0
    if converted_ok:
        try:
            same_file = str(source.resolve()) == str(target.resolve())
            if not same_file:
                os.remove(video_path)
        except Exception:
            logging.exception("Failed to remove original video after conversion")
    return target_path
114
 
115
def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
    """Re-encode *input_path* as H.264 into *target_path*.

    Returns *target_path* on success; otherwise falls back to
    *input_path* — compression is best-effort and must never lose the
    video.
    """
    try:
        ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(
            overwrite_output=True, quiet=True
        )
        produced = os.path.exists(target_path) and os.path.getsize(target_path) > 0
        if produced:
            return target_path
        logging.warning("Compression completed but target missing or empty; returning input path")
        return input_path
    except Exception:
        logging.exception("Video compression failed")
        return input_path
127
 
128
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
 
131
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
132
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
133
  if video_password:
134
+ # yt-dlp accepts 'videopassword' in options for password-protected videos
135
  ydl_opts["videopassword"] = video_password
136
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
137
  info = ydl.extract_info(url, download=True)
138
+ # info may be a dict for single video or playlist; prefer single entry if present
139
+ video_candidates = []
140
+ if isinstance(info, dict):
141
+ # playlist -> entries list
142
+ entries = info.get("entries")
143
+ if entries:
144
+ # get last-downloaded entry (entries may be nested); map to filesystem files by ids
145
+ for e in entries:
146
+ if isinstance(e, dict) and e.get("id"):
147
+ video_candidates.append(str(Path(save_dir) / f"{e['id']}.mp4"))
148
+ else:
149
+ vid = info.get("id")
150
+ ext = info.get("ext") or "mp4"
151
+ if vid:
152
+ video_candidates.append(str(Path(save_dir) / f"{vid}.{ext}"))
153
+ # fallback: pick most recent file in dir
154
+ if not video_candidates:
155
  all_files = glob(os.path.join(save_dir, "*"))
156
+ if not all_files:
157
+ raise FileNotFoundError("Downloaded video not found")
158
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)
159
+ chosen = matches[0]
160
+ else:
161
+ # prefer existing files among candidates; pick first that exists, else fall back to newest
162
+ existing = [p for p in video_candidates if os.path.exists(p)]
163
+ chosen = existing[0] if existing else (sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[0])
164
+ # Ensure mp4 target
165
+ final = convert_video_to_mp4(chosen)
166
+ return final
167
 
168
def file_name_or_id(file_obj):
    """Best-effort extraction of a file handle's identifier.

    Accepts None, a dict-like payload, or an SDK wrapper object, and
    checks the same fields in the same order in both cases: 'name',
    'id', 'fileId'. Returns None when nothing usable is found.
    """
    if file_obj is None:
        return None
    if isinstance(file_obj, dict):
        # Mirror the attribute path below: 'fileId' is a valid key too.
        return file_obj.get("name") or file_obj.get("id") or file_obj.get("fileId")
    # Common SDK wrappers expose 'name', 'id', or 'fileId' as attributes.
    return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
175
 
176
  def get_effective_api_key():
 
183
  try:
184
  genai.configure(api_key=key)
185
  except Exception:
186
+ logging.exception("genai.configure failed")
187
  return True
188
 
189
+ # ---- Agent management (reuse) ----
190
  _agent = None
191
  def maybe_create_agent(model_id: str):
192
  global _agent
 
201
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
202
  st.session_state["last_model"] = model_id
203
  except Exception:
204
+ logging.exception("Failed to create PHI Agent")
205
  _agent = None
206
  return _agent
207
 
 
217
  try:
218
  os.remove(f)
219
  except Exception:
220
+ logging.exception("Failed to remove data file during clear_all_video_state")
221
 
222
+ # Reset when URL changes
223
  current_url = st.session_state.get("url", "")
224
  if current_url != st.session_state.get("last_url_value"):
225
  clear_all_video_state()
226
  st.session_state["last_url_value"] = current_url
227
 
228
+ # ---- Sidebar UI ----
229
  st.sidebar.header("Video Input")
230
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
231
 
232
  settings_exp = st.sidebar.expander("Settings", expanded=False)
233
+ model_choice = settings_exp.selectbox("Select model", options=MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL) if DEFAULT_MODEL in MODEL_OPTIONS else 0)
234
+ if model_choice == "custom":
235
+ model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
236
+ model_selected = model_input.strip() or DEFAULT_MODEL
237
+ else:
238
+ # keep model_input in session_state for later reads
239
+ st.session_state["model_input"] = model_choice
240
+ model_selected = model_choice
241
+
242
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
243
+ analysis_prompt = settings_exp.text_area("Analysis prompt", value=DEFAULT_PROMPT, height=140)
 
 
 
244
  settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 
245
 
246
+ settings_exp.number_input(
247
+ "Processing timeout (s)", min_value=60, max_value=3600,
248
+ value=st.session_state.get("processing_timeout", 900), step=30,
249
+ key="processing_timeout",
250
+ )
251
+ settings_exp.number_input(
252
+ "Generation timeout (s)", min_value=30, max_value=1800,
253
+ value=st.session_state.get("generation_timeout", 300), step=10,
254
+ key="generation_timeout",
255
+ )
256
+ settings_exp.number_input(
257
+ "Optional compression threshold (MB)", min_value=10, max_value=2000,
258
+ value=st.session_state.get("compress_threshold_mb", 200), step=10,
259
+ key="compress_threshold_mb",
260
+ )
261
+
262
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
263
  settings_exp.caption(f"Using API key from: **{key_source}**")
 
264
  if not get_effective_api_key():
265
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
266
 
 
271
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
272
  ]
273
 
274
+ # ---- Upload & processing helpers ----
275
  def upload_video_sdk(filepath: str):
276
  key = get_effective_api_key()
277
  if not key:
 
279
  if not HAS_GENAI or upload_file is None:
280
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
281
  genai.configure(api_key=key)
282
+ # upload_file may return object with id or name, keep as-is
283
  return upload_file(filepath)
284
 
285
+ def wait_for_processed(file_obj, timeout: int = None):
286
+ if timeout is None:
287
+ timeout = st.session_state.get("processing_timeout", 900)
288
  if not HAS_GENAI or get_file is None:
289
  return file_obj
290
  start = time.time()
 
293
  return file_obj
294
  backoff = 1.0
295
  while True:
296
+ try:
297
+ obj = get_file(name)
298
+ except Exception as e:
299
+ if time.time() - start > timeout:
300
+ raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
301
+ time.sleep(backoff)
302
+ backoff = min(backoff * 2, 8.0)
303
+ continue
304
+
305
  state = getattr(obj, "state", None)
306
  if not state or getattr(state, "name", None) != "PROCESSING":
307
  return obj
308
+
309
  if time.time() - start > timeout:
310
+ raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
311
  time.sleep(backoff)
312
  backoff = min(backoff * 2, 8.0)
313
 
 
330
  return b_full[len(ph):].lstrip(" \n:-")
331
  return text
332
 
333
def compress_video_if_large(local_path: str, threshold_mb: int = 200):
    """Compress *local_path* when it exceeds *threshold_mb* megabytes.

    Returns a ``(path, compressed)`` tuple: the path to upload and a
    flag saying whether compression actually produced a usable file.
    Any failure is recorded in ``st.session_state["last_error"]`` and
    the original path is returned unchanged.
    """
    try:
        size_mb = os.path.getsize(local_path) / (1024 * 1024)
    except Exception as e:
        st.session_state["last_error"] = f"Failed to stat file before compression: {e}"
        return local_path, False

    if size_mb <= threshold_mb:
        # Small enough to upload as-is.
        return local_path, False

    source = Path(local_path)
    compressed_path = str(source.with_name(f"{source.stem}_compressed.mp4"))

    try:
        candidate = compress_video(local_path, compressed_path, crf=28, preset="fast")
        usable = bool(candidate) and os.path.exists(candidate) and os.path.getsize(candidate) > 0
        if usable:
            return candidate, True
        return local_path, False
    except Exception as e:
        st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
        return local_path, False
356
+
357
+ # ---- Responses API integration ----
358
def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
    """Generate a summary for the uploaded *processed* file via the genai Responses API.

    Two call signatures are attempted (messages+files, then input=[...])
    because SDK versions differ. A TypeError from one signature means
    "wrong shape for this SDK version", so the next variant is tried
    instead of failing outright. Transient server errors are retried
    with exponential backoff until *timeout* seconds elapse; any other
    error is raised immediately.

    Raises RuntimeError when the key/SDK/file id is missing and
    TimeoutError when retries are exhausted.
    """
    key = get_effective_api_key()
    if not key:
        raise RuntimeError("No API key provided")
    if not HAS_GENAI or genai is None:
        raise RuntimeError("Responses API not available; install google.generativeai SDK.")
    genai.configure(api_key=key)
    fname = file_name_or_id(processed)
    if not fname:
        raise RuntimeError("Uploaded file missing name/id")

    system_msg = {"role": "system", "content": prompt_text}
    user_msg = {"role": "user", "content": "Please summarize the attached video."}

    call_variants = [
        {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
        {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
    ]

    transient_markers = ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")
    last_exc = None
    start = time.time()
    backoff = 1.0
    while True:
        saw_transient = False
        for payload in call_variants:
            try:
                response = genai.responses.generate(model=model_used, **payload)
                return _normalize_genai_response(response)
            except TypeError as e:
                # This SDK version rejects this call shape — try the next variant.
                last_exc = e
                logging.warning("Responses API signature mismatch, trying next call variant: %s", e)
                continue
            except Exception as e:
                last_exc = e
                msg = str(e).lower()
                # Retry only transient/server-side errors.
                if any(k in msg for k in transient_markers):
                    saw_transient = True
                    logging.warning("Transient error from Responses API, will retry: %s", e)
                    continue
                logging.exception("Non-retryable Responses API error")
                raise
        if not saw_transient:
            # Every variant failed with a signature mismatch; retrying cannot help.
            raise last_exc
        if time.time() - start > timeout:
            raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
        time.sleep(backoff)
        backoff = min(backoff * 2, 8.0)
398
+
399
def _extract_candidate_lists(response):
    """Return every plausible list of output candidates found in *response*."""
    lists = []
    if isinstance(response, dict):
        for key in ("output", "candidates", "items", "responses", "choices"):
            val = response.get(key)
            if isinstance(val, list) and val:
                lists.append(val)
        if not lists:
            # Last resort: take the first list-valued field, whatever its name.
            for v in response.values():
                if isinstance(v, list) and v:
                    lists.append(v)
                    break
    else:
        # SDK response objects expose the same fields as attributes.
        for key in ("output", "candidates", "items", "responses", "choices"):
            val = getattr(response, key, None)
            if isinstance(val, list) and val:
                lists.append(val)
    return lists

def _collect_item_text(item, text_pieces):
    """Append any text found in a single candidate *item* to *text_pieces*."""
    if not item:
        return
    if isinstance(item, str):
        text_pieces.append(item.strip())
        return
    if isinstance(item, dict):
        for k in ("content", "text", "message", "output_text", "output"):
            value = item.get(k)
            if not value:
                continue
            if isinstance(value, str):
                text_pieces.append(value.strip())
                return
            if isinstance(value, (list, tuple)):
                # Content-part lists: dicts with 'text'/'content', bare
                # strings, or SDK part objects.
                for part in value:
                    if isinstance(part, str):
                        if part.strip():
                            text_pieces.append(part.strip())
                    elif isinstance(part, dict):
                        t = part.get("text") or part.get("content")
                        if t:
                            text_pieces.append(str(t).strip())
                    elif part is not None:
                        t = getattr(part, "text", None) or getattr(part, "content", None)
                        if t:
                            text_pieces.append(str(t).strip())
                return
            # Other truthy values: coerce rather than drop.
            text_pieces.append(str(value).strip())
            return
        return
    # SDK candidate object: look for the usual text attributes.
    t = getattr(item, "text", None) or getattr(item, "content", None)
    if t:
        text_pieces.append(str(t).strip())

def _normalize_genai_response(response):
    """Flatten a Responses API result (dict, JSON string, or SDK object)
    into a single deduplicated text string.

    Candidate lists are located under the usual keys/attributes, text is
    pulled out of each item (including nested content-part lists — the
    previous version stringified the whole list instead of descending
    into it), with a fallback to a top-level 'text'/'message' field.
    Unique pieces are joined with blank lines; "" when nothing textual
    is found.
    """
    if response is None:
        return ""

    # A non-dict response may be a JSON string; otherwise keep the object.
    if not isinstance(response, dict):
        try:
            response = json.loads(str(response))
        except Exception:
            pass

    text_pieces = []
    for lst in _extract_candidate_lists(response):
        for item in lst:
            _collect_item_text(item, text_pieces)

    if not text_pieces:
        top_text = None
        if isinstance(response, dict):
            for k in ("text", "message", "output_text"):
                if response.get(k):
                    top_text = response[k]
                    break
        else:
            top_text = getattr(response, "text", None) or getattr(response, "message", None)
        if top_text:
            text_pieces.append(str(top_text).strip())

    # Dedupe while preserving order.
    seen = set()
    filtered = []
    for t in text_pieces:
        if isinstance(t, str) and t and t not in seen:
            filtered.append(t)
            seen.add(t)
    return "\n\n".join(filtered).strip()
468
+
469
+ # ---- small helpers for safer tracebacks ----
470
def safe_traceback(max_chars=2000):
    """Return the current exception traceback, truncated to *max_chars* characters."""
    text = traceback.format_exc()
    if len(text) > max_chars:
        text = text[:max_chars] + "\n...[truncated]"
    return text
473
+
474
def scrub_api_keys(s: str) -> str:
    """Redact the active API key from *s* so it never leaks into error output."""
    if not s:
        return s
    active_key = get_effective_api_key()
    if active_key and active_key in s:
        s = s.replace(active_key, "[REDACTED_API_KEY]")
    return s
481
+
482
+ # ---- Layout ----
483
  col1, col2 = st.columns([1, 3])
484
  with col1:
485
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
486
  with col2:
487
+ st.write("")
488
 
489
  if st.sidebar.button("Load Video", use_container_width=True):
490
  try:
 
499
  except Exception:
500
  st.session_state["file_hash"] = None
501
  except Exception as e:
502
+ logging.exception("Failed to load video")
503
  st.sidebar.error(f"Failed to load video: {e}")
504
 
505
  if st.session_state["videos"]:
 
525
  try:
526
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
527
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
528
+ if file_size_mb > st.session_state.get("compress_threshold_mb", 200):
529
+ st.sidebar.warning(f"Large file detected — it will be compressed automatically before upload (>{st.session_state.get('compress_threshold_mb')} MB).", icon="⚠️")
530
  except Exception:
531
  pass
532
 
533
+ # ---- Main generation flow ----
534
  if generate_now and not st.session_state.get("busy"):
535
  if not st.session_state.get("videos"):
536
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
545
  if HAS_GENAI and genai is not None:
546
  genai.configure(api_key=key_to_use)
547
  except Exception:
548
+ logging.exception("genai.configure failed at start")
549
 
550
+ model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
551
  if st.session_state.get("last_model") != model_id:
552
  st.session_state["last_model"] = ""
553
  maybe_create_agent(model_id)
 
559
  except Exception:
560
  current_hash = None
561
 
562
+ # determine if reupload is needed: same local path + same hash + we have uploaded/processed file id
563
  reupload_needed = True
564
+ uploaded_file = st.session_state.get("uploaded_file")
565
+ uploaded_name = file_name_or_id(uploaded_file)
566
+ if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash and uploaded_name:
567
  reupload_needed = False
568
 
569
  if reupload_needed:
570
  if not HAS_GENAI:
571
  raise RuntimeError("google.generativeai SDK not available; install it.")
572
  local_path = current_path
573
+ upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
 
 
 
 
 
574
 
575
+ with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
576
  try:
577
+ uploaded = upload_video_sdk(upload_path)
578
+ except Exception as e:
579
+ err = scrub_api_keys(f"Upload failed: {e}\n\nTraceback:\n{safe_traceback()}")
580
+ st.session_state["last_error"] = err
581
+ st.error("Upload failed. See Last Error for details.")
582
+ raise
583
 
584
+ try:
585
+ processing_placeholder = st.empty()
586
+ processing_bar = processing_placeholder.progress(0)
587
+ start_time = time.time()
588
+ processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
589
+ # update progress once after wait (full incremental requires moving polling here)
590
+ elapsed = time.time() - start_time
591
+ timeout = st.session_state.get("processing_timeout", 900)
592
+ pct = min(100, int((elapsed / timeout) * 100)) if timeout > 0 else 0
593
+ processing_bar.progress(pct)
594
+ processing_placeholder.success("Processing complete")
595
+ except Exception as e:
596
+ err = scrub_api_keys(f"Processing failed/wait timeout: {e}\n\nTraceback:\n{safe_traceback()}")
597
+ st.session_state["last_error"] = err
598
+ st.error("Video processing failed or timed out. See Last Error.")
599
+ raise
600
 
601
+ st.session_state["uploaded_file"] = uploaded
602
+ st.session_state["processed_file"] = processed
603
+ st.session_state["last_loaded_path"] = current_path
604
+ st.session_state["file_hash"] = current_hash
605
 
606
+ prompt_text = (analysis_prompt.strip() or DEFAULT_PROMPT).strip()
607
  out = ""
608
+ model_used = model_id
609
+ max_tokens = 2048 if "2.5" in model_used else 1024
 
 
 
 
 
610
  est_tokens = max_tokens
 
611
 
612
+ # Try Agent first, fallback to Responses API
613
  agent = maybe_create_agent(model_used)
614
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
615
  if agent:
 
618
  with st.spinner("Generating description via Agent..."):
619
  if not processed:
620
  raise RuntimeError("Processed file missing for agent generation")
 
621
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
 
622
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
623
  if not agent_text:
 
624
  try:
625
  if isinstance(agent_response, dict):
626
+ for k in ("content", "outputText", "text", "message"):
 
627
  if k in agent_response and agent_response[k]:
628
  agent_text = agent_response[k]
629
  break
 
634
  debug_info["agent_ok"] = True
635
  debug_info["agent_response_has_text"] = True
636
  else:
 
637
  debug_info["agent_ok"] = False
638
  except Exception as ae:
 
639
  debug_info["agent_error"] = f"{ae}"
 
640
  debug_info["agent_traceback"] = traceback.format_exc()
 
641
 
642
  if not out:
 
643
  try:
644
+ with st.spinner("Generating description via Responses API..."):
645
+ out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  except Exception as e:
 
647
  tb = traceback.format_exc()
648
+ st.session_state["last_error"] = scrub_api_keys(f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{safe_traceback()}")
649
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
650
  out = ""
651
 
 
652
  if out:
653
  out = remove_prompt_echo(prompt_text, out)
654
  p = prompt_text
 
670
 
671
  except Exception as e:
672
  tb = traceback.format_exc()
673
+ err = scrub_api_keys(f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{safe_traceback()}")
674
+ st.session_state["last_error"] = err
675
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
676
  finally:
677
  st.session_state["busy"] = False