CB commited on
Commit
d9fde4e
·
verified ·
1 Parent(s): 8fea353

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +204 -335
streamlit_app.py CHANGED
@@ -7,60 +7,37 @@ import traceback
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
10
- import json
11
- import logging
12
 
13
  import yt_dlp
14
- import ffmpeg # ffmpeg-python
15
  import streamlit as st
16
  from dotenv import load_dotenv
17
 
18
  load_dotenv()
19
 
20
- # Optional PHI integration
21
  try:
22
  from phi.agent import Agent
23
  from phi.model.google import Gemini
24
  from phi.tools.duckduckgo import DuckDuckGo
25
-
26
  HAS_PHI = True
27
  except Exception:
28
  Agent = Gemini = DuckDuckGo = None
29
  HAS_PHI = False
30
 
31
- # google.generativeai SDK
32
  try:
33
  import google.generativeai as genai
34
- from google.generativeai import upload_file, get_file
35
-
36
  HAS_GENAI = True
37
  except Exception:
38
  genai = None
39
  upload_file = get_file = None
40
  HAS_GENAI = False
41
 
42
- logging.basicConfig(level=logging.INFO)
43
-
44
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
45
  DATA_DIR = Path("./data")
46
  DATA_DIR.mkdir(exist_ok=True)
47
 
48
- # ---- Defaults & constants ----
49
- MODEL_OPTIONS = [
50
- "gemini-2.5-flash",
51
- "gemini-2.5-flash-lite",
52
- "gemini-2.0-flash",
53
- "gemini-2.0-flash-lite",
54
- "custom",
55
- ]
56
- DEFAULT_MODEL = "gemini-2.0-flash-lite"
57
- DEFAULT_PROMPT = (
58
- "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
59
- "Keep language professional. Include a list of observations for notable events."
60
- )
61
-
62
- # ---- Session defaults ----
63
- st.session_state.setdefault("url", "")
64
  st.session_state.setdefault("videos", "")
65
  st.session_state.setdefault("loop_video", False)
66
  st.session_state.setdefault("uploaded_file", None)
@@ -70,59 +47,39 @@ st.session_state.setdefault("last_loaded_path", "")
70
  st.session_state.setdefault("analysis_out", "")
71
  st.session_state.setdefault("last_error", "")
72
  st.session_state.setdefault("file_hash", None)
 
73
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
74
  st.session_state.setdefault("last_model", "")
75
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
76
  st.session_state.setdefault("last_url_value", "")
77
- st.session_state.setdefault("processing_timeout", 900)
78
- st.session_state.setdefault("generation_timeout", 300)
79
- st.session_state.setdefault("compress_threshold_mb", 200)
80
 
81
- # ---- Helpers ----
82
  def sanitize_filename(path_str: str):
83
  name = Path(path_str).name
84
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
85
 
86
  def file_sha256(path: str, block_size: int = 65536) -> str:
87
- try:
88
- h = hashlib.sha256()
89
- with open(path, "rb") as f:
90
- for chunk in iter(lambda: f.read(block_size), b""):
91
- h.update(chunk)
92
- return h.hexdigest()
93
- except Exception:
94
- return None
95
 
96
  def convert_video_to_mp4(video_path: str) -> str:
97
  target_path = str(Path(video_path).with_suffix(".mp4"))
98
  if os.path.exists(target_path):
99
  return target_path
 
100
  try:
101
- ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
102
- except Exception as e:
103
- logging.exception("ffmpeg conversion failed")
104
- # If conversion fails, do not delete original; re-raise for caller to handle if needed
105
- raise
106
- # Only remove source if target exists and is non-empty
107
- if os.path.exists(target_path) and os.path.getsize(target_path) > 0:
108
- try:
109
- if str(Path(video_path).resolve()) != str(Path(target_path).resolve()):
110
- os.remove(video_path)
111
- except Exception:
112
- logging.exception("Failed to remove original video after conversion")
113
  return target_path
114
 
115
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
116
  try:
117
- ffmpeg.input(input_path).output(
118
- target_path, vcodec="libx264", crf=crf, preset=preset
119
- ).run(overwrite_output=True, quiet=True)
120
- if os.path.exists(target_path) and os.path.getsize(target_path) > 0:
121
- return target_path
122
- logging.warning("Compression completed but target missing or empty; returning input path")
123
- return input_path
124
  except Exception:
125
- logging.exception("Video compression failed")
126
  return input_path
127
 
128
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
@@ -131,46 +88,24 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
131
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
132
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
133
  if video_password:
134
- # yt-dlp accepts 'videopassword' in options for password-protected videos
135
  ydl_opts["videopassword"] = video_password
136
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
137
  info = ydl.extract_info(url, download=True)
138
- # info may be a dict for single video or playlist; prefer single entry if present
139
- video_candidates = []
140
- if isinstance(info, dict):
141
- # playlist -> entries list
142
- entries = info.get("entries")
143
- if entries:
144
- # get last-downloaded entry (entries may be nested); map to filesystem files by ids
145
- for e in entries:
146
- if isinstance(e, dict) and e.get("id"):
147
- video_candidates.append(str(Path(save_dir) / f"{e['id']}.mp4"))
148
- else:
149
- vid = info.get("id")
150
- ext = info.get("ext") or "mp4"
151
- if vid:
152
- video_candidates.append(str(Path(save_dir) / f"{vid}.{ext}"))
153
- # fallback: pick most recent file in dir
154
- if not video_candidates:
155
- all_files = glob(os.path.join(save_dir, "*"))
156
- if not all_files:
157
- raise FileNotFoundError("Downloaded video not found")
158
- matches = sorted(all_files, key=os.path.getmtime, reverse=True)
159
- chosen = matches[0]
160
  else:
161
- # prefer existing files among candidates; pick first that exists, else fall back to newest
162
- existing = [p for p in video_candidates if os.path.exists(p)]
163
- chosen = existing[0] if existing else (sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[0])
164
- # Ensure mp4 target
165
- final = convert_video_to_mp4(chosen)
166
- return final
167
 
168
  def file_name_or_id(file_obj):
169
  if file_obj is None:
170
  return None
171
  if isinstance(file_obj, dict):
172
  return file_obj.get("name") or file_obj.get("id")
173
- # common SDK wrappers may expose 'name', 'id', 'fileId'
174
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
175
 
176
  def get_effective_api_key():
@@ -183,10 +118,9 @@ def configure_genai_if_needed():
183
  try:
184
  genai.configure(api_key=key)
185
  except Exception:
186
- logging.exception("genai.configure failed")
187
  return True
188
 
189
- # ---- Agent management (reuse) ----
190
  _agent = None
191
  def maybe_create_agent(model_id: str):
192
  global _agent
@@ -201,7 +135,6 @@ def maybe_create_agent(model_id: str):
201
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
202
  st.session_state["last_model"] = model_id
203
  except Exception:
204
- logging.exception("Failed to create PHI Agent")
205
  _agent = None
206
  return _agent
207
 
@@ -217,50 +150,31 @@ def clear_all_video_state():
217
  try:
218
  os.remove(f)
219
  except Exception:
220
- logging.exception("Failed to remove data file during clear_all_video_state")
221
 
222
- # Reset when URL changes
223
  current_url = st.session_state.get("url", "")
224
  if current_url != st.session_state.get("last_url_value"):
225
  clear_all_video_state()
226
  st.session_state["last_url_value"] = current_url
227
 
228
- # ---- Sidebar UI ----
229
  st.sidebar.header("Video Input")
230
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
231
 
232
  settings_exp = st.sidebar.expander("Settings", expanded=False)
233
- model_choice = settings_exp.selectbox("Select model", options=MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL) if DEFAULT_MODEL in MODEL_OPTIONS else 0)
234
- if model_choice == "custom":
235
- model_input = settings_exp.text_input("Custom model id", value=DEFAULT_MODEL, key="model_input")
236
- model_selected = model_input.strip() or DEFAULT_MODEL
237
- else:
238
- # keep model_input in session_state for later reads
239
- st.session_state["model_input"] = model_choice
240
- model_selected = model_choice
241
-
242
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
243
- analysis_prompt = settings_exp.text_area("Analysis prompt", value=DEFAULT_PROMPT, height=140)
244
- settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
245
-
246
- settings_exp.number_input(
247
- "Processing timeout (s)", min_value=60, max_value=3600,
248
- value=st.session_state.get("processing_timeout", 900), step=30,
249
- key="processing_timeout",
250
- )
251
- settings_exp.number_input(
252
- "Generation timeout (s)", min_value=30, max_value=1800,
253
- value=st.session_state.get("generation_timeout", 300), step=10,
254
- key="generation_timeout",
255
- )
256
- settings_exp.number_input(
257
- "Optional compression threshold (MB)", min_value=10, max_value=2000,
258
- value=st.session_state.get("compress_threshold_mb", 200), step=10,
259
- key="compress_threshold_mb",
260
  )
 
 
 
261
 
 
262
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
263
  settings_exp.caption(f"Using API key from: **{key_source}**")
 
264
  if not get_effective_api_key():
265
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
266
 
@@ -271,7 +185,6 @@ safety_settings = [
271
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
272
  ]
273
 
274
- # ---- Upload & processing helpers ----
275
  def upload_video_sdk(filepath: str):
276
  key = get_effective_api_key()
277
  if not key:
@@ -279,12 +192,9 @@ def upload_video_sdk(filepath: str):
279
  if not HAS_GENAI or upload_file is None:
280
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
281
  genai.configure(api_key=key)
282
- # upload_file may return object with id or name, keep as-is
283
  return upload_file(filepath)
284
 
285
- def wait_for_processed(file_obj, timeout: int = None):
286
- if timeout is None:
287
- timeout = st.session_state.get("processing_timeout", 900)
288
  if not HAS_GENAI or get_file is None:
289
  return file_obj
290
  start = time.time()
@@ -293,21 +203,12 @@ def wait_for_processed(file_obj, timeout: int = None):
293
  return file_obj
294
  backoff = 1.0
295
  while True:
296
- try:
297
- obj = get_file(name)
298
- except Exception as e:
299
- if time.time() - start > timeout:
300
- raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
301
- time.sleep(backoff)
302
- backoff = min(backoff * 2, 8.0)
303
- continue
304
-
305
  state = getattr(obj, "state", None)
306
  if not state or getattr(state, "name", None) != "PROCESSING":
307
  return obj
308
-
309
  if time.time() - start > timeout:
310
- raise TimeoutError(f"File processing timed out after {int(time.time() - start)}s")
311
  time.sleep(backoff)
312
  backoff = min(backoff * 2, 8.0)
313
 
@@ -330,161 +231,11 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
330
  return b_full[len(ph):].lstrip(" \n:-")
331
  return text
332
 
333
- def compress_video_if_large(local_path: str, threshold_mb: int = 200):
334
- try:
335
- file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
336
- except Exception as e:
337
- st.session_state["last_error"] = f"Failed to stat file before compression: {e}"
338
- return local_path, False
339
-
340
- if file_size_mb <= threshold_mb:
341
- return local_path, False
342
-
343
- # build compressed path reliably
344
- p = Path(local_path)
345
- compressed_name = f"{p.stem}_compressed.mp4"
346
- compressed_path = str(p.with_name(compressed_name))
347
-
348
- try:
349
- result = compress_video(local_path, compressed_path, crf=28, preset="fast")
350
- if result and os.path.exists(result) and os.path.getsize(result) > 0:
351
- return result, True
352
- return local_path, False
353
- except Exception as e:
354
- st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
355
- return local_path, False
356
-
357
- # ---- Responses API integration ----
358
- def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
359
- key = get_effective_api_key()
360
- if not key:
361
- raise RuntimeError("No API key provided")
362
- if not HAS_GENAI or genai is None:
363
- raise RuntimeError("Responses API not available; install google.generativeai SDK.")
364
- genai.configure(api_key=key)
365
- fname = file_name_or_id(processed)
366
- if not fname:
367
- raise RuntimeError("Uploaded file missing name/id")
368
-
369
- system_msg = {"role": "system", "content": prompt_text}
370
- user_msg = {"role": "user", "content": "Please summarize the attached video."}
371
-
372
- call_variants = [
373
- {"messages": [system_msg, user_msg], "files": [{"name": fname}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
374
- {"input": [{"text": prompt_text, "files": [{"name": fname}]}], "safety_settings": safety_settings, "max_output_tokens": max_tokens},
375
- ]
376
-
377
- last_exc = None
378
- start = time.time()
379
- backoff = 1.0
380
- while True:
381
- for payload in call_variants:
382
- try:
383
- response = genai.responses.generate(model=model_used, **payload)
384
- return _normalize_genai_response(response)
385
- except Exception as e:
386
- last_exc = e
387
- msg = str(e).lower()
388
- # retry for transient/server errors
389
- if any(k in msg for k in ("internal", "unavailable", "deadlineexceeded", "deadline exceeded", "timeout", "rate limit")):
390
- logging.warning("Transient error from Responses API, will retry: %s", e)
391
- continue
392
- logging.exception("Non-retryable Responses API error")
393
- raise
394
- if time.time() - start > timeout:
395
- raise TimeoutError(f"Responses.generate timed out after {timeout}s: last error: {last_exc}")
396
- time.sleep(backoff)
397
- backoff = min(backoff * 2, 8.0)
398
-
399
- def _normalize_genai_response(response):
400
- outputs = []
401
- if response is None:
402
- return ""
403
-
404
- if not isinstance(response, dict):
405
- try:
406
- response = json.loads(str(response))
407
- except Exception:
408
- pass
409
-
410
- candidate_lists = []
411
- if isinstance(response, dict):
412
- for key in ("output", "candidates", "items", "responses", "choices"):
413
- val = response.get(key)
414
- if isinstance(val, list) and val:
415
- candidate_lists.append(val)
416
- if not candidate_lists and isinstance(response, dict):
417
- for v in response.values():
418
- if isinstance(v, list) and v:
419
- candidate_lists.append(v)
420
- break
421
-
422
- text_pieces = []
423
- for lst in candidate_lists:
424
- for item in lst:
425
- if not item:
426
- continue
427
- if isinstance(item, dict):
428
- for k in ("content", "text", "message", "output_text", "output"):
429
- t = item.get(k)
430
- if t:
431
- text_pieces.append(str(t).strip())
432
- break
433
- else:
434
- if "content" in item and isinstance(item["content"], list):
435
- for part in item["content"]:
436
- if isinstance(part, dict):
437
- t = part.get("text") or part.get("content")
438
- if t:
439
- text_pieces.append(str(t).strip())
440
- elif isinstance(part, str):
441
- text_pieces.append(part.strip())
442
- elif isinstance(item, str):
443
- text_pieces.append(item.strip())
444
- else:
445
- try:
446
- t = getattr(item, "text", None) or getattr(item, "content", None)
447
- if t:
448
- text_pieces.append(str(t).strip())
449
- except Exception:
450
- pass
451
-
452
- if not text_pieces and isinstance(response, dict):
453
- for k in ("text", "message", "output_text"):
454
- v = response.get(k)
455
- if v:
456
- text_pieces.append(str(v).strip())
457
- break
458
-
459
- seen = set()
460
- filtered = []
461
- for t in text_pieces:
462
- if not isinstance(t, str):
463
- continue
464
- if t and t not in seen:
465
- filtered.append(t)
466
- seen.add(t)
467
- return "\n\n".join(filtered).strip()
468
-
469
- # ---- small helpers for safer tracebacks ----
470
- def safe_traceback(max_chars=2000):
471
- tb = traceback.format_exc()
472
- return tb if len(tb) <= max_chars else tb[:max_chars] + "\n...[truncated]"
473
-
474
- def scrub_api_keys(s: str) -> str:
475
- if not s:
476
- return s
477
- key = get_effective_api_key()
478
- if key and key in s:
479
- return s.replace(key, "[REDACTED_API_KEY]")
480
- return s
481
-
482
- # ---- Layout ----
483
  col1, col2 = st.columns([1, 3])
484
  with col1:
485
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
486
  with col2:
487
- st.write("")
488
 
489
  if st.sidebar.button("Load Video", use_container_width=True):
490
  try:
@@ -499,7 +250,6 @@ if st.sidebar.button("Load Video", use_container_width=True):
499
  except Exception:
500
  st.session_state["file_hash"] = None
501
  except Exception as e:
502
- logging.exception("Failed to load video")
503
  st.sidebar.error(f"Failed to load video: {e}")
504
 
505
  if st.session_state["videos"]:
@@ -525,12 +275,12 @@ if st.session_state["videos"]:
525
  try:
526
  file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
527
  st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
528
- if file_size_mb > st.session_state.get("compress_threshold_mb", 200):
529
- st.sidebar.warning(f"Large file detected — it will be compressed automatically before upload (>{st.session_state.get('compress_threshold_mb')} MB).", icon="⚠️")
530
  except Exception:
531
  pass
532
 
533
- # ---- Main generation flow ----
534
  if generate_now and not st.session_state.get("busy"):
535
  if not st.session_state.get("videos"):
536
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -545,9 +295,9 @@ if generate_now and not st.session_state.get("busy"):
545
  if HAS_GENAI and genai is not None:
546
  genai.configure(api_key=key_to_use)
547
  except Exception:
548
- logging.exception("genai.configure failed at start")
549
 
550
- model_id = (st.session_state.get("model_input") or model_selected or DEFAULT_MODEL).strip()
551
  if st.session_state.get("last_model") != model_id:
552
  st.session_state["last_model"] = ""
553
  maybe_create_agent(model_id)
@@ -559,57 +309,51 @@ if generate_now and not st.session_state.get("busy"):
559
  except Exception:
560
  current_hash = None
561
 
562
- # determine if reupload is needed: same local path + same hash + we have uploaded/processed file id
563
  reupload_needed = True
564
- uploaded_file = st.session_state.get("uploaded_file")
565
- uploaded_name = file_name_or_id(uploaded_file)
566
- if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash and uploaded_name:
567
  reupload_needed = False
568
 
569
  if reupload_needed:
570
  if not HAS_GENAI:
571
  raise RuntimeError("google.generativeai SDK not available; install it.")
572
  local_path = current_path
573
- upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))
 
 
 
 
 
574
 
575
- with st.spinner(f"Uploading video{' (compressed)' if compressed else ''}..."):
 
576
  try:
577
- uploaded = upload_video_sdk(upload_path)
578
- except Exception as e:
579
- err = scrub_api_keys(f"Upload failed: {e}\n\nTraceback:\n{safe_traceback()}")
580
- st.session_state["last_error"] = err
581
- st.error("Upload failed. See Last Error for details.")
582
- raise
583
 
584
- try:
585
- processing_placeholder = st.empty()
586
- processing_bar = processing_placeholder.progress(0)
587
- start_time = time.time()
588
- processed = wait_for_processed(uploaded, timeout=st.session_state.get("processing_timeout", 900))
589
- # update progress once after wait (full incremental requires moving polling here)
590
- elapsed = time.time() - start_time
591
- timeout = st.session_state.get("processing_timeout", 900)
592
- pct = min(100, int((elapsed / timeout) * 100)) if timeout > 0 else 0
593
- processing_bar.progress(pct)
594
- processing_placeholder.success("Processing complete")
595
- except Exception as e:
596
- err = scrub_api_keys(f"Processing failed/wait timeout: {e}\n\nTraceback:\n{safe_traceback()}")
597
- st.session_state["last_error"] = err
598
- st.error("Video processing failed or timed out. See Last Error.")
599
- raise
600
 
601
- st.session_state["uploaded_file"] = uploaded
602
- st.session_state["processed_file"] = processed
603
- st.session_state["last_loaded_path"] = current_path
604
- st.session_state["file_hash"] = current_hash
605
 
606
- prompt_text = (analysis_prompt.strip() or DEFAULT_PROMPT).strip()
607
  out = ""
608
- model_used = model_id
609
- max_tokens = 2048 if "2.5" in model_used else 1024
 
 
 
 
 
610
  est_tokens = max_tokens
 
611
 
612
- # Try Agent first, fallback to Responses API
613
  agent = maybe_create_agent(model_used)
614
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
615
  if agent:
@@ -618,12 +362,16 @@ if generate_now and not st.session_state.get("busy"):
618
  with st.spinner("Generating description via Agent..."):
619
  if not processed:
620
  raise RuntimeError("Processed file missing for agent generation")
 
621
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
 
622
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
623
  if not agent_text:
 
624
  try:
625
  if isinstance(agent_response, dict):
626
- for k in ("content", "outputText", "text", "message"):
 
627
  if k in agent_response and agent_response[k]:
628
  agent_text = agent_response[k]
629
  break
@@ -634,21 +382,143 @@ if generate_now and not st.session_state.get("busy"):
634
  debug_info["agent_ok"] = True
635
  debug_info["agent_response_has_text"] = True
636
  else:
 
637
  debug_info["agent_ok"] = False
638
  except Exception as ae:
 
639
  debug_info["agent_error"] = f"{ae}"
 
640
  debug_info["agent_traceback"] = traceback.format_exc()
 
641
 
642
  if not out:
 
643
  try:
644
- with st.spinner("Generating description via Responses API..."):
645
- out = generate_via_responses_api(prompt_text, processed, model_used, max_tokens=max_tokens, timeout=st.session_state.get("generation_timeout", 300))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
  except Exception as e:
 
647
  tb = traceback.format_exc()
648
- st.session_state["last_error"] = scrub_api_keys(f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{safe_traceback()}")
649
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
650
  out = ""
651
 
 
652
  if out:
653
  out = remove_prompt_echo(prompt_text, out)
654
  p = prompt_text
@@ -670,8 +540,7 @@ if generate_now and not st.session_state.get("busy"):
670
 
671
  except Exception as e:
672
  tb = traceback.format_exc()
673
- err = scrub_api_keys(f"{e}\n\nDebug: {locals().get('debug_info', {})}\n\nTraceback:\n{safe_traceback()}")
674
- st.session_state["last_error"] = err
675
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
676
  finally:
677
  st.session_state["busy"] = False
 
7
  from glob import glob
8
  from pathlib import Path
9
  from difflib import SequenceMatcher
 
 
10
 
11
  import yt_dlp
12
+ import ffmpeg
13
  import streamlit as st
14
  from dotenv import load_dotenv
15
 
16
  load_dotenv()
17
 
 
18
  try:
19
  from phi.agent import Agent
20
  from phi.model.google import Gemini
21
  from phi.tools.duckduckgo import DuckDuckGo
 
22
  HAS_PHI = True
23
  except Exception:
24
  Agent = Gemini = DuckDuckGo = None
25
  HAS_PHI = False
26
 
 
27
  try:
28
  import google.generativeai as genai
29
+ from google.generativeai import upload_file, get_file # type: ignore
 
30
  HAS_GENAI = True
31
  except Exception:
32
  genai = None
33
  upload_file = get_file = None
34
  HAS_GENAI = False
35
 
 
 
36
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
37
  DATA_DIR = Path("./data")
38
  DATA_DIR.mkdir(exist_ok=True)
39
 
40
+ # Session defaults
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  st.session_state.setdefault("videos", "")
42
  st.session_state.setdefault("loop_video", False)
43
  st.session_state.setdefault("uploaded_file", None)
 
47
  st.session_state.setdefault("analysis_out", "")
48
  st.session_state.setdefault("last_error", "")
49
  st.session_state.setdefault("file_hash", None)
50
+ st.session_state.setdefault("fast_mode", False)
51
  st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
52
  st.session_state.setdefault("last_model", "")
53
  st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
54
  st.session_state.setdefault("last_url_value", "")
 
 
 
55
 
 
56
  def sanitize_filename(path_str: str):
57
  name = Path(path_str).name
58
  return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
59
 
60
  def file_sha256(path: str, block_size: int = 65536) -> str:
61
+ h = hashlib.sha256()
62
+ with open(path, "rb") as f:
63
+ for chunk in iter(lambda: f.read(block_size), b""):
64
+ h.update(chunk)
65
+ return h.hexdigest()
 
 
 
66
 
67
  def convert_video_to_mp4(video_path: str) -> str:
68
  target_path = str(Path(video_path).with_suffix(".mp4"))
69
  if os.path.exists(target_path):
70
  return target_path
71
+ ffmpeg.input(video_path).output(target_path).run(overwrite_output=True, quiet=True)
72
  try:
73
+ os.remove(video_path)
74
+ except Exception:
75
+ pass
 
 
 
 
 
 
 
 
 
76
  return target_path
77
 
78
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
79
  try:
80
+ ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(overwrite_output=True, quiet=True)
81
+ return target_path
 
 
 
 
 
82
  except Exception:
 
83
  return input_path
84
 
85
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
 
88
  outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
89
  ydl_opts = {"outtmpl": outtmpl, "format": "best"}
90
  if video_password:
 
91
  ydl_opts["videopassword"] = video_password
92
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
93
  info = ydl.extract_info(url, download=True)
94
+ video_id = info.get("id") if isinstance(info, dict) else None
95
+ if video_id:
96
+ matches = glob(os.path.join(save_dir, f"{video_id}.*"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  else:
98
+ all_files = glob(os.path.join(save_dir, "*"))
99
+ matches = sorted(all_files, key=os.path.getmtime, reverse=True)[:1] if all_files else []
100
+ if not matches:
101
+ raise FileNotFoundError("Downloaded video not found")
102
+ return convert_video_to_mp4(matches[0])
 
103
 
104
  def file_name_or_id(file_obj):
105
  if file_obj is None:
106
  return None
107
  if isinstance(file_obj, dict):
108
  return file_obj.get("name") or file_obj.get("id")
 
109
  return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
110
 
111
  def get_effective_api_key():
 
118
  try:
119
  genai.configure(api_key=key)
120
  except Exception:
121
+ pass
122
  return True
123
 
 
124
  _agent = None
125
  def maybe_create_agent(model_id: str):
126
  global _agent
 
135
  _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
136
  st.session_state["last_model"] = model_id
137
  except Exception:
 
138
  _agent = None
139
  return _agent
140
 
 
150
  try:
151
  os.remove(f)
152
  except Exception:
153
+ pass
154
 
155
+ # track url changes
156
  current_url = st.session_state.get("url", "")
157
  if current_url != st.session_state.get("last_url_value"):
158
  clear_all_video_state()
159
  st.session_state["last_url_value"] = current_url
160
 
 
161
  st.sidebar.header("Video Input")
162
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
163
 
164
  settings_exp = st.sidebar.expander("Settings", expanded=False)
165
+ model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite", key="model_input")
 
 
 
 
 
 
 
 
166
  settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
167
+ default_prompt = (
168
+ "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  )
170
+ analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
171
+ settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
172
+ settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
173
 
174
+ # Show which key is active
175
  key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
176
  settings_exp.caption(f"Using API key from: **{key_source}**")
177
+
178
  if not get_effective_api_key():
179
  settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
180
 
 
185
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
186
  ]
187
 
 
188
  def upload_video_sdk(filepath: str):
189
  key = get_effective_api_key()
190
  if not key:
 
192
  if not HAS_GENAI or upload_file is None:
193
  raise RuntimeError("google.generativeai SDK not available; cannot upload")
194
  genai.configure(api_key=key)
 
195
  return upload_file(filepath)
196
 
197
+ def wait_for_processed(file_obj, timeout=180):
 
 
198
  if not HAS_GENAI or get_file is None:
199
  return file_obj
200
  start = time.time()
 
203
  return file_obj
204
  backoff = 1.0
205
  while True:
206
+ obj = get_file(name)
 
 
 
 
 
 
 
 
207
  state = getattr(obj, "state", None)
208
  if not state or getattr(state, "name", None) != "PROCESSING":
209
  return obj
 
210
  if time.time() - start > timeout:
211
+ raise TimeoutError("File processing timed out")
212
  time.sleep(backoff)
213
  backoff = min(backoff * 2, 8.0)
214
 
 
231
  return b_full[len(ph):].lstrip(" \n:-")
232
  return text
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  col1, col2 = st.columns([1, 3])
235
  with col1:
236
  generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
237
  with col2:
238
+ pass
239
 
240
  if st.sidebar.button("Load Video", use_container_width=True):
241
  try:
 
250
  except Exception:
251
  st.session_state["file_hash"] = None
252
  except Exception as e:
 
253
  st.sidebar.error(f"Failed to load video: {e}")
254
 
255
  if st.session_state["videos"]:
 
275
# Best-effort: display the loaded file's size and warn about large uploads.
try:
    size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
    st.sidebar.caption(f"File size: {size_mb:.1f} MB")
    large_and_slow = size_mb > 50 and not st.session_state.get("fast_mode", False)
    if large_and_slow:
        st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
except Exception:
    # Size display is cosmetic; never let it break the sidebar.
    pass
282
 
283
+ # --- Generation flow ---
284
  if generate_now and not st.session_state.get("busy"):
285
  if not st.session_state.get("videos"):
286
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
295
  if HAS_GENAI and genai is not None:
296
  genai.configure(api_key=key_to_use)
297
  except Exception:
298
+ pass
299
 
300
+ model_id = (st.session_state.get("model_input") or "gemini-2.0-flash-lite").strip()
301
  if st.session_state.get("last_model") != model_id:
302
  st.session_state["last_model"] = ""
303
  maybe_create_agent(model_id)
 
309
  except Exception:
310
  current_hash = None
311
 
 
312
  reupload_needed = True
313
+ if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
 
 
314
  reupload_needed = False
315
 
316
if reupload_needed:
    # (Re)upload path: optionally compress large sources, then push the file
    # to the Files API and wait until Google finishes processing it.
    if not HAS_GENAI:
        raise RuntimeError("google.generativeai SDK not available; install it.")
    local_path = current_path
    fast_mode = st.session_state.get("fast_mode", False)
    upload_path = local_path
    try:
        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
    except Exception:
        # Unknown size: treat as small and skip compression.
        file_size_mb = 0

    if not fast_mode and file_size_mb > 50:
        # Large file and the user did not opt into fast mode: try to shrink it.
        compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
        try:
            # NOTE(review): this branch only runs when fast_mode is False, so
            # the original `"veryfast" if fast_mode else "fast"` always chose
            # "fast"; the dead conditional is simplified here.
            upload_path = compress_video(local_path, compressed_path, crf=28, preset="fast")
        except Exception:
            # Compression is best-effort; fall back to the raw file.
            upload_path = local_path

    with st.spinner("Uploading video..."):
        uploaded = upload_video_sdk(upload_path)
        processed = wait_for_processed(uploaded, timeout=180)
    # Cache upload metadata so re-runs on the same file skip re-uploading.
    st.session_state["uploaded_file"] = uploaded
    st.session_state["processed_file"] = processed
    st.session_state["last_loaded_path"] = current_path
    st.session_state["file_hash"] = current_hash
 
 
 
 
 
 
 
 
 
342
 
343
+ prompt_text = (analysis_prompt.strip() or default_prompt).strip()
 
 
 
344
 
 
345
  out = ""
346
+ if st.session_state.get("fast_mode"):
347
+ model_used = model_id if model_id else "gemini-2.0-flash-lite"
348
+ max_tokens = 512
349
+ else:
350
+ model_used = model_id
351
+ max_tokens = 1024
352
+
353
  est_tokens = max_tokens
354
+ est_cost_caption = f"Est. max tokens: {est_tokens}"
355
 
356
+ # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
357
  agent = maybe_create_agent(model_used)
358
  debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
359
  if agent:
 
362
  with st.spinner("Generating description via Agent..."):
363
  if not processed:
364
  raise RuntimeError("Processed file missing for agent generation")
365
+ # call agent.run inside try/except to catch library IndexError
366
  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367
+ # Try to extract text from common attributes; be defensive
368
  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
369
  if not agent_text:
370
+ # try dict-like access
371
  try:
372
  if isinstance(agent_response, dict):
373
+ # check common keys
374
+ for k in ("content", "outputText", "text"):
375
  if k in agent_response and agent_response[k]:
376
  agent_text = agent_response[k]
377
  break
 
382
  debug_info["agent_ok"] = True
383
  debug_info["agent_response_has_text"] = True
384
  else:
385
+ # Agent returned but had no usable text; set a marker to fallback
386
  debug_info["agent_ok"] = False
387
  except Exception as ae:
388
+ # Save agent error and continue to fallback path instead of crashing
389
  debug_info["agent_error"] = f"{ae}"
390
+ # include traceback for debugging
391
  debug_info["agent_traceback"] = traceback.format_exc()
392
+ # Do not re-raise; we'll fallback to genai.responses.generate below
393
 
394
if not out:
    # Fallback: call the google.generativeai SDK directly when the Agent path
    # produced no text (or no agent is configured).
    try:
        if not HAS_GENAI or genai is None:
            raise RuntimeError("Responses API not available; install google.generativeai SDK.")
        genai.configure(api_key=key_to_use)
        fname = file_name_or_id(processed)
        if not fname:
            raise RuntimeError("Uploaded file missing name/id")

        # NOTE(review): the SDK exposes GenerativeModel.generate_content, not
        # `genai.responses.generate`; the original call raised AttributeError
        # every time, was swallowed by the except below, and left `out` empty,
        # so this fallback silently never produced output. Fixed to use the
        # real API, passing the processed file object alongside the prompt.
        model = genai.GenerativeModel(model_used)
        response = model.generate_content(
            [processed, prompt_text],
            safety_settings=safety_settings,
            generation_config={"max_output_tokens": max_tokens},
        )

        # Preferred extraction: the SDK's aggregated `.text` accessor.
        text_pieces = []
        try:
            if getattr(response, "text", None):
                text_pieces.append(response.text.strip())
        except Exception:
            # `.text` raises when the response was blocked or empty;
            # fall through to candidate walking.
            pass

        # Defensive fallback: walk candidates -> content -> parts for text.
        if not text_pieces:
            for cand in getattr(response, "candidates", None) or []:
                content = getattr(cand, "content", None)
                for part in getattr(content, "parts", None) or []:
                    t = getattr(part, "text", None)
                    if t and str(t).strip():
                        text_pieces.append(str(t).strip())

        # Dedupe while preserving order, then join into the final output.
        seen = set()
        filtered = []
        for t in text_pieces:
            if isinstance(t, str) and t and t not in seen:
                filtered.append(t)
                seen.add(t)
        out = "\n\n".join(filtered)
    except Exception as e:
        # Surface a clear error to the UI, keeping debug context for support.
        tb = traceback.format_exc()
        st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
        st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
        out = ""
520
 
521
+ # post-process output
522
  if out:
523
  out = remove_prompt_echo(prompt_text, out)
524
  p = prompt_text
 
540
 
541
  except Exception as e:
542
  tb = traceback.format_exc()
543
+ st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', debug_info)}\n\nTraceback:\n{tb}"
 
544
  st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
545
  finally:
546
  st.session_state["busy"] = False