Video-Analysis-Tool

Sleeping

App Files Community

CB committed on Sep 13, 2025

Commit

ff726a7

verified ·

1 Parent(s): 1866c11

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +296 -496

streamlit_app.py CHANGED Viewed

@@ -3,17 +3,9 @@ import os
 import time
 import string
 import hashlib
-import traceback
-import inspect
-import json
-import re
 from glob import glob
 from pathlib import Path
 from difflib import SequenceMatcher
-from typing import Optional, Tuple, Any
-import requests
-from bs4 import BeautifulSoup
 import yt_dlp
 import ffmpeg
@@ -22,23 +14,22 @@ from dotenv import load_dotenv
 load_dotenv()
-# Try import google.generativeai (optional)
-HAS_GENAI = False
-genai = None
-upload_file = None
-get_file = None
 try:
-    import google.generativeai as genai  # type: ignore
-    try:
-        from google.generativeai import upload_file, get_file  # type: ignore
-    except Exception:
-        upload_file = None
-        get_file = None
     HAS_GENAI = True
 except Exception:
     genai = None
-    upload_file = None
-    get_file = None
     HAS_GENAI = False
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
@@ -58,15 +49,12 @@ st.session_state.setdefault("file_hash", None)
 st.session_state.setdefault("fast_mode", False)
 st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
 st.session_state.setdefault("last_model", "")
 st.session_state.setdefault("last_url_value", "")
-# allow disabling SSL verify for HTTP fallback (not recommended)
-st.session_state.setdefault("http_skip_ssl_verify", False)
-HEADERS = {"User-Agent": "Mozilla/5.0 (compatible)"}
-# ----------------- Utilities -----------------
-def sanitize_filename(path_str: str) -> str:
-    return Path(path_str).name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 def file_sha256(path: str, block_size: int = 65536) -> str:
     h = hashlib.sha256()
@@ -86,141 +74,14 @@ def convert_video_to_mp4(video_path: str) -> str:
         pass
     return target_path
-def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast") -> str:
     try:
         ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(overwrite_output=True, quiet=True)
         return target_path
     except Exception:
         return input_path
-# ----------------- Download / Extract -----------------
-def expand_url(short_url: str, timeout: int = 10) -> Tuple[Optional[str], Optional[str]]:
-    try:
-        r = requests.get(short_url, allow_redirects=True, timeout=timeout, headers=HEADERS)
-        r.raise_for_status()
-        return r.url, r.text
-    except Exception as e:
-        return None, f"error: {e}"
-def extract_video_from_html(html: str, base_url: Optional[str] = None) -> Optional[str]:
-    soup = BeautifulSoup(html, "html.parser")
-    og = soup.find("meta", property="og:video")
-    if og and og.get("content"):
-        return og.get("content")
-    vtag = soup.find("video")
-    if vtag:
-        src = vtag.get("src")
-        if src:
-            return src
-        source = vtag.find("source")
-        if source and source.get("src"):
-            return source.get("src")
-    for script in soup.find_all("script", type="application/ld+json"):
-        try:
-            data = json.loads(script.string or "{}")
-            if isinstance(data, dict):
-                video = data.get("video") or data.get("videoObject") or data.get("mainEntity")
-                if isinstance(video, dict):
-                    for k in ("contentUrl", "url"):
-                        if video.get(k):
-                            return video.get(k)
-                if data.get("contentUrl"):
-                    return data.get("contentUrl")
-        except Exception:
-            continue
-    for mname in ("twitter:player:stream", "twitter:player"):
-        m = soup.find("meta", attrs={"name": mname})
-        if m and m.get("content"):
-            return m.get("content")
-    for a in soup.find_all("a", href=True):
-        href = a["href"]
-        if any(domain in href for domain in ("youtube.com", "youtu.be", "vimeo.com")):
-            return href
-    return None
-def extract_video_from_twitter_html(html: str) -> Optional[str]:
-    soup = BeautifulSoup(html, "html.parser")
-    og_video = soup.find("meta", property="og:video")
-    if og_video and og_video.get("content"):
-        return og_video["content"]
-    scripts = soup.find_all("script")
-    for s in scripts:
-        txt = s.string
-        if not txt:
-            continue
-        if any(k in txt for k in ("video_info", "variants", "playbackUrl", "media")):
-            m = re.search(r"(?s)(\{.+\})", txt)
-            if not m:
-                continue
-            try:
-                blob = json.loads(m.group(1))
-            except Exception:
-                continue
-            def find_media_urls(obj):
-                if isinstance(obj, dict):
-                    for k, v in obj.items():
-                        if isinstance(v, str) and v.startswith("https://") and v.endswith(".mp4"):
-                            yield v
-                        else:
-                            yield from find_media_urls(v)
-                elif isinstance(obj, list):
-                    for it in obj:
-                        yield from find_media_urls(it)
-            for url in find_media_urls(blob):
-                return url
-            def find_variants(obj):
-                if isinstance(obj, dict):
-                    for k, v in obj.items():
-                        if k == "variants" and isinstance(v, list):
-                            for vi in v:
-                                if isinstance(vi, dict):
-                                    url = vi.get("url") or vi.get("playbackUrl")
-                                    ct = vi.get("content_type", "") or vi.get("contentType", "")
-                                    if url and url.startswith("http") and ("mp4" in url or "video" in ct or "video" in url):
-                                        yield url
-                        else:
-                            yield from find_variants(v)
-                elif isinstance(obj, list):
-                    for it in obj:
-                        yield from find_variants(it)
-            for url in find_variants(blob):
-                return url
-    return None
-def extract_direct_twitter_video(url: str) -> Tuple[Optional[str], str]:
-    final, html_or_err = expand_url(url)
-    if final is None:
-        return None, html_or_err or "expand failed"
-    variants = [
-        final,
-        final.replace("://twitter.com/", "://mobile.twitter.com/"),
-        final.replace("://twitter.com/", "://x.com/"),
-        final + "?s=20",
-        final + "?ref_src=twsrc%5Etfw",
-    ]
-    for u in variants:
-        try:
-            r = requests.get(u, allow_redirects=True, headers=HEADERS, timeout=10)
-            r.raise_for_status()
-            direct = extract_video_from_twitter_html(r.text)
-            if direct:
-                return direct, u
-        except Exception:
-            continue
-    try:
-        oembed = requests.get("https://publish.twitter.com/oembed?url=" + final, headers=HEADERS, timeout=6)
-        if oembed.ok:
-            j = oembed.json()
-            html = j.get("html", "")
-            soup = BeautifulSoup(html, "html.parser")
-            video = soup.find("video")
-            if video and video.get("src"):
-                return video["src"], final
-    except Exception:
-        pass
-    return None, "not found"
-def download_video_ytdlp(url: str, save_dir: str, video_password: Optional[str] = None) -> str:
     if not url:
         raise ValueError("No URL provided")
     outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
@@ -239,29 +100,100 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: Optional[str]
         raise FileNotFoundError("Downloaded video not found")
     return convert_video_to_mp4(matches[0])
-# ----------------- Generative AI helpers -----------------
-def get_effective_api_key() -> Optional[str]:
     return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
-def maybe_configure_genai(key: str) -> bool:
-    if not key or not HAS_GENAI:
         return False
     try:
         genai.configure(api_key=key)
-        return True
     except Exception:
-        return False
-def upload_video_sdk(filepath: str) -> Any:
     key = get_effective_api_key()
     if not key:
         raise RuntimeError("No API key provided")
     if not HAS_GENAI or upload_file is None:
-        raise RuntimeError("google.generativeai SDK upload not available")
     genai.configure(api_key=key)
     return upload_file(filepath)
-def wait_for_processed(file_obj: Any, timeout: int = 180) -> Any:
     if not HAS_GENAI or get_file is None:
         return file_obj
     start = time.time()
@@ -270,10 +202,7 @@ def wait_for_processed(file_obj: Any, timeout: int = 180) -> Any:
         return file_obj
     backoff = 1.0
     while True:
-        try:
-            obj = get_file(name)
-        except Exception:
-            return file_obj
         state = getattr(obj, "state", None)
         if not state or getattr(state, "name", None) != "PROCESSING":
             return obj
@@ -282,28 +211,13 @@ def wait_for_processed(file_obj: Any, timeout: int = 180) -> Any:
         time.sleep(backoff)
         backoff = min(backoff * 2, 8.0)
-def file_name_or_id(file_obj: Any) -> Optional[str]:
-    if file_obj is None:
-        return None
-    if isinstance(file_obj, dict):
-        return file_obj.get("name") or file_obj.get("id")
-    for attr in ("name", "id", "fileId", "file_id"):
-        if hasattr(file_obj, attr):
-            val = getattr(file_obj, attr)
-            if val:
-                return val
-    return str(file_obj)
-def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68) -> str:
     if not prompt or not text:
         return text
     a = " ".join(prompt.strip().lower().split())
     b_full = text.strip()
     b = " ".join(b_full[:check_len].lower().split())
-    try:
-        ratio = SequenceMatcher(None, a, b).ratio()
-    except Exception:
-        ratio = 0.0
     if ratio >= ratio_threshold:
         cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
         new_text = b_full[cut:].lstrip(" \n:-")
@@ -316,266 +230,46 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
             return b_full[len(ph):].lstrip(" \n:-")
     return text
-def generative_model_call_flexible(model_name: str, messages: list, files: Optional[list] = None, max_output_tokens: int = 1024) -> Any:
-    """
-    Robustly call GenerativeModel with different signatures.
-    This function anticipates generate_content taking different arg shapes.
-    """
-    if not HAS_GENAI or genai is None:
-        raise RuntimeError("genai not available")
-    GM = getattr(genai, "GenerativeModel", None)
-    if GM is None:
-        raise RuntimeError("GenerativeModel not available")
-    # Construct instance robustly
-    gm = None
-    try:
-        sig = inspect.signature(GM)
-        params = sig.parameters
-        if "model" in params:
-            gm = GM(model=model_name)
-        elif "model_name" in params:
-            gm = GM(model_name=model_name)
-        else:
-            gm = GM()
-            try:
-                if hasattr(gm, "model"):
-                    setattr(gm, "model", model_name)
-            except Exception:
-                pass
-    except Exception:
-        try:
-            gm = GM(model=model_name)
-        except Exception:
-            try:
-                gm = GM(model_name=model_name)
-            except Exception:
-                gm = GM()
-    # Try generate_content with multiple call shapes:
-    #  - generate_content(messages=..., files=..., max_output_tokens=...)
-    #  - generate_content(messages, files, max_output_tokens)
-    #  - generate_content(messages)
-    if hasattr(gm, "generate_content"):
-        # try keyword style first
-        try:
-            return gm.generate_content(messages=messages, files=files, max_output_tokens=max_output_tokens)
-        except TypeError:
-            # try positional: some versions expect (messages, max_output_tokens)
-            try:
-                return gm.generate_content(messages, max_output_tokens)
-            except TypeError:
-                # maybe (prompt_str,) shape
-                try:
-                    prompt = messages[-1].get("content") if isinstance(messages, (list, tuple)) and messages else str(messages)
-                    return gm.generate_content(prompt)
-                except Exception as e:
-                    raise RuntimeError(f"GenerativeModel.generate_content unusable: {e}")
-        except Exception as e:
-            raise RuntimeError(f"generate_content failed: {e}")
-    # Try generate with files kw / positional
-    if hasattr(gm, "generate"):
-        try:
-            return gm.generate(messages=messages, files=files, max_output_tokens=max_output_tokens)
-        except TypeError:
-            try:
-                return gm.generate(messages, max_output_tokens)
-            except Exception as e:
-                raise RuntimeError(f"GenerativeModel.generate unusable: {e}")
-    raise RuntimeError("No usable generate method on GenerativeModel instance")
-def responses_http_call(api_key: str, model: str, messages: list, file_name: Optional[str] = None, max_output_tokens: int = 1024, safety_settings: Optional[list] = None) -> dict:
-    """
-    HTTP fallback to Responses v1 endpoint. Attempts retries and allows optional SSL skip.
-    """
-    url = f"https://api.generativeai.googleapis.com/v1/models/{model}:generateMessage"
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-    payload = {
-        "messages": [{"role": m.get("role", "user"), "content": [{"type": "text", "text": m.get("content", "")}]} for m in messages],
-        "maxOutputTokens": max_output_tokens,
-    }
-    if safety_settings:
-        payload["safetySettings"] = safety_settings
-    if file_name:
-        payload["files"] = [{"name": file_name}]
-    # allow skipping SSL verify in special environments (not recommended)
-    verify = not bool(st.session_state.get("http_skip_ssl_verify", False))
-    last_err = None
-    for attempt in range(1, 4):
-        try:
-            r = requests.post(url, json=payload, headers=headers, timeout=30, verify=verify)
-            r.raise_for_status()
-            return r.json()
-        except requests.exceptions.SSLError as e:
-            last_err = e
-            # If SSL hostname mismatch or cert issues, surface helpful message once
-            raise RuntimeError(f"SSL error when calling Responses HTTP endpoint: {e}. If you are behind a proxy intercepting TLS, set 'Skip HTTP SSL verify' in Settings (not recommended) or fix your CA bundle.")
-        except Exception as e:
-            last_err = e
-            time.sleep(0.8 * attempt)
-            continue
-    raise RuntimeError(f"HTTP responses fallback failed after retries: {last_err}")
-def normalize_response_to_text(response: Any) -> str:
-    """Extract text from SDK or HTTP responses into a single string."""
-    if not response:
-        return ""
-    # dict-like
-    if isinstance(response, dict):
-        for key in ("output", "candidates", "items", "responses"):
-            val = response.get(key)
-            if isinstance(val, (list, tuple)) and val:
-                pieces = []
-                for el in val:
-                    if isinstance(el, dict):
-                        c = el.get("content") or el.get("message") or el.get("text")
-                        if isinstance(c, list):
-                            for part in c:
-                                if isinstance(part, dict):
-                                    t = part.get("text") or part.get("content")
-                                    if t:
-                                        pieces.append(t)
-                                elif isinstance(part, str):
-                                    pieces.append(part)
-                        elif isinstance(c, str):
-                            pieces.append(c)
-                    elif isinstance(el, str):
-                        pieces.append(el)
-                if pieces:
-                    return "\n\n".join(pieces)
-        # message path
-        msg = response.get("message") or response.get("response") or response.get("output")
-        if isinstance(msg, dict):
-            c = msg.get("content")
-            if isinstance(c, list):
-                texts = []
-                for part in c:
-                    if isinstance(part, dict) and "text" in part:
-                        texts.append(part.get("text"))
-                    elif isinstance(part, str):
-                        texts.append(part)
-                return "\n\n".join([t for t in texts if t])
-        # fallback join string values
-        flat = []
-        for v in response.values():
-            if isinstance(v, str) and v.strip():
-                flat.append(v.strip())
-        return "\n\n".join(flat)
-    # object-like SDK responses
-    for attr in ("output", "candidates", "items", "responses", "message"):
-        val = getattr(response, attr, None)
-        if isinstance(val, (list, tuple)) and val:
-            pieces = []
-            for el in val:
-                if hasattr(el, "text"):
-                    pieces.append(getattr(el, "text"))
-                elif isinstance(el, dict):
-                    t = el.get("text") or el.get("content")
-                    if t:
-                        pieces.append(t)
-                else:
-                    pieces.append(str(el))
-            return "\n\n".join([p for p in pieces if p])
-    text = getattr(response, "text", None) or getattr(response, "message", None)
-    return text or ""
-# ----------------- UI -----------------
-current_url = st.session_state.get("url", "")
-if current_url != st.session_state.get("last_url_value"):
-    st.session_state.update({"videos": "", "uploaded_file": None, "processed_file": None, "last_loaded_path": "", "analysis_out": "", "last_error": "", "file_hash": None})
-    st.session_state["last_url_value"] = current_url
-st.sidebar.header("Video Input")
-st.sidebar.text_input("Video URL", key="url", placeholder="https://")
-settings_exp = st.sidebar.expander("Settings", expanded=False)
-settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
-settings_exp.text_input("Google API Key", key="api_key", value=os.getenv("GOOGLE_API_KEY", ""), type="password")
-default_prompt = (
-    "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
-)
-settings_exp.text_area("Enter analysis", value=default_prompt, height=140, key="analysis_prompt")
-settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
-settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
-settings_exp.checkbox("Skip HTTP SSL verify (only if you trust the network)", key="http_skip_ssl_verify")
-key_source = "session" if st.session_state.get("api_key") else ".env" if os.getenv("GOOGLE_API_KEY") else "none"
-settings_exp.caption(f"Using API key from: **{key_source}**")
-if not get_effective_api_key():
-    settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
-safety_settings = [
-    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
-]
 col1, col2 = st.columns([1, 3])
 with col1:
     generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
 with col2:
     pass
-# Load Video
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         vpw = st.session_state.get("video-password", "")
-        url_val = st.session_state.get("url", "").strip()
-        final_url = url_val
-        html_text = None
-        extracted = None
-        if url_val:
-            if "t.co/" in url_val or ("twitter.com" in url_val or "x.com" in url_val):
-                extracted, src_info = extract_direct_twitter_video(url_val)
-                if extracted:
-                    final_url = extracted
-                else:
-                    expanded, html_or_err = expand_url(url_val)
-                    if expanded:
-                        final_url = expanded
-                        html_text = html_or_err
-            else:
-                expanded, html_or_err = expand_url(url_val)
-                if expanded:
-                    final_url = expanded
-                    html_text = html_or_err
-        if html_text and not extracted:
-            extracted = extract_video_from_html(html_text, base_url=final_url)
-        target_url_for_ytdlp = extracted or final_url
-        path = download_video_ytdlp(target_url_for_ytdlp, str(DATA_DIR), vpw)
         st.session_state["videos"] = path
         st.session_state["last_loaded_path"] = path
-        st.session_state["file_hash"] = file_sha256(path) if os.path.exists(path) else None
-        st.session_state["uploaded_file"] = None
-        st.session_state["processed_file"] = None
     except Exception as e:
         st.sidebar.error(f"Failed to load video: {e}")
-# Sidebar preview
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
     except Exception:
         st.sidebar.write("Couldn't preview video")
     with st.sidebar.expander("Options", expanded=False):
         loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
         st.session_state["loop_video"] = loop_checkbox
         if st.button("Clear Video(s)"):
-            for f in glob(str(DATA_DIR / "*")):
-                try:
-                    os.remove(f)
-                except Exception:
-                    pass
-            st.session_state.update({"videos": "", "uploaded_file": None, "processed_file": None, "last_loaded_path": "", "analysis_out": "", "last_error": "", "file_hash": None})
         try:
             with open(st.session_state["videos"], "rb") as vf:
                 st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
         except Exception:
             st.sidebar.error("Failed to prepare download")
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
     try:
         file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
@@ -585,7 +279,7 @@ if st.session_state["videos"]:
     except Exception:
         pass
-# Generation flow
 if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -594,28 +288,49 @@ if generate_now and not st.session_state.get("busy"):
         if not key_to_use:
             st.error("Google API key not set.")
         else:
-            st.session_state["busy"] = True
             try:
-                maybe_configure_genai(key_to_use)
                 model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
                 current_path = st.session_state.get("videos")
-                current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
                 reupload_needed = True
-                processed = st.session_state.get("processed_file")
                 if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
                     reupload_needed = False
-                upload_path = current_path
                 if reupload_needed:
                     fast_mode = st.session_state.get("fast_mode", False)
                     try:
-                        file_size_mb = os.path.getsize(current_path) / (1024 * 1024)
                     except Exception:
                         file_size_mb = 0
                     if not fast_mode and file_size_mb > 50:
-                        compressed_path = str(Path(current_path).with_name(Path(current_path).stem + "_compressed.mp4"))
-                        upload_path = compress_video(current_path, compressed_path, crf=28, preset="fast")
                     with st.spinner("Uploading video..."):
                         uploaded = upload_video_sdk(upload_path)
                         processed = wait_for_processed(uploaded, timeout=180)
@@ -624,91 +339,176 @@ if generate_now and not st.session_state.get("busy"):
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
-                prompt_text = (st.session_state.get("analysis_prompt", "") or default_prompt).strip()
-                system_msg = {"role": "system", "content": prompt_text}
-                user_msg = {"role": "user", "content": "Please summarize the attached video."}
-                fname = file_name_or_id(processed)
-                response = None
-                diagnostics = {"attempts": []}
-                # 1) genai.responses.generate (supports files)
-                if response is None and HAS_GENAI and genai is not None and hasattr(genai, "responses") and hasattr(genai.responses, "generate"):
                     try:
-                        diagnostics["attempts"].append("responses.generate")
                         response = genai.responses.generate(
-                            model=model_id,
                             messages=[system_msg, user_msg],
-                            files=[{"name": fname}] if fname else None,
                             safety_settings=safety_settings,
-                            max_output_tokens=(256 if st.session_state.get("fast_mode") else 1024),
                         )
-                    except Exception as e:
-                        diagnostics["responses.generate_error"] = str(e)
-                        response = None
-                # 2) GenerativeModel flexible call
-                if response is None and HAS_GENAI and genai is not None and hasattr(genai, "GenerativeModel"):
-                    try:
-                        diagnostics["attempts"].append("GenerativeModel")
-                        response = generative_model_call_flexible(model_id, [system_msg, user_msg], files=[{"name": fname}] if fname else None, max_output_tokens=(256 if st.session_state.get("fast_mode") else 1024))
-                    except Exception as e:
-                        diagnostics["GenerativeModel_error"] = str(e)
-                        response = None
-                # 3) top-level legacy helpers
-                if response is None and HAS_GENAI and genai is not None:
-                    try:
-                        if hasattr(genai, "generate"):
-                            diagnostics["attempts"].append("top.generate")
-                            # don't assume exact param shapes; try best-effort
-                            try:
-                                response = genai.generate(model=model_id, input=[{"text": prompt_text, "files": [{"name": fname}]}], max_output_tokens=(256 if st.session_state.get("fast_mode") else 1024))
-                            except TypeError:
-                                response = genai.generate(model=model_id, input=prompt_text)
-                        elif hasattr(genai, "create"):
-                            diagnostics["attempts"].append("top.create")
-                            try:
-                                response = genai.create(model=model_id, input=[{"text": prompt_text, "files": [{"name": fname}]}], max_output_tokens=(256 if st.session_state.get("fast_mode") else 1024))
-                            except TypeError:
-                                response = genai.create(model=model_id, input=prompt_text)
-                    except Exception as e:
-                        diagnostics["top_level_error"] = str(e)
-                        response = None
-                # 4) HTTP fallback
-                if response is None:
-                    try:
-                        diagnostics["attempts"].append("http_fallback")
-                        response = responses_http_call(key_to_use, model_id, [system_msg, user_msg], file_name=fname, max_output_tokens=(256 if st.session_state.get("fast_mode") else 1024), safety_settings=safety_settings)
-                    except Exception as e:
-                        diagnostics["http_fallback_error"] = str(e)
-                        response = None
-                if response is None:
-                    st.session_state["last_error"] = f"No supported generation method found. Diagnostics: {diagnostics}"
-                    st.error("Unable to call a supported Responses method in this runtime. See Last Error.")
-                    out = ""
-                else:
-                    out = normalize_response_to_text(response)
-                    out = remove_prompt_echo(prompt_text, out).strip()
                     placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
                     low = out.strip().lower()
                     for ph in placeholders:
                         if low.startswith(ph):
                             out = out.strip()[len(ph):].lstrip(" \n:-")
                             break
                 st.session_state["analysis_out"] = out
-                st.session_state["last_error"] = "" if out else st.session_state.get("last_error", "")
                 st.subheader("Analysis Result")
                 st.markdown(out if out else "No analysis returned.")
-                st.caption(f"Est. max tokens: {256 if st.session_state.get('fast_mode') else 1024}")
             except Exception as e:
-                tb = traceback.format_exc()
-                st.session_state["last_error"] = f"{e}\n\nTraceback:\n{tb}"
                 st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
             finally:
                 st.session_state["busy"] = False
@@ -721,4 +521,4 @@ if st.session_state.get("analysis_out"):
 if st.session_state.get("last_error"):
     with st.expander("Last Error", expanded=False):
-        st.write(st.session_state.get("last_error"))

 import time
 import string
 import hashlib
 from glob import glob
 from pathlib import Path
 from difflib import SequenceMatcher
 import yt_dlp
 import ffmpeg
 load_dotenv()
 try:
+    from phi.agent import Agent
+    from phi.model.google import Gemini
+    from phi.tools.duckduckgo import DuckDuckGo
+    HAS_PHI = True
+except Exception:
+    Agent = Gemini = DuckDuckGo = None
+    HAS_PHI = False
+try:
+    import google.generativeai as genai
+    from google.generativeai import upload_file, get_file  # type: ignore
     HAS_GENAI = True
 except Exception:
     genai = None
+    upload_file = get_file = None
     HAS_GENAI = False
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 st.session_state.setdefault("fast_mode", False)
 st.session_state.setdefault("api_key", os.getenv("GOOGLE_API_KEY", ""))
 st.session_state.setdefault("last_model", "")
+st.session_state.setdefault("upload_progress", {"uploaded": 0, "total": 0})
 st.session_state.setdefault("last_url_value", "")
+def sanitize_filename(path_str: str):
+    """Return a lowercase, punctuation-free file name that keeps its extension.
+
+    Only the final path component of ``path_str`` is used. Punctuation is
+    removed and spaces become underscores in both the stem and the extension.
+    The extension separator is preserved so that a name such as
+    ``"My Video.MP4"`` becomes ``"my_video.mp4"`` rather than ``"my_videomp4"``;
+    the result is used as the suggested name for the download button.
+    """
+    path = Path(path_str)
+    strip_punct = str.maketrans("", "", string.punctuation)
+    stem = path.stem.lower().translate(strip_punct).replace(" ", "_")
+    # translate() also strips the leading dot of the suffix; it is re-added below.
+    ext = path.suffix.lower().translate(strip_punct).replace(" ", "_")
+    return f"{stem}.{ext}" if ext else stem
 def file_sha256(path: str, block_size: int = 65536) -> str:
     h = hashlib.sha256()
         pass
     return target_path
+def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
+    # Re-encode input_path to target_path with libx264 at the given CRF and preset.
+    # Returns target_path on success; on any exception the failure is swallowed
+    # and input_path is returned instead, so callers always get a usable path.
     try:
         ffmpeg.input(input_path).output(target_path, vcodec="libx264", crf=crf, preset=preset).run(overwrite_output=True, quiet=True)
         return target_path
     except Exception:
         return input_path
+def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
     if not url:
         raise ValueError("No URL provided")
     outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
         raise FileNotFoundError("Downloaded video not found")
     return convert_video_to_mp4(matches[0])
+def file_name_or_id(file_obj):
+    """Return the first truthy identifier found on an uploaded-file handle.
+
+    ``None`` yields ``None``. A dict is probed for ``"name"`` then ``"id"``.
+    Any other object is probed for the attributes ``name``, ``id`` and
+    ``fileId`` in that order; if none is truthy, the value of the last
+    attribute probed is returned.
+    """
+    if file_obj is None:
+        return None
+    if isinstance(file_obj, dict):
+        primary = file_obj.get("name")
+        return primary if primary else file_obj.get("id")
+    found = None
+    for attr in ("name", "id", "fileId"):
+        found = getattr(file_obj, attr, None)
+        if found:
+            break
+    return found
+def get_effective_api_key():
+    # Prefer the key held in Streamlit session state under "api_key"; fall back
+    # to the GOOGLE_API_KEY environment variable when that entry is empty.
     return st.session_state.get("api_key") or os.getenv("GOOGLE_API_KEY")
+def configure_genai_if_needed():
+    """Configure the google.generativeai SDK with the effective API key.
+
+    Returns:
+        True only when a key is available, the SDK was imported, and
+        ``genai.configure`` completed without raising. False otherwise, so
+        callers never treat a missing SDK or a failed configure as success.
+    """
+    key = get_effective_api_key()
+    if not key:
         return False
+    # The optional import may have failed, leaving genai set to None.
+    if not HAS_GENAI or genai is None:
+        return False
     try:
         genai.configure(api_key=key)
     except Exception:
+        return False
+    return True
+_agent = None
+def maybe_create_agent(model_id: str):
+    """Return a phi Agent bound to ``model_id``, or None when one cannot be built.
+
+    The agent is kept in the module-level ``_agent`` and reused while the
+    "last_model" session entry still equals ``model_id``. It is only built when
+    both optional imports succeeded (HAS_PHI, HAS_GENAI) and an API key is
+    available. Any exception during construction is swallowed and leaves the
+    cache empty.
+    """
+    global _agent
+    api_key = get_effective_api_key()
+    prerequisites_met = HAS_PHI and HAS_GENAI and api_key
+    if not prerequisites_met:
+        _agent = None
+        return None
+    # Reuse the cached agent only if it was created for this same model id.
+    if _agent and st.session_state.get("last_model") == model_id:
+        return _agent
+    try:
+        genai.configure(api_key=api_key)
+        _agent = Agent(
+            name="Video AI summarizer",
+            model=Gemini(id=model_id),
+            tools=[DuckDuckGo()],
+            markdown=True,
+        )
+        st.session_state["last_model"] = model_id
+    except Exception:
+        _agent = None
+    return _agent
+def clear_all_video_state():
+    """Forget the current video and delete everything matched by DATA_DIR/*.
+
+    Drops the cached upload handles, blanks the text session entries, resets
+    the stored file hash, and then removes each path returned by the glob on a
+    best-effort basis (removal errors are ignored).
+    """
+    for handle_key in ("uploaded_file", "processed_file"):
+        st.session_state.pop(handle_key, None)
+    for text_key in ("videos", "last_loaded_path", "analysis_out", "last_error"):
+        st.session_state[text_key] = ""
+    st.session_state["file_hash"] = None
+    leftovers = glob(str(DATA_DIR / "*"))
+    for leftover in leftovers:
+        try:
+            os.remove(leftover)
+        except Exception:
+            # Best effort: entries that cannot be removed are left in place.
+            pass
+# Track URL changes: when the "url" session entry differs from the value
+# recorded in "last_url_value", clear all per-video state (which also deletes
+# the files under DATA_DIR) and remember the new URL for the next comparison.
+current_url = st.session_state.get("url", "")
+if current_url != st.session_state.get("last_url_value"):
+    clear_all_video_state()
+    st.session_state["last_url_value"] = current_url
+# Sidebar: the video URL field plus a collapsible "Settings" panel.
+st.sidebar.header("Video Input")
+st.sidebar.text_input("Video URL", key="url", placeholder="https://")
+settings_exp = st.sidebar.expander("Settings", expanded=False)
+model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.5-flash-lite", key="model_input")
+# The "api_key" session entry is already seeded from GOOGLE_API_KEY via
+# st.session_state.setdefault, so the widget takes its value from there.
+# Passing value= as well makes Streamlit warn that the widget has both a
+# default value and a Session State value.
+settings_exp.text_input("Google API Key", key="api_key", type="password")
+default_prompt = (
+    "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. Keep language professional. Include a list of observations for notable events."
+)
+analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
+settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
+settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
+# Show which key is active
+if st.session_state.get("api_key"):
+    key_source = "session"
+elif os.getenv("GOOGLE_API_KEY"):
+    key_source = ".env"
+else:
+    key_source = "none"
+settings_exp.caption(f"Using API key from: **{key_source}**")
+if not get_effective_api_key():
+    settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
+# Safety configuration passed along with every generation request: each of the
+# four listed harm categories has its threshold set to "OFF".
+# NOTE(review): whether "OFF" is accepted depends on the installed SDK version — confirm.
+safety_settings = [
+    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
+    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
+    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
+    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
+]
+def upload_video_sdk(filepath: str):
+    # Upload the file at filepath through the google.generativeai SDK and return
+    # whatever upload_file() returns.
+    # Raises RuntimeError when no API key is available or when the SDK (or its
+    # upload_file helper) could not be imported.
     key = get_effective_api_key()
     if not key:
         raise RuntimeError("No API key provided")
     if not HAS_GENAI or upload_file is None:
+        raise RuntimeError("google.generativeai SDK not available; cannot upload")
     genai.configure(api_key=key)
     return upload_file(filepath)
+def wait_for_processed(file_obj, timeout=180):
     if not HAS_GENAI or get_file is None:
         return file_obj
     start = time.time()
         return file_obj
     backoff = 1.0
     while True:
+        obj = get_file(name)
         state = getattr(obj, "state", None)
         if not state or getattr(state, "name", None) != "PROCESSING":
             return obj
         time.sleep(backoff)
         backoff = min(backoff * 2, 8.0)
+def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
     if not prompt or not text:
         return text
     a = " ".join(prompt.strip().lower().split())
     b_full = text.strip()
     b = " ".join(b_full[:check_len].lower().split())
+    ratio = SequenceMatcher(None, a, b).ratio()
     if ratio >= ratio_threshold:
         cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
         new_text = b_full[cut:].lstrip(" \n:-")
             return b_full[len(ph):].lstrip(" \n:-")
     return text
 col1, col2 = st.columns([1, 3])
 with col1:
     generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
 with col2:
     pass
+# "Load Video": download the URL into DATA_DIR, record the resulting path, and
+# invalidate any previously uploaded copy so the next Generate uploads again.
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         vpw = st.session_state.get("video-password", "")
+        path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
         st.session_state["videos"] = path
         st.session_state["last_loaded_path"] = path
+        # Cached upload handles belong to the previous video; drop them.
+        st.session_state.pop("uploaded_file", None)
+        st.session_state.pop("processed_file", None)
+        # The hash is compared at generation time to decide whether a re-upload
+        # is needed; a hashing failure is stored as None.
+        try:
+            st.session_state["file_hash"] = file_sha256(path)
+        except Exception:
+            st.session_state["file_hash"] = None
     except Exception as e:
         st.sidebar.error(f"Failed to load video: {e}")
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
     except Exception:
         st.sidebar.write("Couldn't preview video")
     with st.sidebar.expander("Options", expanded=False):
         loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
         st.session_state["loop_video"] = loop_checkbox
         if st.button("Clear Video(s)"):
+            clear_all_video_state()
         try:
             with open(st.session_state["videos"], "rb") as vf:
                 st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
         except Exception:
             st.sidebar.error("Failed to prepare download")
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
     try:
         file_size_mb = os.path.getsize(st.session_state["videos"]) / (1024 * 1024)
     except Exception:
         pass
+# --- Generation flow ---
 if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
         if not key_to_use:
             st.error("Google API key not set.")
         else:
             try:
+                st.session_state["busy"] = True
+                try:
+                    if HAS_GENAI and genai is not None:
+                        genai.configure(api_key=key_to_use)
+                except Exception:
+                    pass
                 model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
+                if st.session_state.get("last_model") != model_id:
+                    st.session_state["last_model"] = ""
+                maybe_create_agent(model_id)
+                processed = st.session_state.get("processed_file")
                 current_path = st.session_state.get("videos")
+                try:
+                    current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
+                except Exception:
+                    current_hash = None
                 reupload_needed = True
                 if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
                     reupload_needed = False
                 if reupload_needed:
+                    # Upload (optionally compressing first) and cache the resulting
+                    # handles so that a later Generate on the same file can skip
+                    # the upload.
+                    if not HAS_GENAI:
+                        raise RuntimeError("google.generativeai SDK not available; install it.")
+                    local_path = current_path
                     fast_mode = st.session_state.get("fast_mode", False)
+                    upload_path = local_path
                     try:
+                        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
                     except Exception:
                         file_size_mb = 0
                     if not fast_mode and file_size_mb > 50:
+                        compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
+                        try:
+                            # fast_mode is always False inside this branch, so the
+                            # preset is always "fast".
+                            upload_path = compress_video(local_path, compressed_path, crf=28, preset="fast")
+                        except Exception:
+                            upload_path = local_path
                     with st.spinner("Uploading video..."):
                         uploaded = upload_video_sdk(upload_path)
                         processed = wait_for_processed(uploaded, timeout=180)
+                        # Without these entries the reuse check above never finds a
+                        # processed file and every Generate uploads again.
+                        st.session_state["uploaded_file"] = uploaded
+                        st.session_state["processed_file"] = processed
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
+                prompt_text = (analysis_prompt.strip() or default_prompt).strip()
+                out = ""
+                if st.session_state.get("fast_mode"):
+                    model_used = model_id if model_id else "gemini-2.5-flash-lite"
+                    max_tokens = 512
+                else:
+                    model_used = model_id
+                    max_tokens = 1024
+                est_tokens = max_tokens
+                est_cost_caption = f"Est. max tokens: {est_tokens}"
+                agent = maybe_create_agent(model_used)
+                if agent:
+                    with st.spinner("Generating description via Agent..."):
+                        if not processed:
+                            raise RuntimeError("Processed file missing for agent generation")
+                        response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
+                        out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
+                else:
+                    # Direct SDK path, used when no phi Agent could be created.
+                    # google.generativeai has no `responses` namespace; generation
+                    # goes through GenerativeModel.generate_content, which accepts
+                    # the uploaded file handle alongside the text prompt.
+                    if not HAS_GENAI or genai is None:
+                        raise RuntimeError("Responses API not available; install google.generativeai SDK.")
+                    genai.configure(api_key=key_to_use)
+                    fname = file_name_or_id(processed)
+                    if not fname:
+                        raise RuntimeError("Uploaded file missing name/id")
+                    model = genai.GenerativeModel(model_used)
+                    response = model.generate_content(
+                        [processed, prompt_text],
+                        safety_settings=safety_settings,
+                        generation_config={"max_output_tokens": max_tokens},
+                    )
+                    # Collect text from the candidates' parts directly: the
+                    # `response.text` shortcut raises when a candidate has no text
+                    # parts (for example when the reply was blocked).
+                    text_pieces = []
+                    for candidate in getattr(response, "candidates", None) or []:
+                        content = getattr(candidate, "content", None)
+                        parts = getattr(content, "parts", None) or []
+                        joined = "".join(getattr(part, "text", "") or "" for part in parts).strip()
+                        # Skip empty candidates and exact duplicates, keeping order.
+                        if joined and joined not in text_pieces:
+                            text_pieces.append(joined)
+                    out = "\n\n".join(text_pieces)
+                # post-process output to remove prompt echo or placeholders
+                if out:
+                    out = remove_prompt_echo(prompt_text, out)
+                    p = prompt_text
+                    if p and out.strip().lower().startswith(p.lower()):
+                        out = out.strip()[len(p):].lstrip(" \n:-")
                     placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
                     low = out.strip().lower()
                     for ph in placeholders:
                         if low.startswith(ph):
                             out = out.strip()[len(ph):].lstrip(" \n:-")
                             break
+                    out = out.strip()
                 st.session_state["analysis_out"] = out
+                st.session_state["last_error"] = ""
                 st.subheader("Analysis Result")
                 st.markdown(out if out else "No analysis returned.")
+                st.caption(est_cost_caption)
             except Exception as e:
+                st.session_state["last_error"] = str(e)
                 st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
             finally:
                 st.session_state["busy"] = False
 if st.session_state.get("last_error"):
     with st.expander("Last Error", expanded=False):
+        st.write(st.session_state.get("last_error