Spaces:

Hug0endob
/

Video-Analysis

Build error

App Files Community

CB committed on Sep 11, 2025

Commit

44ae25c

verified ·

1 Parent(s): b0626f0

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +289 -308

streamlit_app.py CHANGED Viewed

@@ -1,12 +1,9 @@
 # streamlit_app.py
 import os
 import time
-import json
-import string
 from glob import glob
 from pathlib import Path
-import hashlib
-from difflib import SequenceMatcher
 from tempfile import NamedTemporaryFile
 import yt_dlp
@@ -16,44 +13,29 @@ from dotenv import load_dotenv
 load_dotenv()
-try:
-    from phi.agent import Agent
-    from phi.model.google import Gemini
-    from phi.tools.duckduckgo import DuckDuckGo
-    HAS_PHI = True
-except Exception:
-    Agent = Gemini = DuckDuckGo = None
-    HAS_PHI = False
-try:
-    import google.generativeai as genai
-    from google.generativeai import upload_file, get_file  # type: ignore
-    HAS_GENAI = True
-except Exception:
-    genai = None
-    upload_file = get_file = None
-    HAS_GENAI = False
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
-# Session state defaults
-st.session_state.setdefault("videos", "")
-st.session_state.setdefault("loop_video", False)
-st.session_state.setdefault("uploaded_file", None)
-st.session_state.setdefault("processed_file", None)
-st.session_state.setdefault("busy", False)
-st.session_state.setdefault("last_loaded_path", "")
-st.session_state.setdefault("analysis_out", "")
-st.session_state.setdefault("last_error", "")
-st.session_state.setdefault("file_hash", None)
-st.session_state.setdefault("fast_mode", False)
-# Helpers
 def sanitize_filename(path_str: str):
-    name = Path(path_str).name
-    return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 def file_sha256(path: str, block_size: int = 65536) -> str:
     h = hashlib.sha256()
@@ -73,32 +55,31 @@ def safe_ffmpeg_run(stream_cmd):
             return False, str(e)
 def convert_video_to_mp4(video_path: str) -> str:
-    target_path = Path(video_path).with_suffix(".mp4")
-    if target_path.exists():
-        return str(target_path)
-    tmp = NamedTemporaryFile(prefix=target_path.stem + "_", suffix=".mp4", delete=False, dir=target_path.parent)
     tmp.close()
-    success, err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(tmp.name)))
-    if not success:
         try:
             os.remove(tmp.name)
         except Exception:
             pass
-        raise RuntimeError(f"ffmpeg conversion failed: {err}")
-    os.replace(tmp.name, str(target_path))
-    # optional: remove original if different extension
     if Path(video_path).suffix.lower() != ".mp4":
         try:
             os.remove(video_path)
         except Exception:
             pass
-    return str(target_path)
 def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
     tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
     tmp.close()
-    success, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
-    if not success:
         try:
             os.remove(tmp.name)
         except Exception:
@@ -111,10 +92,10 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
     if not url:
         raise ValueError("No URL provided")
     outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
-    ydl_opts = {"outtmpl": outtmpl, "format": "best"}
     if video_password:
-        ydl_opts["videopassword"] = video_password
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
     video_id = info.get("id") if isinstance(info, dict) else None
     if video_id:
@@ -126,226 +107,217 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
     return convert_video_to_mp4(matches[0])
 def file_name_or_id(file_obj):
-    if file_obj is None:
         return None
-    # dict-like
     if isinstance(file_obj, dict):
-        for key in ("name", "id", "fileId", "file_id"):
             val = file_obj.get(key)
             if val:
                 s = str(val)
                 if s.startswith("files/"):
                     return s
-                # if id-like (12 chars) return files/{id}
-                if len(s) == 12 and "/" not in s:
                     return f"files/{s}"
                 return s
         uri = file_obj.get("uri") or file_obj.get("url")
         if uri:
-            tail = uri.rstrip("/").split("/")[-1]
-            if tail:
-                return tail if tail.startswith("files/") else f"files/{tail}"
         return None
-    # object-like (SDK)
-    for attr in ("name", "id", "fileId", "file_id", "uri"):
         val = getattr(file_obj, attr, None)
         if val:
             s = str(val)
             if s.startswith("files/"):
                 return s
-            if len(s) == 12 and "/" not in s:
                 return f"files/{s}"
             return s
-    # last resort: parse string
     s = str(file_obj)
     if "files/" in s:
         idx = s.find("files/")
         return s[idx:] if s[idx:].startswith("files/") else f"files/{s[idx+6:]}"
     return None
-# Configure Google SDK if key present
-if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
     try:
-        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
     except Exception:
-        pass
-# UI: Sidebar inputs
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
-settings_exp = st.sidebar.expander("Settings", expanded=False)
-env_api_key = os.getenv("GOOGLE_API_KEY", "")
-API_KEY = settings_exp.text_input("Google API Key", value=env_api_key, placeholder="Set GOOGLE_API_KEY in .env or enter here", type="password")
-model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
 model_id = model_input.strip() or "gemini-2.0-flash-lite"
 model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
-analysis_prompt = settings_exp.text_area("Enter analysis", value="watch entire video and describe", height=120)
-settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
-settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
-settings_exp.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
-if not API_KEY and not os.getenv("GOOGLE_API_KEY"):
-    settings_exp.warning("No Google API key provided; upload/generation disabled.", icon="⚠️")
-safety_settings = [
-    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
-    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
-]
-# Build Agent if available
-_agent = None
-if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
-    try:
-        key_to_use = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
-        genai.configure(api_key=key_to_use)
-        _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
-    except Exception:
-        _agent = None
-def clear_all_video_state():
-    st.session_state.pop("uploaded_file", None)
-    st.session_state.pop("processed_file", None)
-    st.session_state["videos"] = ""
-    st.session_state["last_loaded_path"] = ""
-    st.session_state["analysis_out"] = ""
-    st.session_state["last_error"] = ""
-    st.session_state["file_hash"] = None
-    for f in glob(str(DATA_DIR / "*")):
-        try:
-            os.remove(f)
-        except Exception:
-            pass
-# Track URL changes
-if "last_url_value" not in st.session_state:
-    st.session_state["last_url_value"] = st.session_state.get("url", "")
-current_url = st.session_state.get("url", "")
-if current_url != st.session_state.get("last_url_value"):
-    clear_all_video_state()
-    st.session_state["last_url_value"] = current_url
-# Load video button
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         vpw = st.session_state.get("video-password", "")
         path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
         st.session_state["videos"] = path
         st.session_state["last_loaded_path"] = path
-        st.session_state.pop("uploaded_file", None)
-        st.session_state.pop("processed_file", None)
         st.session_state["file_hash"] = file_sha256(path)
     except Exception as e:
-        st.sidebar.error(f"Failed to load video: {e}")
-# Sidebar preview & options
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
     except Exception:
         st.sidebar.write("Couldn't preview video")
     with st.sidebar.expander("Options", expanded=False):
         loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
         st.session_state["loop_video"] = loop_checkbox
         if st.button("Clear Video(s)"):
-            clear_all_video_state()
         try:
             with open(st.session_state["videos"], "rb") as vf:
                 st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
         except Exception:
-            st.sidebar.error("Failed to prepare download")
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
-# Upload helpers
-def upload_video_sdk(filepath: str):
-    key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
-    if not key:
-        raise RuntimeError("No API key provided")
-    if not HAS_GENAI or upload_file is None:
-        raise RuntimeError("google.generativeai SDK not available; cannot upload")
-    genai.configure(api_key=key)
-    return upload_file(filepath)
-def wait_for_processed(file_obj, timeout=180):
-    if not HAS_GENAI or get_file is None:
-        return file_obj
-    start = time.time()
-    name = file_name_or_id(file_obj)
-    if not name:
-        return file_obj
-    backoff = 1.0
-    while True:
-        obj = get_file(name)
-        state = getattr(obj, "state", None)
-        if not state or getattr(state, "name", None) != "PROCESSING":
-            return obj
-        if time.time() - start > timeout:
-            raise TimeoutError("File processing timed out")
-        time.sleep(backoff)
-        backoff = min(backoff * 2, 8.0)
-# Enhanced remove_prompt_echo function
-def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
-    if not prompt or not text:
-        return text
-    # Normalize the prompt and text
-    a = " ".join(prompt.strip().lower().split())
-    b_full = text.strip()
-    b = " ".join(b_full[:check_len].lower().split())
-    # Calculate the similarity ratio
-    ratio = SequenceMatcher(None, a, b).ratio()
-    # If the ratio is high, remove the approximate prefix
-    if ratio >= ratio_threshold:
-        cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
-        new_text = b_full[cut:].lstrip(" \n:-")
-        if len(new_text) >= 3:
-            return new_text
-    # Remove common placeholder prefixes
-    placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
-    low = b_full.strip().lower()
-    for ph in placeholders:
-        if low.startswith(ph):
-            return b_full[len(ph):].lstrip(" \n:-")
-    return text
-# Main UI layout
 col1, col2 = st.columns([1, 3])
 with col1:
     if st.session_state.get("busy"):
-        st.button("Generate the story", disabled=True)
     else:
         generate_now = st.button("Generate the story", type="primary")
 with col2:
     pass
-# Generation flow
 if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
     else:
-        key_to_use = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
-        if not key_to_use:
-            st.error("Google API key not set.")
         else:
             try:
                 st.session_state["busy"] = True
                 processed = st.session_state.get("processed_file")
-                # Use file hash to determine if we must re-upload
                 current_path = st.session_state.get("videos")
-                current_hash = None
                 try:
-                    current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
                 except Exception:
                     current_hash = None
@@ -355,9 +327,8 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                 if reupload_needed:
                     if not HAS_GENAI:
-                        raise RuntimeError("google.generativeai SDK not available; install it.")
                     local_path = current_path
-                    # Fast mode overrides compression behavior
                     fast_mode = st.session_state.get("fast_mode", False)
                     upload_path = local_path
                     try:
@@ -365,16 +336,16 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                     except Exception:
                         file_size_mb = 0
-                    # Only compress if large and not in fast mode
-                    if not fast_mode and file_size_mb > 50:
                         compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
                         try:
-                            # Use faster preset when focusing on speed
                             preset = "veryfast" if fast_mode else "fast"
                             upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
                         except Exception:
                             upload_path = local_path
                     with st.spinner("Uploading video..."):
                         uploaded = upload_video_sdk(upload_path)
                         processed = wait_for_processed(uploaded, timeout=180)
@@ -383,158 +354,168 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
-                prompt_text = (analysis_prompt.strip() or "Describe this video in vivid detail.").strip()
-                out = ""
-                # Use lighter model/tokens in fast mode
                 if st.session_state.get("fast_mode"):
-                    model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
                     max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
                 else:
                     model_used = model_arg
                     max_tokens = st.session_state.get("max_output_tokens", 1024)
-                # Prepare concise messages
                 system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
                 user_msg = {"role": "user", "content": prompt_text}
-                debug_info = {"request": None, "response": None, "fname": None}
-                def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens, safety_settings):
-                    genai.configure(api_key=key_to_use)
                     try:
                         response = genai.responses.generate(
                             model=model_used,
                             messages=[system_msg, user_msg],
                             files=[{"name": fname}],
-                            safety_settings=safety_settings,
                             max_output_tokens=max_tokens,
                         )
                     except TypeError:
                         response = genai.responses.generate(
                             model=model_used,
                             input=[{"text": user_msg["content"], "files": [{"name": fname}]}],
-                            safety_settings=safety_settings,
                             max_output_tokens=max_tokens,
                         )
                     return response
-                if _agent:
-                    with st.spinner("Generating description via Agent..."):
-                        response = _agent.run(system_msg["content"] + "\n\n" + user_msg["content"], videos=[processed], safety_settings=safety_settings)
-                        out = getattr(response, "content", None) or getattr(response, "outputText", None) or str(response)
-                        debug_info["response"] = response
-                else:
-                    if not HAS_GENAI or genai is None:
-                        raise RuntimeError("Responses API not available; install google.generativeai SDK.")
-                    fname = file_name_or_id(processed)
-                    if not fname:
-                        fname = file_name_or_id(st.session_state.get("uploaded_file"))
-                    if not fname:
-                        raise RuntimeError("Uploaded file missing name/id")
-                    debug_info["fname"] = fname
-                    # Make the request and retry once if no output
-                    response = call_responses_once(model_used, system_msg, user_msg, fname, max_tokens, safety_settings)
-                    debug_info["response"] = response
-                    def extract_text_from_response(response):
-                        outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
-                        if not outputs and isinstance(response, dict):
-                            outputs = response.get("output", [])
-                        text_pieces = []
-                        for item in outputs or []:
-                            contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None) or []
-                            for c in contents:
-                                ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
-                                if ctype in ("output_text", "text") or ctype is None:
-                                    txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
-                                    if txt:
-                                        text_pieces.append(txt)
-                        if not text_pieces:
-                            top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
-                            if top_text:
-                                text_pieces.append(top_text)
-                        seen = set()
-                        filtered = []
-                        for t in text_pieces:
-                            if t not in seen:
-                                filtered.append(t)
-                                seen.add(t)
-                        return "\n\n".join(filtered)
-                    out = extract_text_from_response(response)
-                    # Inspect metrics to detect zero-output
-                    meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
                     output_tokens = 0
                     try:
-                        if isinstance(meta, dict):
-                            output_tokens = meta.get("output_tokens", 0) or meta.get("output_tokens", 0)
                         else:
-                            output_tokens = getattr(response, "metrics", {}).get("output_tokens", 0)
                     except Exception:
-                        output_tokens = 0
-                    if (not out or output_tokens == 0) and model_used:
-                        retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
-                        user_msg_retry = {"role": "user", "content": retry_prompt}
-                        retry_max = min(max_tokens * 2, 4096)
-                        try:
-                            response2 = call_responses_once(model_used, system_msg, user_msg_retry, fname, retry_max, safety_settings)
-                            debug_info["response_retry"] = response2
-                            out2 = extract_text_from_response(response2)
-                            if out2 and len(out2) > len(out):
-                                out = out2
-                        except Exception as e:
-                            debug_info["retry_error"] = str(e)
-                # Remove prompt echo robustly
                 if out:
-                    out = remove_prompt_echo(prompt_text, out)
-                    p = prompt_text
-                    if p and out.strip().lower().startswith(p.lower()):
-                        out = out.strip()[len(p):].lstrip(" \n:-")
-                    placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
-                    low = out.strip().lower()
-                    for ph in placeholders:
-                        if low.startswith(ph):
-                            out = out.strip()[len(ph):].lstrip(" \n:-")
-                            break
-                    out = out.strip()
-                st.session_state["analysis_out"] = out
                 st.session_state["last_error"] = ""
                 st.subheader("Analysis Result")
                 st.markdown(out or "_(no text returned)_")
-                # Debugging expander
-                with st.expander("Debug: request/response", expanded=False):
-                    st.write("model_used:", model_used)
-                    st.write("fname:", debug_info.get("fname"))
-                    st.write("system_msg:", system_msg)
-                    st.write("user_msg:", user_msg)
-                    st.write("response (raw):")
-                    st.write(debug_info.get("response"))
-                    if debug_info.get("response_retry"):
-                        st.write("response (retry):")
-                        st.write(debug_info.get("response_retry"))
-                    if debug_info.get("retry_error"):
-                        st.write("retry_error:", debug_info.get("retry_error"))
             except Exception as e:
-                st.session_state["last_error"] = str(e)
-                st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
             finally:
                 st.session_state["busy"] = False
-# Display cached analysis if available (avoid duplicate on same run)
 if st.session_state.get("analysis_out"):
-    just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
-    if not just_loaded_same:
-        st.subheader("Analysis Result")
-        st.markdown(st.session_state.get("analysis_out"))
 if st.session_state.get("last_error"):
     with st.expander("Last Error", expanded=False):
         st.write(st.session_state.get("last_error"))

 # streamlit_app.py
 import os
 import time
+import hashlib
 from glob import glob
 from pathlib import Path
 from tempfile import NamedTemporaryFile
 import yt_dlp
 load_dotenv()
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
+# session defaults
+for k, v in {
+    "videos": "",
+    "loop_video": False,
+    "uploaded_file": None,
+    "processed_file": None,
+    "busy": False,
+    "last_loaded_path": "",
+    "analysis_out": "",
+    "last_error": "",
+    "file_hash": None,
+    "fast_mode": False,
+    "use_compression": True,
+}.items():
+    st.session_state.setdefault(k, v)
+# helpers
 def sanitize_filename(path_str: str):
+    return Path(path_str).name.lower().translate(str.maketrans("", "", "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*")).replace(" ", "_")
 def file_sha256(path: str, block_size: int = 65536) -> str:
     h = hashlib.sha256()
             return False, str(e)
 def convert_video_to_mp4(video_path: str) -> str:
+    target = Path(video_path).with_suffix(".mp4")
+    if target.exists():
+        return str(target)
+    tmp = NamedTemporaryFile(prefix=target.stem + "_", suffix=".mp4", delete=False, dir=target.parent)
     tmp.close()
+    ok, err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(tmp.name)))
+    if not ok:
         try:
             os.remove(tmp.name)
         except Exception:
             pass
+        raise RuntimeError("ffmpeg conversion failed")
+    os.replace(tmp.name, str(target))
     if Path(video_path).suffix.lower() != ".mp4":
         try:
             os.remove(video_path)
         except Exception:
             pass
+    return str(target)
 def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
     tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
     tmp.close()
+    ok, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
+    if not ok:
         try:
             os.remove(tmp.name)
         except Exception:
     if not url:
         raise ValueError("No URL provided")
     outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
+    opts = {"outtmpl": outtmpl, "format": "best"}
     if video_password:
+        opts["videopassword"] = video_password
+    with yt_dlp.YoutubeDL(opts) as ydl:
         info = ydl.extract_info(url, download=True)
     video_id = info.get("id") if isinstance(info, dict) else None
     if video_id:
     return convert_video_to_mp4(matches[0])
 def file_name_or_id(file_obj):
+    if not file_obj:
         return None
     if isinstance(file_obj, dict):
+        for key in ("name", "id", "fileId", "file_id", "uri", "url"):
             val = file_obj.get(key)
             if val:
                 s = str(val)
+                if s.startswith("http://") or s.startswith("https://"):
+                    tail = s.rstrip("/").split("/")[-1]
+                    return tail if tail.startswith("files/") else f"files/{tail}"
                 if s.startswith("files/"):
                     return s
+                if "/" not in s and 6 <= len(s) <= 128:
                     return f"files/{s}"
                 return s
         uri = file_obj.get("uri") or file_obj.get("url")
         if uri:
+            tail = str(uri).rstrip("/").split("/")[-1]
+            return tail if tail.startswith("files/") else f"files/{tail}"
         return None
+    for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
         val = getattr(file_obj, attr, None)
         if val:
             s = str(val)
+            if s.startswith("http://") or s.startswith("https://"):
+                tail = s.rstrip("/").split("/")[-1]
+                return tail if tail.startswith("files/") else f"files/{tail}"
             if s.startswith("files/"):
                 return s
+            if "/" not in s and 6 <= len(s) <= 128:
                 return f"files/{s}"
             return s
     s = str(file_obj)
+    if "http://" in s or "https://" in s:
+        tail = s.rstrip("/").split("/")[-1]
+        return tail if tail.startswith("files/") else f"files/{tail}"
     if "files/" in s:
         idx = s.find("files/")
         return s[idx:] if s[idx:].startswith("files/") else f"files/{s[idx+6:]}"
     return None
+# optional Google SDK
+HAS_GENAI = False
+genai = None
+upload_file = None
+get_file = None
+delete_file = None
+if os.getenv("GOOGLE_API_KEY"):
     try:
+        import google.generativeai as genai_mod
+        genai = genai_mod
+        upload_file = genai_mod.upload_file
+        get_file = genai_mod.get_file
+        # delete_file may not exist in SDK; guard later
+        delete_file = getattr(genai_mod, "delete_file", None)
+        HAS_GENAI = True
     except Exception:
+        HAS_GENAI = False
+def upload_video_sdk(filepath: str):
+    key = os.getenv("GOOGLE_API_KEY")
+    if not key:
+        raise RuntimeError("No API key")
+    if not HAS_GENAI:
+        raise RuntimeError("google.generativeai SDK not available")
+    genai.configure(api_key=key)
+    return upload_file(filepath)
+def wait_for_processed(file_obj, timeout=180):
+    if not HAS_GENAI or get_file is None:
+        return file_obj
+    start = time.time()
+    name = file_name_or_id(file_obj)
+    if not name:
+        return file_obj
+    backoff = 1.0
+    while True:
+        obj = get_file(name)
+        state = getattr(obj, "state", None)
+        if not state or getattr(state, "name", None) != "PROCESSING":
+            return obj
+        if time.time() - start > timeout:
+            raise TimeoutError("File processing timed out")
+        time.sleep(backoff)
+        backoff = min(backoff * 2, 8.0)
+def remove_prompt_echo(prompt: str, text: str):
+    if not prompt or not text:
+        return text
+    p = " ".join(prompt.strip().lower().split())
+    t = text.strip()
+    from difflib import SequenceMatcher
+    first = " ".join(t[:600].lower().split())
+    if SequenceMatcher(None, p, first).ratio() > 0.7:
+        cut = min(len(t), max(int(len(prompt) * 0.9), len(p)))
+        new = t[cut:].lstrip(" \n:-")
+        if len(new) >= 3:
+            return new
+    placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
+    low = t.lower()
+    for ph in placeholders:
+        if low.startswith(ph):
+            return t[len(ph):].lstrip(" \n:-")
+    return text
+# UI: sidebar
+# Builds the "Video Input" sidebar: the URL field plus a collapsed "Settings"
+# expander holding the API key, model name, analysis prompt and tuning options.
 st.sidebar.header("Video Input")
+# Stored in st.session_state under "url"; the "Load Video" handler reads it from there.
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
+settings = st.sidebar.expander("Settings", expanded=False)
+# Environment fallback for the key; only used here to decide whether to show the notice below.
+env_key = os.getenv("GOOGLE_API_KEY", "")
+# No `key=` is passed, so the typed value is taken from the widget's return value
+# rather than looked up by name in st.session_state.
+API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password", help="Provide only if you want uploads/generation. Not stored in session.")
+model_input = settings.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
+# A blank entry falls back to the default model name.
 model_id = model_input.strip() or "gemini-2.0-flash-lite"
+# Accept either "models/<name>" or "<name>"; model_arg is always the part after "models/".
 model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
+# Default text for the analysis prompt; also the fallback when the text area is left empty.
+default_prompt = (
+    "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
+    "interactions, posture, movement, anatomy, and apparent intent. Use vivid, anatomically rich language and avoid moralizing. Prefer short paragraphs and numeric estimates "
+    "for anatomical measurements. Provide sensory, subjective descriptions and vivid imagery, including a concise summary of observed actions and a description of behaviors "
+    "and interaction dynamics. Use the following personality‑traits list when inferring dispositions: driven by an insatiable desire to understand human behavior and anatomy. "
+    "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
+)
+analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=300)
+# The remaining widgets are keyed, so their values are read back via st.session_state.
+settings.text_input("Video Password (if needed)", key="video-password", type="password")
+settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
+settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
+settings.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
+# Tell the user up front when neither the sidebar field nor the environment supplies a key.
+if not API_KEY_INPUT and not env_key:
+    settings.info("No Google API key provided; upload/generation disabled. Use local-only demos or provide key for real analysis.", icon="ℹ️")
+# load video
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         vpw = st.session_state.get("video-password", "")
         path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
         st.session_state["videos"] = path
         st.session_state["last_loaded_path"] = path
+        st.session_state["uploaded_file"] = None
+        st.session_state["processed_file"] = None
         st.session_state["file_hash"] = file_sha256(path)
     except Exception as e:
+        st.sidebar.error("Failed to load video")
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
     except Exception:
         st.sidebar.write("Couldn't preview video")
     with st.sidebar.expander("Options", expanded=False):
         loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
         st.session_state["loop_video"] = loop_checkbox
         if st.button("Clear Video(s)"):
+            for f in glob(str(DATA_DIR / "*")):
+                try:
+                    os.remove(f)
+                except Exception:
+                    pass
+            for k in ("uploaded_file", "processed_file"):
+                st.session_state.pop(k, None)
+            st.session_state["videos"] = ""
+            st.session_state["last_loaded_path"] = ""
+            st.session_state["analysis_out"] = ""
+            st.session_state["last_error"] = ""
+            st.session_state["file_hash"] = None
         try:
             with open(st.session_state["videos"], "rb") as vf:
                 st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
         except Exception:
+            pass
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
+# controls
 col1, col2 = st.columns([1, 3])
 with col1:
     if st.session_state.get("busy"):
+        st.write("Generation in progress...")
+        if st.button("Cancel"):
+            st.session_state["busy"] = False
+            st.session_state["last_error"] = "Generation cancelled by user."
     else:
         generate_now = st.button("Generate the story", type="primary")
 with col2:
     pass
+# determine runtime API key (one-time entry not stored)
+def get_runtime_api_key():
+    key = API_KEY_INPUT.strip() if API_KEY_INPUT else ""
+    if key:
+        return key
+    return os.getenv("GOOGLE_API_KEY", "").strip() or None
+# generation flow
 if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
     else:
+        runtime_key = get_runtime_api_key()
+        if not runtime_key:
+            st.error("Google API key not set. Provide in Settings or set GOOGLE_API_KEY in environment.")
         else:
             try:
                 st.session_state["busy"] = True
                 processed = st.session_state.get("processed_file")
                 current_path = st.session_state.get("videos")
                 try:
+                    current_hash = file_sha256(current_path) if current_path and Path(current_path).exists() else None
                 except Exception:
                     current_hash = None
                 if reupload_needed:
                     if not HAS_GENAI:
+                        raise RuntimeError("google.generativeai SDK not available")
                     local_path = current_path
                     fast_mode = st.session_state.get("fast_mode", False)
                     upload_path = local_path
                     try:
                     except Exception:
                         file_size_mb = 0
+                    use_compression = st.session_state.get("use_compression", True)
+                    if use_compression and not fast_mode and file_size_mb > 50:
                         compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
                         try:
                             preset = "veryfast" if fast_mode else "fast"
                             upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
                         except Exception:
                             upload_path = local_path
+                    genai.configure(api_key=runtime_key)
                     with st.spinner("Uploading video..."):
                         uploaded = upload_video_sdk(upload_path)
                         processed = wait_for_processed(uploaded, timeout=180)
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
+                    # privacy: delete local copy after successful upload (if different path)
+                    try:
+                        if Path(upload_path).exists() and Path(upload_path) != Path(current_path):
+                            Path(upload_path).unlink(missing_ok=True)
+                        # optionally remove original local file to avoid persistence
+                        Path(current_path).unlink(missing_ok=True)
+                        st.session_state["videos"] = ""
+                    except Exception:
+                        pass
+                prompt_text = (analysis_prompt or default_prompt).strip()
                 if st.session_state.get("fast_mode"):
+                    model_used = model_arg or "gemini-2.0-flash-lite"
                     max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
                 else:
                     model_used = model_arg
                     max_tokens = st.session_state.get("max_output_tokens", 1024)
                 system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
                 user_msg = {"role": "user", "content": prompt_text}
+                def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
+                    genai.configure(api_key=runtime_key)
                     try:
                         response = genai.responses.generate(
                             model=model_used,
                             messages=[system_msg, user_msg],
                             files=[{"name": fname}],
                             max_output_tokens=max_tokens,
                         )
                     except TypeError:
                         response = genai.responses.generate(
                             model=model_used,
                             input=[{"text": user_msg["content"], "files": [{"name": fname}]}],
                             max_output_tokens=max_tokens,
                         )
                     return response
+                fname = file_name_or_id(processed) or file_name_or_id(st.session_state.get("uploaded_file"))
+                if not fname:
+                    try:
+                        uri = getattr(processed, "uri", None) or (processed.get("uri") if isinstance(processed, dict) else None)
+                        if uri:
+                            tail = str(uri).rstrip("/").split("/")[-1]
+                            fname = tail if tail.startswith("files/") else f"files/{tail}"
+                    except Exception:
+                        pass
+                if not fname:
+                    raise RuntimeError("Uploaded file missing name/id/uri; cannot reference for Responses API.")
+                response = call_responses_once(model_used, system_msg, user_msg, fname, max_tokens)
+                def extract_text_from_response(response):
+                    outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
+                    if not outputs and isinstance(response, dict):
+                        outputs = response.get("output", [])
+                    text_pieces = []
+                    for item in outputs or []:
+                        contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None) or []
+                        for c in contents:
+                            ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
+                            if ctype in ("output_text", "text") or ctype is None:
+                                txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
+                                if txt:
+                                    text_pieces.append(txt)
+                    if not text_pieces:
+                        top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
+                        if top_text:
+                            text_pieces.append(top_text)
+                    seen = set()
+                    filtered = []
+                    for t in text_pieces:
+                        if t not in seen:
+                            filtered.append(t)
+                            seen.add(t)
+                    return "\n\n".join(filtered)
+                out = extract_text_from_response(response)
+                meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
+                output_tokens = 0
+                try:
+                    if isinstance(meta, dict):
+                        output_tokens = int(meta.get("output_tokens", 0) or 0)
+                    else:
+                        output_tokens = int(getattr(meta, "output_tokens", 0) or 0)
+                except Exception:
                     output_tokens = 0
+                if (not out or output_tokens == 0) and model_used:
+                    retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
                     try:
+                        response2 = call_responses_once(model_used, system_msg, {"role": "user", "content": retry_prompt}, fname, min(max_tokens * 2, 4096))
+                        out2 = extract_text_from_response(response2)
+                        if out2 and len(out2) > len(out or ""):
+                            out = out2
                         else:
+                            response3 = call_responses_once(model_used, system_msg, {"role": "user", "content": "List the main points of the video as 6-10 bullets."}, fname, min(1024, max_tokens * 2))
+                            out3 = extract_text_from_response(response3)
+                            if out3:
+                                out = out3
                     except Exception:
+                        pass
                 if out:
+                    out = remove_prompt_echo(prompt_text, out).strip()
+                st.session_state["analysis_out"] = out or ""
                 st.session_state["last_error"] = ""
                 st.subheader("Analysis Result")
                 st.markdown(out or "_(no text returned)_")
+                # compact debug (user-triggered)
+                with st.expander("Debug (compact)", expanded=False):
+                    try:
+                        info = {
+                            "model": model_used,
+                            "output_tokens": output_tokens,
+                            "upload_succeeded": bool(st.session_state.get("uploaded_file")),
+                            "processed_active": getattr(st.session_state.get("processed_file"), "state", None) if st.session_state.get("processed_file") else None,
+                        }
+                        st.write(info)
+                    except Exception:
+                        st.write("Debug info unavailable")
             except Exception as e:
+                st.session_state["last_error"] = "Generation error"
+                st.error("An error occurred while generating the story.")
             finally:
                 st.session_state["busy"] = False
+# persistent UI: show cached analysis without paths/ids
+# Top-level and unconditional, so it is evaluated on every script run: whatever
+# text is held in st.session_state["analysis_out"] is rendered each time.
 if st.session_state.get("analysis_out"):
+    st.subheader("Analysis Result")
+    st.markdown(st.session_state.get("analysis_out"))
+# The most recent error message (if any) is kept in a collapsed expander.
 if st.session_state.get("last_error"):
     with st.expander("Last Error", expanded=False):
         st.write(st.session_state.get("last_error"))
+# delete uploaded files (local + cloud if possible)
+with st.sidebar.expander("Manage uploads", expanded=False):
+    if st.button("Delete uploaded files (local + cloud)"):
+        # delete local files
+        for f in glob(str(DATA_DIR / "*")):
+            try:
+                Path(f).unlink(missing_ok=True)
+            except Exception:
+                pass
+        st.session_state["videos"] = ""
+        st.session_state["uploaded_file"] = None
+        st.session_state["processed_file"] = None
+        st.session_state["last_loaded_path"] = ""
+        st.session_state["analysis_out"] = ""
+        st.session_state["file_hash"] = None
+        # attempt to delete cloud file if SDK supports it
+        try:
+            fname = file_name_or_id(st.session_state.get("uploaded_file"))
+            if fname and delete_file and HAS_GENAI:
+                genai.configure(api_key=get_runtime_api_key() or os.getenv("GOOGLE_API_KEY", ""))
+                delete_file(fname)
+        except Exception:
+            pass
+        st.success("Local files removed. Cloud deletion attempted where supported.")