Video-Analysis-Tool

Sleeping

App Files Files Community

Hug0endob committed on Nov 20, 2025

Commit

1cd718a

verified ·

1 Parent(s): 7b0be11

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +58 -373

streamlit_app.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 """
-Video‑analysis Streamlit app (refactored).
 """
 # ----------------------------------------------------------------------
@@ -26,296 +26,31 @@ import snscrape.modules.twitter as sntwitter
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
-def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
-    """Compress *inp* using libx264; return the compressed file."""
-    out = inp.with_name(f"{inp.stem}_compressed.mp4")
-    try:
-        ffmpeg.input(str(inp)).output(
-            str(out), vcodec="libx264", crf=crf, preset=preset
-        ).overwrite_output().run(capture_stdout=True, capture_stderr=True)
-    except ffmpeg.Error as e:
-        raise RuntimeError(f"ffmpeg compression failed: {e.stderr.decode()}") from e
-    return out if out.exists() else inp
-def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
-    """Compress *path* if its size exceeds *limit_mb*."""
-    size_mb = path.stat().st_size / (1024 * 1024)
-    if size_mb <= limit_mb:
-        return path, False
-    return _compress_video(path), True
-def _download_direct(url: str, dst: Path) -> Path:
-    """Download a raw video file via HTTP GET."""
-    r = requests.get(url, stream=True, timeout=30)
-    r.raise_for_status()
-    out = dst / _sanitize_filename(url.split("/")[-1])
-    with out.open("wb") as f:
-        for chunk in r.iter_content(chunk_size=8192):
-            if chunk:
-                f.write(chunk)
-    return out
-def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
-    """Download via yt‑dlp, ensuring the complete file is retrieved."""
-    tmpl = str(dst / "%(id)s.%(ext)s")
-    # Prefer a full‑container MP4; fall back to the best available format.
-    fmt = "best[ext=mp4]/best"
-    opts = {
-        "outtmpl": tmpl,
-        "format": fmt,
-        "quiet": True,
-        "noprogress": True,
-        "nocheckcertificate": True,
-        "merge_output_format": "mp4",   # force a single MP4 file
-        "fragment_retries": 0,          # avoid fragmented downloads
-    }
-    if password:
-        opts["videopassword"] = password
-    progress_bar = st.empty()
-    status_text = st.empty()
-    def _progress_hook(d):
-        if d["status"] == "downloading":
-            total = d.get("total_bytes") or d.get("total_bytes_estimate")
-            downloaded = d.get("downloaded_bytes", 0)
-            if total:
-                pct = downloaded / total
-                progress_bar.progress(pct)
-                status_text.caption(f"Downloading… {pct:.0%}")
-        elif d["status"] == "finished":
-            progress_bar.progress(1.0)
-            status_text.caption("Download complete, processing…")
-    opts["progress_hooks"] = [_progress_hook]
-    try:
-        with yt_dlp.YoutubeDL(opts) as ydl:
-            ydl.extract_info(url, download=True)
-    except Exception as e:
-        raise RuntimeError(f"yt‑dlp could not download the URL: {e}") from e
-    finally:
-        progress_bar.empty()
-        status_text.empty()
-    # yt‑dlp may have produced several files; pick the newest MP4
-    mp4_files = list(dst.glob("*.mp4"))
-    if not mp4_files:
-        raise RuntimeError("No MP4 file was created.")
-    newest = max(mp4_files, key=lambda p: p.stat().st_mtime)
-    # Optional cache: if a file with the same SHA‑256 already exists, reuse it
-    sha = _file_sha256(newest)
-    if sha:
-        for existing in dst.iterdir():
-            if existing != newest and _file_sha256(existing) == sha:
-                newest.unlink()          # remove duplicate
-                return existing
-    return newest
-def download_video(url: str, dst: Path, password: str = "") -> Path:
-    """
-    Download a video from *url* and return an MP4 path.
-    Strategy
-    ---------
-    1. Direct video URL → HTTP GET.
-    2. Twitter status → scrape for embedded video URLs.
-    3. yt‑dlp fallback for everything else.
-    """
-    video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
-    if url.lower().endswith(video_exts):
-        return _download_direct(url, dst)
-    if "twitter.com" in url and "/status/" in url:
-        tweet_id = url.split("/")[-1].split("?")[0]
-        for tweet in sntwitter.TwitterTweetScraper(tweet_id).get_items():
-            for m in getattr(tweet, "media", []):
-                if getattr(m, "video_url", None):
-                    return download_video(m.video_url, dst)
-            for u in getattr(tweet, "urls", []):
-                if u.expandedUrl.lower().endswith(video_exts):
-                    return download_video(u.expandedUrl, dst)
-        raise RuntimeError("No video found in the tweet.")
-    # Fallback to yt‑dlp for any other URL
-    return _download_with_yt_dlp(url, dst, password)
-def _encode_video_b64(path: Path) -> str:
-    """Read *path* and return a base64‑encoded string."""
-    return base64.b64encode(path.read_bytes()).decode()
-def generate_report(
-    video_path: Path,
-    prompt: str,
-    model_id: str,
-    timeout: int = 300,
-) -> str:
-    """Send video + prompt to Gemini and return the text response."""
-    b64 = _encode_video_b64(video_path)
-    video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
-    model = genai.GenerativeModel(model_name=model_id)
-    resp = model.generate_content(
-        [prompt, video_part],
-        generation_config={"max_output_tokens": 1024},
-        request_options={"timeout": timeout},
-    )
-    return getattr(resp, "text", str(resp))
-def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
-    """Remove the prompt if the model repeats it at the start of *text*."""
-    if not prompt or not text:
-        return text
-    clean_prompt = " ".join(prompt.lower().split())
-    snippet = " ".join(text.lower().split()[:600])
-    if SequenceMatcher(None, clean_prompt, snippet).ratio() > threshold:
-        cut = max(len(clean_prompt), int(len(prompt) * 0.9))
-        return text[cut:].lstrip(" \n:-")
-    return text
-# ----------------------------------------------------------------------
-# UI helpers
-# ----------------------------------------------------------------------
-def _expand_sidebar(width: int = 380) -> None:
-    """Inject CSS to make the sidebar wider."""
-    st.markdown(
-        f"""
-        <style>
-        .css-1d391kg {{  /* may vary with Streamlit versions */
-            width: {width}px !important;
-            min-width: {width}px !important;
-        }}
-        </style>
-        """,
-        unsafe_allow_html=True,
-    )
-# ----------------------------------------------------------------------
-# Streamlit UI
-# ----------------------------------------------------------------------
-def main() -> None:
-    st.set_page_config(page_title="Video Analysis", layout="wide")
-    _expand_sidebar()
-    # ---------- Sidebar ----------
-    st.sidebar.header("Video Input")
-    st.sidebar.text_input("Video URL", key="url", placeholder="https://")
-    if st.sidebar.button("Load Video"):
-        try:
-            with st.spinner("Downloading video…"):
-                raw_path = download_video(
-                    st.session_state["url"], DATA_DIR, st.session_state["video_password"]
-                )
-                mp4_path = _convert_to_mp4(Path(raw_path))
-            st.session_state["video_path"] = str(mp4_path)
-            st.session_state["last_error"] = ""
-            st.toast("Video ready")
-            st.experimental_rerun()
-        except Exception as e:
-            st.session_state["last_error"] = f"Download failed: {e}"
-            st.sidebar.error(st.session_state["last_error"])
-    # ---------- Settings ----------
-    with st.sidebar.expander("Settings", expanded=False):
-        model = st.selectbox(
-            "Model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL)
-        )
-        if model == "custom":
-            model = st.text_input("Custom model ID", value=DEFAULT_MODEL, key="custom_model")
-        st.session_state["model_input"] = model
-        # API key handling
-        secret_key = os.getenv("GOOGLE_API_KEY", "")
-        if secret_key:
-            st.session_state["api_key"] = secret_key
-        st.text_input("Google API Key", key="api_key", type="password")
-        st.text_area(
-            "Analysis prompt",
-            value=DEFAULT_PROMPT,
-            key="prompt",
-            height=140,
-        )
-        st.text_input(
-            "Video password (if needed)",
-            key="video_password",
-            type="password",
-        )
-        st.number_input(
-            "Compress if > (MB)",
-            min_value=10,
-            max_value=2000,
-            value=st.session_state.get("compress_mb", 200),
-            step=10,
-            key="compress_mb",
-        )
-        if st.sidebar.button("Clear Video"):
-            for f in DATA_DIR.iterdir():
-                try:
-                    f.unlink()
-                except Exception:
-                    pass
-            st
-]
 DEFAULT_MODEL = "gemini-2.0-flash-lite"
 DEFAULT_PROMPT = (
     "Watch the video and provide a detailed behavioral report focusing on human actions, "
     "interactions, posture, movement, and apparent intent. Keep language professional. "
     "Include a list of observations for notable events."
 )
-# ----------------------------------------------------------------------
-# Session‑state defaults
-# ----------------------------------------------------------------------
-def _init_state() -> None:
-    defaults = {
-        "url": "",
-        "video_path": "",
-        "model_input": DEFAULT_MODEL,
-        "prompt": DEFAULT_PROMPT,
-        "api_key": os.getenv("GOOGLE_API_KEY", "AIzaSyBiAW2GQLid0HGe9Vs_ReKwkwsSVNegNzs"),
-        "video_password": "",
-        "compress_mb": 200,
-        "busy": False,
-        "last_error": "",
-        "analysis_out": "",
-        "raw_output": "",
-        "last_error_detail": "",
-        "show_raw_on_error": False,
-        "show_analysis": False,
-    }
-    for k, v in defaults.items():
-        st.session_state.setdefault(k, v)
-_init_state()
 # ----------------------------------------------------------------------
 # Helper utilities
 # ----------------------------------------------------------------------
 def _sanitize_filename(url: str) -> str:
     name = Path(url).name.lower()
     return name.translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 def _file_sha256(path: Path) -> Optional[str]:
     try:
         h = hashlib.sha256()
         with path.open("rb") as f:
@@ -327,6 +62,7 @@ def _file_sha256(path: Path) -> Optional[str]:
 def _convert_to_mp4(src: Path) -> Path:
     dst = src.with_suffix(".mp4")
     if dst.exists():
         return dst
@@ -343,7 +79,7 @@ def _convert_to_mp4(src: Path) -> Path:
 def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
-    """Compress *inp* using libx264; return the compressed file."""
     out = inp.with_name(f"{inp.stem}_compressed.mp4")
     try:
         ffmpeg.input(str(inp)).output(
@@ -355,7 +91,7 @@ def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
 def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
-    """Compress *path* if its size exceeds *limit_mb*."""
     size_mb = path.stat().st_size / (1024 * 1024)
     if size_mb <= limit_mb:
         return path, False
@@ -363,7 +99,7 @@ def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
 def _download_direct(url: str, dst: Path) -> Path:
-    """Download a raw video file via HTTP GET."""
     r = requests.get(url, stream=True, timeout=30)
     r.raise_for_status()
     out = dst / _sanitize_filename(url.split("/")[-1])
@@ -375,9 +111,8 @@ def _download_direct(url: str, dst: Path) -> Path:
 def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
-    """Download via yt‑dlp, ensuring the complete file is retrieved."""
     tmpl = str(dst / "%(id)s.%(ext)s")
-    # Prefer a full‑container MP4; fall back to the best available format.
     fmt = "best[ext=mp4]/best"
     opts = {
@@ -386,8 +121,8 @@ def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
         "quiet": True,
         "noprogress": True,
         "nocheckcertificate": True,
-        "merge_output_format": "mp4",   # force a single MP4 file
-        "fragment_retries": 0,          # avoid fragmented downloads
     }
     if password:
         opts["videopassword"] = password
@@ -412,38 +147,27 @@ def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
     try:
         with yt_dlp.YoutubeDL(opts) as ydl:
             ydl.extract_info(url, download=True)
-    except Exception as e:
-        raise RuntimeError(f"yt‑dlp could not download the URL: {e}") from e
     finally:
         progress_bar.empty()
         status_text.empty()
-    # yt‑dlp may have produced several files; pick the newest MP4
     mp4_files = list(dst.glob("*.mp4"))
     if not mp4_files:
         raise RuntimeError("No MP4 file was created.")
     newest = max(mp4_files, key=lambda p: p.stat().st_mtime)
-    # Optional cache: if a file with the same SHA‑256 already exists, reuse it
     sha = _file_sha256(newest)
     if sha:
         for existing in dst.iterdir():
             if existing != newest and _file_sha256(existing) == sha:
-                newest.unlink()          # remove duplicate
                 return existing
     return newest
 def download_video(url: str, dst: Path, password: str = "") -> Path:
-    """
-    Download a video from *url* and return an MP4 path.
-    Strategy
-    ---------
-    1. Direct video URL → HTTP GET.
-    2. Twitter status → scrape for embedded video URLs.
-    3. yt‑dlp fallback for everything else.
-    """
     video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
     if url.lower().endswith(video_exts):
@@ -460,22 +184,16 @@ def download_video(url: str, dst: Path, password: str = "") -> Path:
                     return download_video(u.expandedUrl, dst)
         raise RuntimeError("No video found in the tweet.")
-    # Fallback to yt‑dlp for any other URL
     return _download_with_yt_dlp(url, dst, password)
 def _encode_video_b64(path: Path) -> str:
-    """Read *path* and return a base64‑encoded string."""
     return base64.b64encode(path.read_bytes()).decode()
-def generate_report(
-    video_path: Path,
-    prompt: str,
-    model_id: str,
-    timeout: int = 300,
-) -> str:
-    """Send video + prompt to Gemini and return the text response."""
     b64 = _encode_video_b64(video_path)
     video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
     model = genai.GenerativeModel(model_name=model_id)
@@ -489,13 +207,11 @@ def generate_report(
 def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
-    """Remove the prompt if the model repeats it at the start of *text*."""
     if not prompt or not text:
         return text
     clean_prompt = " ".join(prompt.lower().split())
     snippet = " ".join(text.lower().split()[:600])
     if SequenceMatcher(None, clean_prompt, snippet).ratio() > threshold:
         cut = max(len(clean_prompt), int(len(prompt) * 0.9))
         return text[cut:].lstrip(" \n:-")
@@ -506,7 +222,7 @@ def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
 # UI helpers
 # ----------------------------------------------------------------------
 def _expand_sidebar(width: int = 380) -> None:
-    """Inject CSS to make the sidebar wider."""
     st.markdown(
         f"""
         <style>
@@ -519,72 +235,41 @@ def _expand_sidebar(width: int = 380) -> None:
         unsafe_allow_html=True,
     )
 # ----------------------------------------------------------------------
-# Streamlit UI
 # ----------------------------------------------------------------------
-def main() -> None:
-    st.set_page_config(page_title="Video Analysis", layout="wide")
-    _expand_sidebar()
-    # ---------- Sidebar ----------
-    st.sidebar.header("Video Input")
-    st.sidebar.text_input("Video URL", key="url", placeholder="https://")
-    if st.sidebar.button("Load Video"):
-        try:
-            with st.spinner("Downloading video…"):
-                raw_path = download_video(
-                    st.session_state["url"], DATA_DIR, st.session_state["video_password"]
-                )
-                mp4_path = _convert_to_mp4(Path(raw_path))
-            st.session_state["video_path"] = str(mp4_path)
-            st.session_state["last_error"] = ""
-            st.toast("Video ready")
-            st.experimental_rerun()
-        except Exception as e:
-            st.session_state["last_error"] = f"Download failed: {e}"
-            st.sidebar.error(st.session_state["last_error"])
-    # ---------- Settings ----------
-    with st.sidebar.expander("Settings", expanded=False):
-        model = st.selectbox(
-            "Model", MODEL_OPTIONS, index=MODEL_OPTIONS.index(DEFAULT_MODEL)
-        )
-        if model == "custom":
-            model = st.text_input("Custom model ID", value=DEFAULT_MODEL, key="custom_model")
-        st.session_state["model_input"] = model
-        # API key handling
-        secret_key = os.getenv("GOOGLE_API_KEY", "")
-        if secret_key:
-            st.session_state["api_key"] = secret_key
-        st.text_input("Google API Key", key="api_key", type="password")
-        st.text_area(
-            "Analysis prompt",
-            value=DEFAULT_PROMPT,
-            key="prompt",
-            height=140,
-        )
-        st.text_input(
-            "Video password (if needed)",
-            key="video_password",
-            type="password",
-        )
-        st.number_input(
-            "Compress if > (MB)",
-            min_value=10,
-            max_value=2000,
-            value=st.session_state.get("compress_mb", 200),
-            step=10,
-            key="compress_mb",
-        )
-        if st.sidebar.button("Clear Video"):
-            for f in DATA_DIR.iterdir():
-                try:
-                    f.unlink()
-                except Exception:
-                    pass
-            st

 # -*- coding: utf-8 -*-
 """
+Video‑analysis Streamlit app (refactored & fixed).
 """
 # ----------------------------------------------------------------------
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
 DEFAULT_MODEL = "gemini-2.0-flash-lite"
 DEFAULT_PROMPT = (
     "Watch the video and provide a detailed behavioral report focusing on human actions, "
     "interactions, posture, movement, and apparent intent. Keep language professional. "
     "Include a list of observations for notable events."
 )
+MODEL_OPTIONS = [
+    "gemini-1.5-pro",
+    "gemini-1.5-flash",
+    "gemini-2.0-flash-lite",
+    "custom",
+]
 # ----------------------------------------------------------------------
 # Helper utilities
 # ----------------------------------------------------------------------
 def _sanitize_filename(url: str) -> str:
+    """Make a safe filename from a URL."""
     name = Path(url).name.lower()
     return name.translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
 def _file_sha256(path: Path) -> Optional[str]:
+    """Return SHA‑256 hex digest of *path* or None on error."""
     try:
         h = hashlib.sha256()
         with path.open("rb") as f:
 def _convert_to_mp4(src: Path) -> Path:
+    """Convert *src* to MP4 (ffmpeg) and delete the original."""
     dst = src.with_suffix(".mp4")
     if dst.exists():
         return dst
 def _compress_video(inp: Path, crf: int = 28, preset: str = "fast") -> Path:
+    """Compress *inp* with libx264; return the new file."""
     out = inp.with_name(f"{inp.stem}_compressed.mp4")
     try:
         ffmpeg.input(str(inp)).output(
 def _maybe_compress(path: Path, limit_mb: int) -> Tuple[Path, bool]:
+    """Compress *path* if it exceeds *limit_mb*."""
     size_mb = path.stat().st_size / (1024 * 1024)
     if size_mb <= limit_mb:
         return path, False
 def _download_direct(url: str, dst: Path) -> Path:
+    """Simple HTTP GET download."""
     r = requests.get(url, stream=True, timeout=30)
     r.raise_for_status()
     out = dst / _sanitize_filename(url.split("/")[-1])
 def _download_with_yt_dlp(url: str, dst: Path, password: str = "") -> Path:
+    """Download via yt‑dlp, returning the newest MP4."""
     tmpl = str(dst / "%(id)s.%(ext)s")
     fmt = "best[ext=mp4]/best"
     opts = {
         "quiet": True,
         "noprogress": True,
         "nocheckcertificate": True,
+        "merge_output_format": "mp4",
+        "fragment_retries": 0,
     }
     if password:
         opts["videopassword"] = password
     try:
         with yt_dlp.YoutubeDL(opts) as ydl:
             ydl.extract_info(url, download=True)
     finally:
         progress_bar.empty()
         status_text.empty()
     mp4_files = list(dst.glob("*.mp4"))
     if not mp4_files:
         raise RuntimeError("No MP4 file was created.")
     newest = max(mp4_files, key=lambda p: p.stat().st_mtime)
+    # Deduplicate via SHA‑256 cache
     sha = _file_sha256(newest)
     if sha:
         for existing in dst.iterdir():
             if existing != newest and _file_sha256(existing) == sha:
+                newest.unlink()
                 return existing
     return newest
 def download_video(url: str, dst: Path, password: str = "") -> Path:
+    """Unified download entry point."""
     video_exts = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
     if url.lower().endswith(video_exts):
                     return download_video(u.expandedUrl, dst)
         raise RuntimeError("No video found in the tweet.")
     return _download_with_yt_dlp(url, dst, password)
 def _encode_video_b64(path: Path) -> str:
+    """Base64‑encode a file."""
     return base64.b64encode(path.read_bytes()).decode()
+def generate_report(video_path: Path, prompt: str, model_id: str, timeout: int = 300) -> str:
+    """Send video + prompt to Gemini and return the response text."""
     b64 = _encode_video_b64(video_path)
     video_part = {"inline_data": {"mime_type": "video/mp4", "data": b64}}
     model = genai.GenerativeModel(model_name=model_id)
 def _strip_prompt_echo(prompt: str, text: str, threshold: float = 0.68) -> str:
+    """Trim the prompt if the model repeats it at the start."""
     if not prompt or not text:
         return text
     clean_prompt = " ".join(prompt.lower().split())
     snippet = " ".join(text.lower().split()[:600])
     if SequenceMatcher(None, clean_prompt, snippet).ratio() > threshold:
         cut = max(len(clean_prompt), int(len(prompt) * 0.9))
         return text[cut:].lstrip(" \n:-")
 # UI helpers
 # ----------------------------------------------------------------------
 def _expand_sidebar(width: int = 380) -> None:
+    """Make the Streamlit sidebar wider."""
     st.markdown(
         f"""
         <style>
         unsafe_allow_html=True,
     )
+# ----------------------------------------------------------------------
+# Session‑state defaults
+# ----------------------------------------------------------------------
+def _init_state() -> None:
+    """Populate Streamlit's session_state with sensible defaults."""
+    defaults = {
+        "url": "",
+        "video_path": "",
+        "model_input": DEFAULT_MODEL,
+        "prompt": DEFAULT_PROMPT,
+        "api_key": os.getenv("GOOGLE_API_KEY", "AIzaSyBiAW2GQLid0HGe9Vs_ReKwkwsSVNegNzs"),
+        "video_password": "",
+        "compress_mb": 200,
+        "busy": False,
+        "last_error": "",
+        "analysis_out": "",
+        "raw_output": "",
+        "last_error_detail": "",
+        "show_raw_on_error": False,
+        "show_analysis": False,
+    }
+    for k, v in defaults.items():
+        st.session_state.setdefault(k, v)
 # ----------------------------------------------------------------------
+# Main entry point
 # ----------------------------------------------------------------------
+if __name__ == "__main__":
+    # Initialise session state before any UI code runs
+    _init_state()
+    # Initialise the Gemini API – the key can be supplied via the sidebar or env var
+    if st.session_state["api_key"]:
+        genai.configure(api_key=st.session_state["api_key"])
+    # Run the Streamlit app
+    main()