Video-Analysis-Tool

Sleeping

App Files Files Community

CB committed on Sep 11, 2025

Commit

4633b20

verified ·

1 Parent(s): ef36655

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +11 -41

streamlit_app.py CHANGED Viewed

@@ -1,11 +1,10 @@
 # streamlit_app.py
 import os
 import time
-import json
 import string
 from glob import glob
 from pathlib import Path
-import hashlib
 from difflib import SequenceMatcher
 import yt_dlp
@@ -37,7 +36,6 @@ st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
-# Session state defaults
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", False)
 st.session_state.setdefault("uploaded_file", None)
@@ -49,7 +47,6 @@ st.session_state.setdefault("last_error", "")
 st.session_state.setdefault("file_hash", None)
 st.session_state.setdefault("fast_mode", False)
-# Helpers
 def sanitize_filename(path_str: str):
     """Return a lowercase, punctuation-free form of the final path component.

     Only the last component of ``path_str`` is kept. Every character listed
     in ``string.punctuation`` is dropped (this includes ``.`` and ``_``, so
     the extension separator disappears), and each remaining space is then
     turned into an underscore.
     """
     # Mapping a character to None in a translation table deletes it.
     strip_punct = str.maketrans(dict.fromkeys(string.punctuation))
     base = Path(path_str).name.lower()
     cleaned = base.translate(strip_punct)
     return "_".join(cleaned.split(" "))
@@ -104,14 +101,12 @@ def file_name_or_id(file_obj):
         return file_obj.get("name") or file_obj.get("id")
     return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
-# Configure Google SDK if key present
 if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
     try:
         genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
     except Exception:
         pass
-# UI: Sidebar inputs
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
@@ -121,7 +116,12 @@ API_KEY = settings_exp.text_input("Google API Key", value=env_api_key, placehold
 model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
 model_id = model_input.strip() or "gemini-2.0-flash-lite"
 model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
-analysis_prompt = settings_exp.text_area("Enter analysis", value="watch entire video and describe", height=120)
 settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
@@ -135,7 +135,6 @@ safety_settings = [
     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
 ]
-# Build Agent if available
 _agent = None
 if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
     try:
@@ -159,7 +158,6 @@ def clear_all_video_state():
         except Exception:
             pass
-# Track URL changes
 if "last_url_value" not in st.session_state:
     st.session_state["last_url_value"] = st.session_state.get("url", "")
 current_url = st.session_state.get("url", "")
@@ -167,7 +165,6 @@ if current_url != st.session_state.get("last_url_value"):
     clear_all_video_state()
     st.session_state["last_url_value"] = current_url
-# Load video button
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         vpw = st.session_state.get("video-password", "")
@@ -180,7 +177,6 @@ if st.sidebar.button("Load Video", use_container_width=True):
     except Exception as e:
         st.sidebar.error(f"Failed to load video: {e}")
-# Sidebar preview & options
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
@@ -202,7 +198,6 @@ if st.session_state["videos"]:
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
-# Upload helpers
 def upload_video_sdk(filepath: str):
     key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
     if not key:
@@ -230,7 +225,6 @@ def wait_for_processed(file_obj, timeout=180):
         time.sleep(backoff)
         backoff = min(backoff * 2, 8.0)
-# Robust prompt-echo removal
 def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
     if not prompt or not text:
         return text
@@ -239,13 +233,10 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
     b = " ".join(b_full[:check_len].lower().split())
     ratio = SequenceMatcher(None, a, b).ratio()
     if ratio >= ratio_threshold:
-        # remove the approximate prefix by length of prompt, but be conservative
         cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
         new_text = b_full[cut:].lstrip(" \n:-")
-        # If result is empty or too small, return original to avoid data loss
         if len(new_text) >= 3:
             return new_text
-    # also remove common placeholder prefixes
     placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
     low = b_full.strip().lower()
     for ph in placeholders:
@@ -253,18 +244,13 @@ def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_thres
             return b_full[len(ph):].lstrip(" \n:-")
     return text
-# Main UI layout
 col1, col2 = st.columns([1, 3])
 with col1:
-    if st.session_state.get("busy"):
-        st.button("Generate the story", disabled=True)
-    else:
-        generate_now = st.button("Generate the story", type="primary")
 with col2:
     pass
-# Generation flow
-if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
     else:
@@ -275,9 +261,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
             try:
                 st.session_state["busy"] = True
                 processed = st.session_state.get("processed_file")
-                # Use file hash to determine if we must re-upload
                 current_path = st.session_state.get("videos")
-                current_hash = None
                 try:
                     current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
                 except Exception:
@@ -291,7 +275,6 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                     if not HAS_GENAI:
                         raise RuntimeError("google.generativeai SDK not available; install it.")
                     local_path = current_path
-                    # Fast mode overrides compression behavior
                     fast_mode = st.session_state.get("fast_mode", False)
                     upload_path = local_path
                     try:
@@ -299,11 +282,9 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                     except Exception:
                         file_size_mb = 0
-                    # Only compress if large and not in fast mode
                     if not fast_mode and file_size_mb > 50:
                         compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
                         try:
-                            # Use faster preset when focusing on speed
                             preset = "veryfast" if fast_mode else "fast"
                             upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
                         except Exception:
@@ -317,10 +298,9 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
-                prompt_text = (analysis_prompt.strip() or "Describe this video in vivid detail.").strip()
                 out = ""
-                # Use lighter model/tokens in fast mode
                 if st.session_state.get("fast_mode"):
                     model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
                     max_tokens = 512
@@ -370,13 +350,10 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                                 txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
                                 if txt:
                                     text_pieces.append(txt)
                     if not text_pieces:
                         top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
                         if top_text:
                             text_pieces.append(top_text)
-                    # dedupe preserving order
                     seen = set()
                     filtered = []
                     for t in text_pieces:
@@ -385,23 +362,17 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                             seen.add(t)
                     out = "\n\n".join(filtered)
-                # Remove prompt echo robustly
                 if out:
                     out = remove_prompt_echo(prompt_text, out)
-                    # fallback: trim if startswith prompt exactly (legacy)
                     p = prompt_text
                     if p and out.strip().lower().startswith(p.lower()):
                         out = out.strip()[len(p):].lstrip(" \n:-")
-                    # strip placeholders
                     placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
                     low = out.strip().lower()
                     for ph in placeholders:
                         if low.startswith(ph):
                             out = out.strip()[len(ph):].lstrip(" \n:-")
                             break
                     out = out.strip()
                 st.session_state["analysis_out"] = out
@@ -414,7 +385,6 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
             finally:
                 st.session_state["busy"] = False
-# Display cached analysis if available (avoid duplicate on same run)
 if st.session_state.get("analysis_out"):
     just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
     if not just_loaded_same:
@@ -423,4 +393,4 @@ if st.session_state.get("analysis_out"):
 if st.session_state.get("last_error"):
     with st.expander("Last Error", expanded=False):
-        st.write(st.session_state.get("last_error"))

 # streamlit_app.py
 import os
 import time
 import string
+import hashlib
 from glob import glob
 from pathlib import Path
 from difflib import SequenceMatcher
 import yt_dlp
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", False)
 st.session_state.setdefault("uploaded_file", None)
 st.session_state.setdefault("file_hash", None)
 st.session_state.setdefault("fast_mode", False)
 def sanitize_filename(path_str: str):
     """Return a lowercase, punctuation-free form of the final path component.

     Only the last component of ``path_str`` is kept. Every character listed
     in ``string.punctuation`` is dropped (this includes ``.`` and ``_``, so
     the extension separator disappears), and each remaining space is then
     turned into an underscore.
     """
     # Mapping a character to None in a translation table deletes it.
     strip_punct = str.maketrans(dict.fromkeys(string.punctuation))
     base = Path(path_str).name.lower()
     cleaned = base.translate(strip_punct)
     return "_".join(cleaned.split(" "))
         return file_obj.get("name") or file_obj.get("id")
     return getattr(file_obj, "name", None) or getattr(file_obj, "id", None) or getattr(file_obj, "fileId", None)
 if os.getenv("GOOGLE_API_KEY") and HAS_GENAI:
     try:
         genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
     except Exception:
         pass
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
 model_input = settings_exp.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
 model_id = model_input.strip() or "gemini-2.0-flash-lite"
 model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
+default_prompt = (
+    "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
+    "Use vivid, anatomically rich descriptions with numeric estimates for measurements. Include a list of detailed anatomical observations and measurements. "
+    "Adopt a playful, inquisitive persona and ensure the report is engaging and informative."
+)
+analysis_prompt = settings_exp.text_area("Enter analysis", value=default_prompt, height=140)
 settings_exp.text_input("Video Password (if needed)", key="video-password", placeholder="password", type="password")
 settings_exp.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
 ]
 _agent = None
 if HAS_PHI and HAS_GENAI and (API_KEY or os.getenv("GOOGLE_API_KEY")):
     try:
         except Exception:
             pass
 if "last_url_value" not in st.session_state:
     st.session_state["last_url_value"] = st.session_state.get("url", "")
 current_url = st.session_state.get("url", "")
     clear_all_video_state()
     st.session_state["last_url_value"] = current_url
 if st.sidebar.button("Load Video", use_container_width=True):
     try:
         vpw = st.session_state.get("video-password", "")
     except Exception as e:
         st.sidebar.error(f"Failed to load video: {e}")
 if st.session_state["videos"]:
     try:
         st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
     st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
 def upload_video_sdk(filepath: str):
     key = API_KEY if API_KEY else os.getenv("GOOGLE_API_KEY")
     if not key:
         time.sleep(backoff)
         backoff = min(backoff * 2, 8.0)
 def remove_prompt_echo(prompt: str, text: str, check_len: int = 600, ratio_threshold: float = 0.68):
     if not prompt or not text:
         return text
     b = " ".join(b_full[:check_len].lower().split())
     ratio = SequenceMatcher(None, a, b).ratio()
     if ratio >= ratio_threshold:
         cut = min(len(b_full), max(int(len(prompt) * 0.9), len(a)))
         new_text = b_full[cut:].lstrip(" \n:-")
         if len(new_text) >= 3:
             return new_text
     placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
     low = b_full.strip().lower()
     for ph in placeholders:
             return b_full[len(ph):].lstrip(" \n:-")
     return text
 col1, col2 = st.columns([1, 3])
 with col1:
+    generate_now = st.button("Generate the story", type="primary")
 with col2:
     pass
+if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
     else:
             try:
                 st.session_state["busy"] = True
                 processed = st.session_state.get("processed_file")
                 current_path = st.session_state.get("videos")
                 try:
                     current_hash = file_sha256(current_path) if current_path and os.path.exists(current_path) else None
                 except Exception:
                     if not HAS_GENAI:
                         raise RuntimeError("google.generativeai SDK not available; install it.")
                     local_path = current_path
                     fast_mode = st.session_state.get("fast_mode", False)
                     upload_path = local_path
                     try:
                     except Exception:
                         file_size_mb = 0
                     if not fast_mode and file_size_mb > 50:
                         compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
                         try:
                             preset = "veryfast" if fast_mode else "fast"
                             upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
                         except Exception:
                         st.session_state["last_loaded_path"] = current_path
                         st.session_state["file_hash"] = current_hash
+                prompt_text = (analysis_prompt.strip() or default_prompt).strip()
                 out = ""
                 if st.session_state.get("fast_mode"):
                     model_used = model_arg if model_arg else "gemini-2.0-flash-lite"
                     max_tokens = 512
                                 txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
                                 if txt:
                                     text_pieces.append(txt)
                     if not text_pieces:
                         top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
                         if top_text:
                             text_pieces.append(top_text)
                     seen = set()
                     filtered = []
                     for t in text_pieces:
                             seen.add(t)
                     out = "\n\n".join(filtered)
                 if out:
                     out = remove_prompt_echo(prompt_text, out)
                     p = prompt_text
                     if p and out.strip().lower().startswith(p.lower()):
                         out = out.strip()[len(p):].lstrip(" \n:-")
                     placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
                     low = out.strip().lower()
                     for ph in placeholders:
                         if low.startswith(ph):
                             out = out.strip()[len(ph):].lstrip(" \n:-")
                             break
                     out = out.strip()
                 st.session_state["analysis_out"] = out
             finally:
                 st.session_state["busy"] = False
 if st.session_state.get("analysis_out"):
     just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
     if not just_loaded_same:
 if st.session_state.get("last_error"):
     with st.expander("Last Error", expanded=False):
+        st.write(st.session_state.get("last_error"))