CB committed on
Commit
42f08aa
·
verified ·
1 Parent(s): ff4797c

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +51 -39
streamlit_app.py CHANGED
@@ -6,14 +6,15 @@ from glob import glob
6
  from pathlib import Path
7
  from tempfile import NamedTemporaryFile
8
 
9
- import yt_dlp
10
  import ffmpeg
 
 
11
  import streamlit as st
12
  from dotenv import load_dotenv
13
 
14
  load_dotenv()
15
 
16
- # Try to import SDK
17
  HAS_GENAI = False
18
  genai = None
19
  upload_file = None
@@ -29,13 +30,11 @@ try:
29
  except Exception:
30
  HAS_GENAI = False
31
 
32
- import requests
33
- import json
34
-
35
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
36
  DATA_DIR = Path("./data")
37
  DATA_DIR.mkdir(exist_ok=True)
38
 
 
39
  for k, v in {
40
  "videos": "",
41
  "loop_video": False,
@@ -51,8 +50,13 @@ for k, v in {
51
  }.items():
52
  st.session_state.setdefault(k, v)
53
 
 
 
54
def sanitize_filename(path_str: str):
    """Return a filesystem-friendly version of the base name of *path_str*.

    The base name is lowercased, a fixed set of punctuation/special
    characters is stripped out, and any remaining spaces are replaced
    with underscores.
    """
    forbidden = "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*"
    cleaned = Path(path_str).name.lower()
    cleaned = cleaned.translate(str.maketrans("", "", forbidden))
    return cleaned.replace(" ", "_")
 
 
 
56
 
57
  def file_sha256(path: str, block_size: int = 65536) -> str:
58
  h = hashlib.sha256()
@@ -61,6 +65,7 @@ def file_sha256(path: str, block_size: int = 65536) -> str:
61
  h.update(chunk)
62
  return h.hexdigest()
63
 
 
64
  def safe_ffmpeg_run(stream_cmd):
65
  try:
66
  stream_cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
@@ -71,6 +76,7 @@ def safe_ffmpeg_run(stream_cmd):
71
  except Exception:
72
  return False, str(e)
73
 
 
74
  def convert_video_to_mp4(video_path: str) -> str:
75
  target = Path(video_path).with_suffix(".mp4")
76
  if target.exists():
@@ -92,6 +98,7 @@ def convert_video_to_mp4(video_path: str) -> str:
92
  pass
93
  return str(target)
94
 
 
95
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
96
  tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
97
  tmp.close()
@@ -105,6 +112,7 @@ def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str
105
  os.replace(tmp.name, target_path)
106
  return target_path
107
 
 
108
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
109
  if not url:
110
  raise ValueError("No URL provided")
@@ -123,6 +131,7 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
123
  raise FileNotFoundError("Downloaded video not found")
124
  return convert_video_to_mp4(matches[0])
125
 
 
126
  def file_name_or_id(file_obj):
127
  if not file_obj:
128
  return None
@@ -139,6 +148,7 @@ def file_name_or_id(file_obj):
139
  s = str(file_obj)
140
  return s if s else None
141
 
 
142
  def upload_video_sdk(filepath: str):
143
  key = get_runtime_api_key()
144
  if not key:
@@ -148,6 +158,7 @@ def upload_video_sdk(filepath: str):
148
  genai.configure(api_key=key)
149
  return upload_file(filepath)
150
 
 
151
  def wait_for_processed(file_obj, timeout=600):
152
  if not HAS_GENAI or get_file is None:
153
  return file_obj
@@ -169,6 +180,7 @@ def wait_for_processed(file_obj, timeout=600):
169
  time.sleep(backoff)
170
  backoff = min(backoff * 2, 8.0)
171
 
 
172
  def remove_prompt_echo(prompt: str, text: str):
173
  if not prompt or not text:
174
  return text
@@ -188,6 +200,8 @@ def remove_prompt_echo(prompt: str, text: str):
188
  return t[len(ph):].lstrip(" \n:-")
189
  return text
190
 
 
 
191
  st.sidebar.header("Video Input")
192
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
193
  settings = st.sidebar.expander("Settings", expanded=False)
@@ -196,7 +210,6 @@ env_key = os.getenv("GOOGLE_API_KEY", "")
196
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
197
  model_input = settings.text_input("Model (short name)", "text-bison@001")
198
  model_id = model_input.strip() or "text-bison@001"
199
- model_arg = model_id
200
 
201
  default_prompt = (
202
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
@@ -206,7 +219,7 @@ default_prompt = (
206
  "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
207
  )
208
 
209
- analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=300)
210
  settings.text_input("Video Password (if needed)", key="video-password", type="password")
211
  settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
212
  settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
@@ -257,7 +270,7 @@ if st.session_state["videos"]:
257
  pass
258
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
259
 
260
- col1, col2 = st.columns([1, 3])
261
  with col1:
262
  if st.session_state.get("busy"):
263
  st.write("Generation in progress...")
@@ -266,15 +279,16 @@ with col1:
266
  st.session_state["last_error"] = "Generation cancelled by user."
267
  else:
268
  generate_now = st.button("Generate the story", type="primary")
269
- with col2:
270
- pass
271
 
 
 
272
def get_runtime_api_key():
    """Resolve the Google API key to use at runtime.

    The key typed into the sidebar (``API_KEY_INPUT``) takes priority;
    otherwise fall back to the ``GOOGLE_API_KEY`` environment variable.
    Returns ``None`` when neither source provides a non-blank value.
    """
    entered = API_KEY_INPUT.strip() if API_KEY_INPUT else ""
    if entered:
        return entered
    env_value = os.getenv("GOOGLE_API_KEY", "").strip()
    return env_value or None
277
 
 
278
  def _messages_to_prompt(messages):
279
  if not messages:
280
  return ""
@@ -285,7 +299,9 @@ def _messages_to_prompt(messages):
285
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
286
  return "\n".join(parts)
287
 
 
288
  def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens: int):
 
289
  url = "https://generativelanguage.googleapis.com/v1/responses"
290
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
291
  payload = {
@@ -295,14 +311,14 @@ def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens:
295
  }
296
  r = requests.post(url, json=payload, headers=headers, timeout=30)
297
  if r.status_code != 200:
298
- # include body for debugging
299
  raise RuntimeError(f"HTTP {r.status_code}: {r.text}")
300
  return r.json()
301
 
 
302
  def responses_generate(model, messages, files, max_output_tokens, api_key):
303
  if not api_key:
304
  raise RuntimeError("No API key for responses_generate")
305
- # SDK path
306
  if HAS_GENAI and genai is not None:
307
  try:
308
  genai.configure(api_key=api_key)
@@ -314,59 +330,55 @@ def responses_generate(model, messages, files, max_output_tokens, api_key):
314
  return responses_obj.generate(**sdk_kwargs)
315
  except Exception:
316
  pass
317
- # HTTP fallback (Responses v1)
318
  prompt = _messages_to_prompt(messages)
319
  return _http_generate_responses(api_key, model, prompt, max_output_tokens)
320
 
 
321
def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
    """Invoke the Responses API for one prompt, retrying once after a 1s pause.

    Builds the message list from the system and user messages, optionally
    attaches the uploaded file reference, and re-raises the exception if
    the second attempt also fails.
    """
    messages = [system_msg, user_msg]
    attachments = [{"name": fname}] if fname else None
    for attempt in range(2):
        try:
            return responses_generate(model_used, messages, attachments, max_tokens, api_key=get_runtime_api_key())
        except Exception:
            if attempt < 1:
                # first failure: brief pause, then one retry
                time.sleep(1.0)
            else:
                raise
332
 
 
333
  def extract_text_from_response(response):
334
  if response is None:
335
  return None
336
  if isinstance(response, dict):
337
- # new Responses v1 shape: "output" -> list of items, each may contain "content" list with {"text":...}
338
  out = []
339
  for item in response.get("output", []) or []:
340
  if isinstance(item, dict):
341
- # content list
342
  for c in item.get("content", []) or []:
343
  if isinstance(c, dict) and "text" in c:
344
  out.append(c["text"])
345
- # fallback short text fields
346
  if "text" in item and isinstance(item["text"], str):
347
  out.append(item["text"])
348
  if "content" in item and isinstance(item["content"], str):
349
  out.append(item["content"])
350
  if out:
351
  return "\n\n".join(out)
352
- # older candidates style
353
  if "candidates" in response and response["candidates"]:
354
  cand = response["candidates"][0]
355
  if isinstance(cand, dict):
356
  return cand.get("content") or cand.get("text")
357
- # fallback simple fields
358
  if "outputText" in response:
359
  return response.get("outputText")
360
  if "text" in response:
361
  return response.get("text")
362
  return None
363
- # SDK object style
364
  try:
365
  outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
366
  if outputs:
367
  parts = []
368
  for item in outputs:
369
- # SDK item may be object or dict-like
370
  if hasattr(item, "content"):
371
  c = getattr(item, "content")
372
  if isinstance(c, list):
@@ -389,7 +401,8 @@ def extract_text_from_response(response):
389
  pass
390
  return None
391
 
392
- # ---- Main generation flow ----
 
393
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
394
  if not st.session_state.get("videos"):
395
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -414,6 +427,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
414
  upload_path = current_path
415
  uploaded = st.session_state.get("uploaded_file")
416
  compressed_path = None
 
417
  if reupload_needed:
418
  local_path = current_path
419
  fast_mode = bool(st.session_state.get("fast_mode", False))
@@ -454,10 +468,10 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
454
 
455
  prompt_text = (analysis_prompt or default_prompt or "").strip()
456
  if st.session_state.get("fast_mode"):
457
- model_used = model_arg or "text-bison@001"
458
  max_tokens = min(int(st.session_state.get("max_output_tokens", 512)), 1024)
459
  else:
460
- model_used = model_arg or "text-bison@001"
461
  max_tokens = int(st.session_state.get("max_output_tokens", 1024))
462
 
463
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
@@ -503,14 +517,10 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
503
  st.markdown(out or "_(no text returned)_")
504
 
505
  try:
506
- if reupload_needed:
507
- try:
508
- if compressed_path:
509
- p = Path(compressed_path)
510
- if p.exists():
511
- p.unlink(missing_ok=True)
512
- except Exception:
513
- pass
514
  except Exception:
515
  pass
516
 
@@ -540,6 +550,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
540
  finally:
541
  st.session_state["busy"] = False
542
 
 
543
  if st.session_state.get("analysis_out"):
544
  st.subheader("Analysis Result")
545
  st.markdown(st.session_state.get("analysis_out"))
@@ -555,12 +566,7 @@ with st.sidebar.expander("Manage uploads", expanded=False):
555
  Path(f).unlink(missing_ok=True)
556
  except Exception:
557
  pass
558
- st.session_state["videos"] = ""
559
- st.session_state["uploaded_file"] = None
560
- st.session_state["processed_file"] = None
561
- st.session_state["last_loaded_path"] = ""
562
- st.session_state["analysis_out"] = ""
563
- st.session_state["file_hash"] = None
564
  try:
565
  fname = file_name_or_id(st.session_state.get("uploaded_file"))
566
  if fname and delete_file and HAS_GENAI:
@@ -568,4 +574,10 @@ with st.sidebar.expander("Manage uploads", expanded=False):
568
  delete_file(fname)
569
  except Exception:
570
  pass
 
 
 
 
 
 
571
  st.success("Local files removed. Cloud deletion attempted where supported.")
 
6
  from pathlib import Path
7
  from tempfile import NamedTemporaryFile
8
 
 
9
  import ffmpeg
10
+ import yt_dlp
11
+ import requests
12
  import streamlit as st
13
  from dotenv import load_dotenv
14
 
15
  load_dotenv()
16
 
17
+ # Optional Google Generative AI SDK
18
  HAS_GENAI = False
19
  genai = None
20
  upload_file = None
 
30
  except Exception:
31
  HAS_GENAI = False
32
 
 
 
 
33
  st.set_page_config(page_title="Generate the story of videos", layout="wide")
34
  DATA_DIR = Path("./data")
35
  DATA_DIR.mkdir(exist_ok=True)
36
 
37
+ # session defaults
38
  for k, v in {
39
  "videos": "",
40
  "loop_video": False,
 
50
  }.items():
51
  st.session_state.setdefault(k, v)
52
 
53
+
54
+ # Utilities
55
def sanitize_filename(path_str: str):
    """Sanitize a path's base name: lowercase, drop special characters, spaces -> underscores."""
    table = str.maketrans("", "", "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*")
    return Path(path_str).name.lower().translate(table).replace(" ", "_")
59
+
60
 
61
  def file_sha256(path: str, block_size: int = 65536) -> str:
62
  h = hashlib.sha256()
 
65
  h.update(chunk)
66
  return h.hexdigest()
67
 
68
+
69
  def safe_ffmpeg_run(stream_cmd):
70
  try:
71
  stream_cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
 
76
  except Exception:
77
  return False, str(e)
78
 
79
+
80
  def convert_video_to_mp4(video_path: str) -> str:
81
  target = Path(video_path).with_suffix(".mp4")
82
  if target.exists():
 
98
  pass
99
  return str(target)
100
 
101
+
102
  def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
103
  tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
104
  tmp.close()
 
112
  os.replace(tmp.name, target_path)
113
  return target_path
114
 
115
+
116
  def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
117
  if not url:
118
  raise ValueError("No URL provided")
 
131
  raise FileNotFoundError("Downloaded video not found")
132
  return convert_video_to_mp4(matches[0])
133
 
134
+
135
  def file_name_or_id(file_obj):
136
  if not file_obj:
137
  return None
 
148
  s = str(file_obj)
149
  return s if s else None
150
 
151
+
152
  def upload_video_sdk(filepath: str):
153
  key = get_runtime_api_key()
154
  if not key:
 
158
  genai.configure(api_key=key)
159
  return upload_file(filepath)
160
 
161
+
162
  def wait_for_processed(file_obj, timeout=600):
163
  if not HAS_GENAI or get_file is None:
164
  return file_obj
 
180
  time.sleep(backoff)
181
  backoff = min(backoff * 2, 8.0)
182
 
183
+
184
  def remove_prompt_echo(prompt: str, text: str):
185
  if not prompt or not text:
186
  return text
 
200
  return t[len(ph):].lstrip(" \n:-")
201
  return text
202
 
203
+
204
+ # UI
205
  st.sidebar.header("Video Input")
206
  st.sidebar.text_input("Video URL", key="url", placeholder="https://")
207
  settings = st.sidebar.expander("Settings", expanded=False)
 
210
  API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
211
  model_input = settings.text_input("Model (short name)", "text-bison@001")
212
  model_id = model_input.strip() or "text-bison@001"
 
213
 
214
  default_prompt = (
215
  "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
 
219
  "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
220
  )
221
 
222
+ analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=240)
223
  settings.text_input("Video Password (if needed)", key="video-password", type="password")
224
  settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
225
  settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
 
270
  pass
271
  st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
272
 
273
+ col1, _col2 = st.columns([1, 3])
274
  with col1:
275
  if st.session_state.get("busy"):
276
  st.write("Generation in progress...")
 
279
  st.session_state["last_error"] = "Generation cancelled by user."
280
  else:
281
  generate_now = st.button("Generate the story", type="primary")
 
 
282
 
283
+
284
+ # Runtime helpers for Responses API
285
def get_runtime_api_key():
    """Return the API key for this session: sidebar input wins over the GOOGLE_API_KEY env var; None if neither is set."""
    if API_KEY_INPUT and API_KEY_INPUT.strip():
        return API_KEY_INPUT.strip()
    return os.getenv("GOOGLE_API_KEY", "").strip() or None
290
 
291
+
292
  def _messages_to_prompt(messages):
293
  if not messages:
294
  return ""
 
299
  parts.append(f"{role.upper()}:\n{content.strip()}\n")
300
  return "\n".join(parts)
301
 
302
+
303
  def _http_generate_responses(api_key: str, model: str, prompt: str, max_tokens: int):
304
+ # Use Responses v1 endpoint (works with modern Google GenAI HTTP API)
305
  url = "https://generativelanguage.googleapis.com/v1/responses"
306
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
307
  payload = {
 
311
  }
312
  r = requests.post(url, json=payload, headers=headers, timeout=30)
313
  if r.status_code != 200:
 
314
  raise RuntimeError(f"HTTP {r.status_code}: {r.text}")
315
  return r.json()
316
 
317
+
318
  def responses_generate(model, messages, files, max_output_tokens, api_key):
319
  if not api_key:
320
  raise RuntimeError("No API key for responses_generate")
321
+ # Try SDK first (if available and has responses.generate)
322
  if HAS_GENAI and genai is not None:
323
  try:
324
  genai.configure(api_key=api_key)
 
330
  return responses_obj.generate(**sdk_kwargs)
331
  except Exception:
332
  pass
333
+ # Fallback to HTTP Responses v1
334
  prompt = _messages_to_prompt(messages)
335
  return _http_generate_responses(api_key, model, prompt, max_output_tokens)
336
 
337
+
338
  def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
339
  messages = [system_msg, user_msg]
340
  files = [{"name": fname}] if fname else None
341
  for attempt in range(2):
342
  try:
343
  return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
344
+ except Exception:
345
  if attempt == 0:
346
  time.sleep(1.0)
347
  continue
348
  raise
349
 
350
+
351
  def extract_text_from_response(response):
352
  if response is None:
353
  return None
354
  if isinstance(response, dict):
 
355
  out = []
356
  for item in response.get("output", []) or []:
357
  if isinstance(item, dict):
 
358
  for c in item.get("content", []) or []:
359
  if isinstance(c, dict) and "text" in c:
360
  out.append(c["text"])
 
361
  if "text" in item and isinstance(item["text"], str):
362
  out.append(item["text"])
363
  if "content" in item and isinstance(item["content"], str):
364
  out.append(item["content"])
365
  if out:
366
  return "\n\n".join(out)
 
367
  if "candidates" in response and response["candidates"]:
368
  cand = response["candidates"][0]
369
  if isinstance(cand, dict):
370
  return cand.get("content") or cand.get("text")
 
371
  if "outputText" in response:
372
  return response.get("outputText")
373
  if "text" in response:
374
  return response.get("text")
375
  return None
376
+ # SDK-style objects
377
  try:
378
  outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
379
  if outputs:
380
  parts = []
381
  for item in outputs:
 
382
  if hasattr(item, "content"):
383
  c = getattr(item, "content")
384
  if isinstance(c, list):
 
401
  pass
402
  return None
403
 
404
+
405
+ # Main generation flow
406
  if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
407
  if not st.session_state.get("videos"):
408
  st.error("No video loaded. Use 'Load Video' in the sidebar.")
 
427
  upload_path = current_path
428
  uploaded = st.session_state.get("uploaded_file")
429
  compressed_path = None
430
+
431
  if reupload_needed:
432
  local_path = current_path
433
  fast_mode = bool(st.session_state.get("fast_mode", False))
 
468
 
469
  prompt_text = (analysis_prompt or default_prompt or "").strip()
470
  if st.session_state.get("fast_mode"):
471
+ model_used = model_id or "text-bison@001"
472
  max_tokens = min(int(st.session_state.get("max_output_tokens", 512)), 1024)
473
  else:
474
+ model_used = model_id or "text-bison@001"
475
  max_tokens = int(st.session_state.get("max_output_tokens", 1024))
476
 
477
  system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
 
517
  st.markdown(out or "_(no text returned)_")
518
 
519
  try:
520
+ if reupload_needed and compressed_path:
521
+ p = Path(compressed_path)
522
+ if p.exists():
523
+ p.unlink(missing_ok=True)
 
 
 
 
524
  except Exception:
525
  pass
526
 
 
550
  finally:
551
  st.session_state["busy"] = False
552
 
553
+ # Show outputs / errors
554
  if st.session_state.get("analysis_out"):
555
  st.subheader("Analysis Result")
556
  st.markdown(st.session_state.get("analysis_out"))
 
566
  Path(f).unlink(missing_ok=True)
567
  except Exception:
568
  pass
569
+ # attempt cloud deletion if supported
 
 
 
 
 
570
  try:
571
  fname = file_name_or_id(st.session_state.get("uploaded_file"))
572
  if fname and delete_file and HAS_GENAI:
 
574
  delete_file(fname)
575
  except Exception:
576
  pass
577
+ st.session_state["videos"] = ""
578
+ st.session_state["uploaded_file"] = None
579
+ st.session_state["processed_file"] = None
580
+ st.session_state["last_loaded_path"] = ""
581
+ st.session_state["analysis_out"] = ""
582
+ st.session_state["file_hash"] = None
583
  st.success("Local files removed. Cloud deletion attempted where supported.")