Video-Analysis-Tool

Sleeping

App Files Files Community

CB committed on Sep 11, 2025

Commit

8085632

verified ·

1 Parent(s): 33d86c2

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +104 -163

streamlit_app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import time
 import hashlib
@@ -12,6 +13,25 @@ from dotenv import load_dotenv
 load_dotenv()
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
@@ -106,60 +126,19 @@ def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) ->
 def file_name_or_id(file_obj):
     if not file_obj:
         return None
     if isinstance(file_obj, dict):
         for key in ("name", "id", "fileId", "file_id", "uri", "url"):
             val = file_obj.get(key)
             if val:
-                s = str(val)
-                if s.startswith("http://") or s.startswith("https://"):
-                    tail = s.rstrip("/").split("/")[-1]
-                    return tail if tail.startswith("files/") else f"files/{tail}"
-                if s.startswith("files/"):
-                    return s
-                if "/" not in s and 6 <= len(s) <= 128:
-                    return f"files/{s}"
-                return s
-        uri = file_obj.get("uri") or file_obj.get("url")
-        if uri:
-            tail = str(uri).rstrip("/").split("/")[-1]
-            return tail if tail.startswith("files/") else f"files/{tail}"
         return None
     for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
         val = getattr(file_obj, attr, None)
         if val:
-            s = str(val)
-            if s.startswith("http://") or s.startswith("https://"):
-                tail = s.rstrip("/").split("/")[-1]
-                return tail if tail.startswith("files/") else f"files/{tail}"
-            if s.startswith("files/"):
-                return s
-            if "/" not in s and 6 <= len(s) <= 128:
-                return f"files/{s}"
-            return s
     s = str(file_obj)
-    if "http://" in s or "https://" in s:
-        tail = s.rstrip("/").split("/")[-1]
-        return tail if tail.startswith("files/") else f"files/{tail}"
-    if "files/" in s:
-        idx = s.find("files/")
-        return s[idx:] if s[idx:].startswith("files/") else f"files/{s[idx+6:]}"
-    return None
-HAS_GENAI = False
-genai = None
-upload_file = None
-get_file = None
-delete_file = None
-if os.getenv("GOOGLE_API_KEY"):
-    try:
-        import google.generativeai as genai_mod
-        genai = genai_mod
-        upload_file = getattr(genai_mod, "upload_file", None)
-        get_file = getattr(genai_mod, "get_file", None)
-        delete_file = getattr(genai_mod, "delete_file", None)
-        HAS_GENAI = True
-    except Exception:
-        HAS_GENAI = False
 def upload_video_sdk(filepath: str):
     key = get_runtime_api_key()
@@ -216,9 +195,12 @@ settings = st.sidebar.expander("Settings", expanded=False)
 env_key = os.getenv("GOOGLE_API_KEY", "")
 API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
-model_input = settings.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
-model_id = model_input.strip() or "gemini-2.0-flash-lite"
 model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
 default_prompt = (
     "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
@@ -297,17 +279,15 @@ def get_runtime_api_key():
         return key
     return os.getenv("GOOGLE_API_KEY", "").strip() or None
-# --- patched responses / generate compatibility layer ---
-import json
-import requests
-def _normalize_model_for_url(model: str) -> str:
     if not model:
-        return "gemini-2.0"
-    return model.split("/", 1)[-1] if model.startswith("models/") else model
-def _build_prompt_from_messages(messages):
-    # messages expected as list of {"role":..., "content":...}
     if not messages:
         return ""
     parts = []
@@ -317,145 +297,103 @@ def _build_prompt_from_messages(messages):
         parts.append(f"{role.upper()}:\n{content.strip()}\n")
     return "\n".join(parts)
-def _parse_http_generate_response(rjson):
-    # Attempt to extract text from various generate shapes
-    if not rjson:
-        return None
-    # common new GL formats: {'candidates':[{'content': '...'}]} or {'output': [{'content': ...}]}
-    if isinstance(rjson, dict):
-        # try 'candidates'
-        if "candidates" in rjson and isinstance(rjson["candidates"], list) and rjson["candidates"]:
-            cand = rjson["candidates"][0]
-            return cand.get("content") or cand.get("text") or rjson.get("text")
-        # try 'output' array with 'content' items
-        out = rjson.get("output")
-        if isinstance(out, list) and out:
-            texts = []
-            for item in out:
-                if isinstance(item, dict):
-                    c = item.get("content") or item.get("contents") or item.get("text")
-                    if isinstance(c, str):
-                        texts.append(c)
-                    elif isinstance(c, list):
-                        for sub in c:
-                            if isinstance(sub, dict):
-                                t = sub.get("text") or sub.get("content")
-                                if t:
-                                    texts.append(t)
-            if texts:
-                return "\n\n".join(texts)
-        # fallback to top-level text
-        if "text" in rjson and isinstance(rjson["text"], str):
-            return rjson["text"]
-    return None
-def responses_generate(model, messages, files, max_output_tokens, api_key):
-    if not api_key:
-        raise RuntimeError("No API key for responses_generate")
-    sdk_err = None
-    # try SDK responses.generate (preferred)
-    if HAS_GENAI and genai is not None:
-        try:
-            genai.configure(api_key=api_key)
-            responses_obj = getattr(genai, "responses", None)
-            if responses_obj is not None and hasattr(responses_obj, "generate"):
-                # SDK expects messages and files in their SDK-specific shapes
-                sdk_kwargs = {"model": model, "messages": messages, "max_output_tokens": int(max_output_tokens or 512)}
-                if files:
-                    sdk_kwargs["files"] = files
-                return responses_obj.generate(**sdk_kwargs)
-        except Exception as e:
-            sdk_err = str(e)
-    # HTTP fallback to Generative Language "generate" endpoints.
     host = "https://generativelanguage.googleapis.com"
-    norm_model = _normalize_model_for_url(model)
     candidates = [
-        f"{host}/v1/models/{norm_model}:generate",
-        f"{host}/v1beta3/models/{norm_model}:generate",
-        f"{host}/v1beta2/models/{norm_model}:generate",
     ]
-    prompt_text = _build_prompt_from_messages(messages)
-    payload = {"prompt": {"text": prompt_text}, "maxOutputTokens": int(max_output_tokens or 512)}
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
     last_exc = None
     for url in candidates:
         try:
-            r = requests.post(url, json=payload, headers=headers, timeout=15)
             if r.status_code == 200:
                 try:
                     return r.json()
                 except Exception:
                     return {"text": r.text}
-            # if 404, try next; collect last
             last_exc = RuntimeError(f"HTTP {r.status_code}: {r.text}")
         except Exception as e:
             last_exc = e
-    diag = {"sdk_error": sdk_err, "http_error": str(last_exc), "tried_urls": candidates}
-    raise RuntimeError(f"genai.responses not available and HTTP fallback failed: {diag}")
 def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
-    # messages as [system_msg, user_msg]
     messages = [system_msg, user_msg]
     files = [{"name": fname}] if fname else None
     for attempt in range(2):
         try:
             return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
-        except Exception:
             if attempt == 0:
                 time.sleep(1.0)
                 continue
             raise
-# Helper to extract text from either SDK response object or HTTP dict
 def extract_text_from_response(response):
-    # SDK may return an object with .output, .candidates, or .text
-    # HTTP returns a dict with various shapes
-    # If it's an object (not dict), try attribute access
     try:
-        if response is None:
-            return None
-        if isinstance(response, dict):
-            # HTTP-style
-            text = _parse_http_generate_response(response)
-            if text:
-                return text
-            # try 'output' field shaped differently
-            outputs = response.get("output") or response.get("candidates")
-            if outputs:
-                pieces = []
-                for o in outputs:
-                    if isinstance(o, dict):
-                        t = o.get("content") or o.get("text")
-                        if isinstance(t, str):
-                            pieces.append(t)
-                if pieces:
-                    return "\n\n".join(pieces)
-            return response.get("text") or None
-        else:
-            # object-like SDK response
-            outputs = getattr(response, "output", None) or getattr(response, "candidates", None) or None
-            if outputs:
-                pieces = []
-                for item in outputs:
-                    # each item may have 'content' or 'text'
-                    txt = getattr(item, "content", None) or getattr(item, "text", None) or (item.get("content") if isinstance(item, dict) else None)
-                    if txt:
-                        pieces.append(txt)
-                if pieces:
-                    return "\n\n".join(pieces)
-            # try top-level text
-            txt = getattr(response, "text", None)
-            if txt:
-                return txt
     except Exception:
         pass
     return None
-# --- end patched section ---
 if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
     if not st.session_state.get("videos"):
@@ -517,12 +455,13 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                 prompt_text = (analysis_prompt or default_prompt).strip()
                 if st.session_state.get("fast_mode"):
-                    model_used = model_arg or "gemini-2.0-flash-lite"
                     max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
                 else:
                     model_used = model_arg
                     max_tokens = st.session_state.get("max_output_tokens", 1024)
                 system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
                 user_msg = {"role": "user", "content": prompt_text}
@@ -531,6 +470,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                 out = extract_text_from_response(response)
                 meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
                 output_tokens = 0
                 try:
@@ -541,6 +481,7 @@ if (st.session_state.get("busy") is False) and ('generate_now' in locals() and g
                 except Exception:
                     output_tokens = 0
                 if (not out or output_tokens == 0) and model_used:
                     retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
                     try:

+# streamlit_app.py
 import os
 import time
 import hashlib
 load_dotenv()
+# Optional SDK import; we try to use it when available.
+HAS_GENAI = False
+genai = None
+upload_file = None
+get_file = None
+delete_file = None
+try:
+    import google.generativeai as genai_mod  # type: ignore
+    genai = genai_mod
+    upload_file = getattr(genai_mod, "upload_file", None)
+    get_file = getattr(genai_mod, "get_file", None)
+    delete_file = getattr(genai_mod, "delete_file", None)
+    HAS_GENAI = True
+except Exception:
+    HAS_GENAI = False
+import requests
+import json
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
 def file_name_or_id(file_obj):
     if not file_obj:
         return None
+    # simple handling for dict or object - return a plausible id/name string
     if isinstance(file_obj, dict):
         for key in ("name", "id", "fileId", "file_id", "uri", "url"):
             val = file_obj.get(key)
             if val:
+                return str(val)
         return None
     for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
         val = getattr(file_obj, attr, None)
         if val:
+            return str(val)
     s = str(file_obj)
+    return s if s else None
 def upload_video_sdk(filepath: str):
     key = get_runtime_api_key()
 env_key = os.getenv("GOOGLE_API_KEY", "")
 API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
+# Default model changed to text-bison@001 (broadly available). Replace if you have another.
+model_input = settings.text_input("Model (short name)", "text-bison@001")
+model_id = model_input.strip() or "text-bison@001"
+# model_arg used with SDK; model_for_url used for HTTP
 model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
+model_for_url_default = model_arg.split("/", 1)[0] if "@" not in model_arg else model_arg  # keep @ if present
 default_prompt = (
     "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
         return key
     return os.getenv("GOOGLE_API_KEY", "").strip() or None
+# ---- Simplified SDK-first + HTTP-fallback layer ----
+def _normalize_model_for_http(model: str) -> str:
     if not model:
+        return "text-bison@001"
+    # if user provided "models/..." strip prefix
+    m = model.split("/", 1)[-1] if model.startswith("models/") else model
+    return m
+def _messages_to_prompt(messages):
     if not messages:
         return ""
     parts = []
         parts.append(f"{role.upper()}:\n{content.strip()}\n")
     return "\n".join(parts)
def _http_generate(api_key: str, model: str, prompt: str, max_tokens: int):
    """POST ``prompt`` to the Generative Language REST API and return the reply.

    Each candidate URL is tried in order; the first HTTP 200 wins.

    Args:
        api_key: Google API key, sent in the ``x-goog-api-key`` header.
        model: Model name; normalized with ``_normalize_model_for_http``.
        prompt: Plain-text prompt.
        max_tokens: Output-token cap; falsy values fall back to 512.

    Returns:
        The decoded JSON body, or ``{"text": <raw body>}`` when a 200
        response is not valid JSON.

    Raises:
        RuntimeError: If no candidate URL returned HTTP 200. The last
            transport or HTTP error is attached as ``__cause__``.
    """
    host = "https://generativelanguage.googleapis.com"
    norm = _normalize_model_for_http(model)
    # NOTE(review): the `:generate` method name and the `prompt` payload shape
    # are kept as-is; confirm them against the current REST reference, which
    # documents `:generateContent` with a `contents` body.
    candidates = [
        f"{host}/v1/models/{norm}:generate",
        f"{host}/v1beta3/models/{norm}:generate",
        f"{host}/v1beta2/models/{norm}:generate",
    ]
    payload = {"prompt": {"text": prompt}, "maxOutputTokens": int(max_tokens or 512)}
    # API keys go in `x-goog-api-key`; `Authorization: Bearer` is reserved for
    # OAuth access tokens, so a key sent that way is rejected.
    headers = {"x-goog-api-key": api_key, "Content-Type": "application/json"}

    last_exc = None
    for url in candidates:
        try:
            r = requests.post(url, json=payload, headers=headers, timeout=20)
        except requests.RequestException as exc:
            # Network-level failure: remember it and try the next URL.
            last_exc = exc
            continue
        if r.status_code == 200:
            try:
                return r.json()
            except ValueError:
                # A 200 with a non-JSON body is still handed back to the caller.
                return {"text": r.text}
        last_exc = RuntimeError(f"HTTP {r.status_code}: {r.text}")

    raise RuntimeError(f"HTTP generate failed: {last_exc}; tried: {candidates}") from last_exc
def responses_generate(model, messages, files, max_output_tokens, api_key):
    """Generate a response, trying the SDK first and then the HTTP fallback.

    Args:
        model: Model name passed to the SDK and to ``_http_generate``.
        messages: Chat messages; flattened with ``_messages_to_prompt`` for
            the HTTP path.
        files: Optional file references; forwarded to the SDK call only.
        max_output_tokens: Output-token cap; the SDK path falls back to 512
            when this is falsy.
        api_key: API key; required.

    Returns:
        Whatever ``genai.responses.generate`` returns when that attribute
        exists and the call succeeds; otherwise the result of
        ``_http_generate``.

    Raises:
        RuntimeError: If ``api_key`` is missing, or if the HTTP fallback
            fails. When the SDK attempt also failed, its error is included
            in the message so it is not lost.
    """
    if not api_key:
        raise RuntimeError("No API key for responses_generate")

    sdk_error = None
    if HAS_GENAI and genai is not None:
        try:
            genai.configure(api_key=api_key)
            responses_obj = getattr(genai, "responses", None)
            if responses_obj is not None and hasattr(responses_obj, "generate"):
                sdk_kwargs = {
                    "model": model,
                    "messages": messages,
                    "max_output_tokens": int(max_output_tokens or 512),
                }
                if files:
                    sdk_kwargs["files"] = files
                return responses_obj.generate(**sdk_kwargs)
        except Exception as exc:
            # Deliberate best-effort: any SDK failure falls through to HTTP,
            # but the error is kept for the final diagnostic.
            sdk_error = exc

    prompt = _messages_to_prompt(messages)
    try:
        return _http_generate(api_key, model, prompt, max_output_tokens)
    except RuntimeError as http_error:
        if sdk_error is None:
            raise
        raise RuntimeError(
            f"SDK call failed ({sdk_error!r}) and HTTP fallback failed: {http_error}"
        ) from http_error
 def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
     messages = [system_msg, user_msg]
     files = [{"name": fname}] if fname else None
     for attempt in range(2):
         try:
             return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
+        except Exception as e:
             if attempt == 0:
                 time.sleep(1.0)
                 continue
             raise
 def extract_text_from_response(response):
+    if response is None:
+        return None
+    # dict-like (HTTP)
+    if isinstance(response, dict):
+        # try common shapes
+        if "candidates" in response and isinstance(response["candidates"], list) and response["candidates"]:
+            cand = response["candidates"][0]
+            return cand.get("content") or cand.get("text") or response.get("text")
+        if "output" in response and isinstance(response["output"], list):
+            pieces = []
+            for item in response["output"]:
+                if isinstance(item, dict):
+                    c = item.get("content") or item.get("text")
+                    if isinstance(c, str):
+                        pieces.append(c)
+            if pieces:
+                return "\n\n".join(pieces)
+        if "text" in response and isinstance(response["text"], str):
+            return response["text"]
+        # fallback: join any candidate-like entries
+        return None
+    # object-like (SDK)
     try:
+        outputs = getattr(response, "output", None) or getattr(response, "candidates", None)
+        if outputs:
+            pieces = []
+            for item in outputs:
+                txt = getattr(item, "content", None) or getattr(item, "text", None)
+                if txt:
+                    pieces.append(txt)
+            if pieces:
+                return "\n\n".join(pieces)
+        txt = getattr(response, "text", None)
+        if txt:
+            return txt
     except Exception:
         pass
     return None
+# ---- end compatibility layer ----
 if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
     if not st.session_state.get("videos"):
                 prompt_text = (analysis_prompt or default_prompt).strip()
                 if st.session_state.get("fast_mode"):
+                    model_used = model_arg or "text-bison@001"
                     max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
                 else:
                     model_used = model_arg
                     max_tokens = st.session_state.get("max_output_tokens", 1024)
+                # Ensure model_used is a short name (SDK accepts it; HTTP will normalize)
                 system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
                 user_msg = {"role": "user", "content": prompt_text}
                 out = extract_text_from_response(response)
+                # Try to read token info if present
                 meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
                 output_tokens = 0
                 try:
                 except Exception:
                     output_tokens = 0
+                # Retry strategies if no output
                 if (not out or output_tokens == 0) and model_used:
                     retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
                     try: