Spaces:

Hug0endob
/

Video-Analysis

Build error

App Files Community

CB committed on Sep 15, 2025

Commit

ed1c53f

verified ·

1 Parent(s): 770380c

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +46 -80

streamlit_app.py CHANGED Viewed

@@ -15,7 +15,7 @@ from dotenv import load_dotenv
 load_dotenv()
-# Optional phi integration (Agent + Gemini wrapper)
 try:
     from phi.agent import Agent
     from phi.model.google import Gemini
@@ -25,22 +25,20 @@ except Exception:
     Agent = Gemini = DuckDuckGo = None
     HAS_PHI = False
-# google-genai (v1.49.1)
 try:
-    import google_genai as genai  # package name for google-genai
-    from google_genai import Files, Responses, configure as genai_configure  # convenience
     HAS_GENAI = True
 except Exception:
     genai = None
-    Files = Responses = None
-    genai_configure = None
     HAS_GENAI = False
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
-# ---- Defaults & constants ----
 MODEL_OPTIONS = [
     "gemini-2.5-flash",
     "gemini-2.5-flash-lite",
@@ -48,14 +46,12 @@ MODEL_OPTIONS = [
     "gemini-2.0-flash-lite",
     "custom",
 ]
 DEFAULT_MODEL = "gemini-2.0-flash-lite"
 DEFAULT_PROMPT = (
     "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
     "Keep language professional. Include a list of observations for notable events."
 )
-# ---- Session defaults ----
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", False)
 st.session_state.setdefault("uploaded_file", None)
@@ -73,7 +69,6 @@ st.session_state.setdefault("processing_timeout", 900)
 st.session_state.setdefault("generation_timeout", 300)
 st.session_state.setdefault("compress_threshold_mb", 200)
-# ---- Helpers ----
 def sanitize_filename(path_str: str):
     name = Path(path_str).name
     return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
@@ -139,13 +134,11 @@ def configure_genai_if_needed():
     if not key:
         return False
     try:
-        if genai_configure:
-            genai_configure(api_key=key)
     except Exception:
         pass
     return True
-# ---- Agent management (reuse) ----
 _agent = None
 def maybe_create_agent(model_id: str):
     global _agent
@@ -156,8 +149,7 @@ def maybe_create_agent(model_id: str):
     if _agent and st.session_state.get("last_model") == model_id:
         return _agent
     try:
-        if genai_configure:
-            genai_configure(api_key=key)
         _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
         st.session_state["last_model"] = model_id
     except Exception:
@@ -183,7 +175,6 @@ if current_url != st.session_state.get("last_url_value"):
     clear_all_video_state()
     st.session_state["last_url_value"] = current_url
-# ---- Sidebar UI ----
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
@@ -228,34 +219,28 @@ safety_settings = [
     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
 ]
-# ---- Upload & processing helpers for google-genai Files ----
 def upload_video_sdk(filepath: str):
     key = get_effective_api_key()
     if not key:
         raise RuntimeError("No API key provided")
-    if not HAS_GENAI or Files is None:
-        raise RuntimeError("google-genai SDK not available; cannot upload")
-    if genai_configure:
-        genai_configure(api_key=key)
-    with open(filepath, "rb") as fh:
-        # Files.upload returns a response-like object; adapt as needed
-        resp = Files.create(file=fh, purpose="video")
-    return resp
 def wait_for_processed(file_obj, timeout: int = None):
     if timeout is None:
         timeout = st.session_state.get("processing_timeout", 900)
-    if not HAS_GENAI or Files is None:
         return file_obj
     start = time.time()
-    # file_obj may be a dict or an SDK object; adapt
-    file_id = file_obj.get("name") if isinstance(file_obj, dict) else getattr(file_obj, "name", None) or getattr(file_obj, "id", None)
-    if not file_id:
         return file_obj
     backoff = 1.0
     while True:
         try:
-            obj = Files.get(file_id)
         except Exception as e:
             if time.time() - start > timeout:
                 raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
@@ -263,9 +248,8 @@ def wait_for_processed(file_obj, timeout: int = None):
             backoff = min(backoff * 2, 8.0)
             continue
-        state = obj.get("state") if isinstance(obj, dict) else getattr(obj, "state", None)
-        name = state.get("name") if isinstance(state, dict) else getattr(state, "name", None)
-        if not name or name != "PROCESSING":
             return obj
         if time.time() - start > timeout:
@@ -312,62 +296,46 @@ def compress_video_if_large(local_path: str, threshold_mb: int = 200):
         st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
         return local_path, False
-# ---- Responses API caller adapted for google-genai Responses ----
 def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
     key = get_effective_api_key()
     if not key:
         raise RuntimeError("No API key provided")
-    if not HAS_GENAI or Responses is None:
-        raise RuntimeError("Responses API not available; install google-genai SDK.")
-    if genai_configure:
-        genai_configure(api_key=key)
-    file_name = file_name_or_id(processed)
-    if not file_name:
         raise RuntimeError("Uploaded file missing name/id")
-    # Build a minimal Responses.create call that attaches the video file reference.
-    # The exact shape depends on google-genai; here we create a simple text + reference instruction.
-    request = {
         "model": model_used,
-        "input": [
-            {"role": "system", "content": prompt_text},
-            {"role": "user", "content": "Please summarize the attached video."}
-        ],
-        "attachments": [{"mime_type": "video/mp4", "uri": f"file:{file_name}"}],
         "max_output_tokens": max_tokens,
         "temperature": 0.2,
     }
-    # Responses.create returns a response object/dict; attempt to extract text
-    resp = Responses.create(**request)
-    text = ""
-    # support multiple response shapes
-    if isinstance(resp, dict):
-        # common shapes: resp['output'][0]['content'] or resp['candidates'][0]['content']
-        out = resp.get("output") or resp.get("candidates")
-        if isinstance(out, list) and out:
-            first = out[0]
-            if isinstance(first, dict):
-                text = first.get("content") or first.get("text") or ""
             else:
-                text = str(first)
         else:
-            text = resp.get("content") or resp.get("text") or ""
-    else:
-        # SDK object: try attributes
-        try:
-            if hasattr(resp, "outputs"):
-                outputs = getattr(resp, "outputs", None)
-                if outputs:
-                    text = outputs[0].get("content") if isinstance(outputs, list) and isinstance(outputs[0], dict) else str(outputs[0])
-            elif hasattr(resp, "text"):
-                text = getattr(resp, "text", "")
-        except Exception:
-            text = str(resp)
-    return text or ""
-# ---- Layout ----
 col1, col2 = st.columns([1, 3])
 with col1:
     generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
@@ -417,7 +385,6 @@ if st.session_state["videos"]:
     except Exception:
         pass
-# ---- Main generation flow ----
 if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
@@ -430,8 +397,7 @@ if generate_now and not st.session_state.get("busy"):
                 st.session_state["busy"] = True
                 try:
                     if HAS_GENAI and genai is not None:
-                        if genai_configure:
-                            genai_configure(api_key=key_to_use)
                 except Exception:
                     pass
@@ -453,7 +419,7 @@ if generate_now and not st.session_state.get("busy"):
                 if reupload_needed:
                     if not HAS_GENAI:
-                        raise RuntimeError("google-genai SDK not available; install it.")
                     local_path = current_path
                     upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))

 load_dotenv()
+# Optional phi integration
 try:
     from phi.agent import Agent
     from phi.model.google import Gemini
     Agent = Gemini = DuckDuckGo = None
     HAS_PHI = False
+# Legacy google.generativeai SDK
 try:
+    import google.generativeai as genai
+    from google.generativeai import upload_file, get_file, responses  # type: ignore
     HAS_GENAI = True
 except Exception:
     genai = None
+    upload_file = get_file = responses = None
     HAS_GENAI = False
 st.set_page_config(page_title="Generate the story of videos", layout="wide")
 DATA_DIR = Path("./data")
 DATA_DIR.mkdir(exist_ok=True)
 MODEL_OPTIONS = [
     "gemini-2.5-flash",
     "gemini-2.5-flash-lite",
     "gemini-2.0-flash-lite",
     "custom",
 ]
 DEFAULT_MODEL = "gemini-2.0-flash-lite"
 DEFAULT_PROMPT = (
     "Watch the video and provide a detailed behavioral report focusing on human actions, interactions, posture, movement, and apparent intent. "
     "Keep language professional. Include a list of observations for notable events."
 )
 st.session_state.setdefault("videos", "")
 st.session_state.setdefault("loop_video", False)
 st.session_state.setdefault("uploaded_file", None)
 st.session_state.setdefault("generation_timeout", 300)
 st.session_state.setdefault("compress_threshold_mb", 200)
 def sanitize_filename(path_str: str):
     name = Path(path_str).name
     return name.lower().translate(str.maketrans("", "", string.punctuation)).replace(" ", "_")
     if not key:
         return False
     try:
+        genai.configure(api_key=key)
     except Exception:
         pass
     return True
 _agent = None
 def maybe_create_agent(model_id: str):
     global _agent
     if _agent and st.session_state.get("last_model") == model_id:
         return _agent
     try:
+        genai.configure(api_key=key)
         _agent = Agent(name="Video AI summarizer", model=Gemini(id=model_id), tools=[DuckDuckGo()], markdown=True)
         st.session_state["last_model"] = model_id
     except Exception:
     clear_all_video_state()
     st.session_state["last_url_value"] = current_url
 st.sidebar.header("Video Input")
 st.sidebar.text_input("Video URL", key="url", placeholder="https://")
     {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
 ]
 def upload_video_sdk(filepath: str):
     key = get_effective_api_key()
     if not key:
         raise RuntimeError("No API key provided")
+    if not HAS_GENAI or upload_file is None:
+        raise RuntimeError("google.generativeai SDK not available; cannot upload")
+    genai.configure(api_key=key)
+    return upload_file(filepath)
 def wait_for_processed(file_obj, timeout: int = None):
     if timeout is None:
         timeout = st.session_state.get("processing_timeout", 900)
+    if not HAS_GENAI or get_file is None:
         return file_obj
     start = time.time()
+    name = file_name_or_id(file_obj)
+    if not name:
         return file_obj
     backoff = 1.0
     while True:
         try:
+            obj = get_file(name)
         except Exception as e:
             if time.time() - start > timeout:
                 raise TimeoutError(f"Failed to fetch file status before timeout: {e}")
             backoff = min(backoff * 2, 8.0)
             continue
+        state = getattr(obj, "state", None)
+        if not state or getattr(state, "name", None) != "PROCESSING":
             return obj
         if time.time() - start > timeout:
         st.session_state["last_error"] = f"Video compression failed: {e}\n{traceback.format_exc()}"
         return local_path, False
 def generate_via_responses_api(prompt_text: str, processed, model_used: str, max_tokens: int = 1024, timeout: int = 300):
     key = get_effective_api_key()
     if not key:
         raise RuntimeError("No API key provided")
+    if not HAS_GENAI or responses is None:
+        raise RuntimeError("Responses API not available; install google-generativeai SDK.")
+    genai.configure(api_key=key)
+    fname = file_name_or_id(processed)
+    if not fname:
         raise RuntimeError("Uploaded file missing name/id")
+    system_msg = {"role": "system", "content": prompt_text}
+    user_msg = {"role": "user", "content": "Please summarize the attached video."}
+    req = {
         "model": model_used,
+        "input": [system_msg, user_msg],
+        "files": [fname],
         "max_output_tokens": max_tokens,
         "temperature": 0.2,
     }
+    resp = responses.create(**req)
+    # extract text robustly
+    out = ""
+    try:
+        if isinstance(resp, dict):
+            candidates = resp.get("candidates") or resp.get("output") or []
+            if isinstance(candidates, list) and candidates:
+                c = candidates[0]
+                if isinstance(c, dict):
+                    out = c.get("content") or c.get("text") or ""
+                else:
+                    out = str(c)
             else:
+                out = resp.get("outputText") or resp.get("content") or resp.get("text") or ""
         else:
+            out = getattr(resp, "output_text", "") or getattr(resp, "text", "") or ""
+    except Exception:
+        out = str(resp)
+    return out or ""
 col1, col2 = st.columns([1, 3])
 with col1:
     generate_now = st.button("Generate the story", type="primary", disabled=not bool(get_effective_api_key()))
     except Exception:
         pass
 if generate_now and not st.session_state.get("busy"):
     if not st.session_state.get("videos"):
         st.error("No video loaded. Use 'Load Video' in the sidebar.")
                 st.session_state["busy"] = True
                 try:
                     if HAS_GENAI and genai is not None:
+                        genai.configure(api_key=key_to_use)
                 except Exception:
                     pass
                 if reupload_needed:
                     if not HAS_GENAI:
+                        raise RuntimeError("google-generativeai SDK not available; install it.")
                     local_path = current_path
                     upload_path, compressed = compress_video_if_large(local_path, threshold_mb=st.session_state.get("compress_threshold_mb", 200))