CB committed
Update streamlit_app.py
streamlit_app.py  +83 -47  (CHANGED)
@@ -276,7 +276,6 @@ if st.session_state["videos"]:
         st.sidebar.caption(f"File size: {file_size_mb:.1f} MB")
         if file_size_mb > 50 and not st.session_state.get("fast_mode", False):
             st.sidebar.warning("Large file detected — consider enabling Fast mode or compression.", icon="⚠️")
-            # do not forcibly change user's fast_mode here; just recommend
     except Exception:
         pass

@@ -291,14 +290,12 @@ if generate_now and not st.session_state.get("busy"):
     else:
         try:
             st.session_state["busy"] = True
-            # ensure genai is configured now
             try:
                 if HAS_GENAI and genai is not None:
                     genai.configure(api_key=key_to_use)
             except Exception:
                 pass

-            # recreate/clear agent if key or model changed
             model_id = (st.session_state.get("model_input") or "gemini-2.5-flash-lite").strip()
             if st.session_state.get("last_model") != model_id:
                 st.session_state["last_model"] = ""
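The removed comments above described behavior the surrounding code still has: genai is configured right before generation, and the cached agent is invalidated when the selected model changes. A minimal sketch of that invalidate-on-change pattern, assuming a hypothetical build_agent factory standing in for the app's maybe_create_agent:

import streamlit as st

def get_agent(model_id: str, build_agent):
    # Rebuild and cache the agent whenever the selected model changes.
    # build_agent is a hypothetical factory, not the app's actual helper.
    if st.session_state.get("last_model") != model_id:
        st.session_state["agent"] = build_agent(model_id)
        st.session_state["last_model"] = model_id
    return st.session_state["agent"]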
@@ -352,15 +349,12 @@ if generate_now and not st.session_state.get("busy"):
             model_used = model_id
             max_tokens = 1024

-            # cost/tokens estimate (very rough)
             est_tokens = max_tokens
             est_cost_caption = f"Est. max tokens: {est_tokens}"

-            # Generate via Agent if available
             agent = maybe_create_agent(model_used)
             if agent:
                 with st.spinner("Generating description via Agent..."):
-                    # guard processed for agent: must be non-none
                     if not processed:
                         raise RuntimeError("Processed file missing for agent generation")
                     response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
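The estimate above is simply the configured output cap. If a rough dollar figure were wanted as well, it could be derived the same way; the sketch below assumes a placeholder per-million-token price that is not taken from the app or from any published rate card:

def estimate_cost_caption(max_output_tokens: int, usd_per_million_tokens: float = 0.40) -> str:
    # Very rough upper bound: assume every allowed output token is produced.
    # The price is a placeholder assumption, not a real rate.
    est_usd = max_output_tokens / 1_000_000 * usd_per_million_tokens
    return f"Est. max tokens: {max_output_tokens} (~${est_usd:.4f})"

# estimate_cost_caption(1024) -> 'Est. max tokens: 1024 (~$0.0004)'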
@@ -374,7 +368,8 @@ if generate_now and not st.session_state.get("busy"):
                     raise RuntimeError("Uploaded file missing name/id")
                 system_msg = {"role": "system", "content": prompt_text}
                 user_msg = {"role": "user", "content": "Please summarize the attached video."}
-
+
+                # Try the modern and legacy signatures; fail clearly if both fail
                 try:
                     response = genai.responses.generate(
                         model=model_used,
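The new comment announces a call that is attempted with one signature and retried with another. A generic sketch of that pattern, with two hypothetical callables standing in for the actual genai entry points:

def call_with_fallback(primary, legacy, **kwargs):
    # Try the newer signature first; if it rejects the arguments, retry with
    # the legacy one. Any other error propagates so the caller fails clearly.
    try:
        return primary(**kwargs)
    except TypeError:
        return legacy(**kwargs)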
@@ -391,63 +386,104 @@ if generate_now and not st.session_state.get("busy"):
                         max_output_tokens=max_tokens,
                     )

-                # Normalize
+                # Normalize response into iterable items safely
                 outputs = []
                 if response is None:
                     outputs = []
                 else:
-                    … (11 removed lines not recovered)
+                    # response might be object or dict; try known attributes/keys
+                    if isinstance(response, dict):
+                        # common dict keys
+                        if isinstance(response.get("output"), list):
+                            outputs = response.get("output") or []
+                        elif isinstance(response.get("candidates"), list):
+                            outputs = response.get("candidates") or []
+                        elif isinstance(response.get("items"), list):
+                            outputs = response.get("items") or []
+                        elif isinstance(response.get("responses"), list):
+                            outputs = response.get("responses") or []
                         else:
-                    … (5 removed lines not recovered)
+                            # fallback: try to find list-valued entries
+                            for v in response.values():
+                                if isinstance(v, list):
+                                    outputs = v
+                                    break
+                    else:
+                        # try attribute access
+                        attr_candidates = []
+                        for attr in ("output", "candidates", "items", "responses"):
+                            val = getattr(response, attr, None)
+                            if isinstance(val, list):
+                                attr_candidates = val
+                                break
+                        outputs = attr_candidates or []
+
+                # Ensure we have a list
+                if not isinstance(outputs, list):
+                    outputs = list(outputs) if outputs else []

                 text_pieces = []
+                # Iterate safely through outputs (may be dicts or objects)
                 for item in outputs:
-                    … (8 removed lines not recovered)
+                    if item is None:
+                        continue
+                    # attempt to extract a 'content' bag
+                    contents = None
+                    if isinstance(item, dict):
+                        contents = item.get("content") or item.get("text") or item.get("message") or item.get("output")
+                    else:
+                        contents = getattr(item, "content", None) or getattr(item, "text", None) or getattr(item, "message", None) or getattr(item, "output", None)
+
+                    # If contents is a single string, take it
+                    if isinstance(contents, str):
+                        if contents.strip():
+                            text_pieces.append(contents.strip())
                         continue
-                    … (1 removed line not recovered)
+
+                    # If contents is list-like, iterate
                     if isinstance(contents, (list, tuple)):
-                    … (1 removed line not recovered)
+                        for c in contents:
+                            if c is None:
+                                continue
+                            if isinstance(c, str):
+                                if c.strip():
+                                    text_pieces.append(c.strip())
+                                continue
+                            c_text = None
+                            if isinstance(c, dict):
+                                c_text = c.get("text") or c.get("content") or None
+                            else:
+                                c_text = getattr(c, "text", None) or getattr(c, "content", None)
+                            if c_text:
+                                text_pieces.append(str(c_text).strip())
+                                continue
+
+                    # If the item itself contains direct text fields
+                    direct_txt = None
+                    if isinstance(item, dict):
+                        direct_txt = item.get("text") or item.get("output_text") or item.get("message")
                     else:
-                    … (5 removed lines not recovered)
-                        if txt:
-                            text_pieces.append(txt)
-
-                # final fallback: top-level text fields
+                        direct_txt = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
+                    if direct_txt:
+                        text_pieces.append(str(direct_txt).strip())
+
+                # final fallback: top-level text on response
                 if not text_pieces:
-                    top_text =
+                    top_text = None
+                    if isinstance(response, dict):
+                        top_text = response.get("text") or response.get("message") or None
+                    else:
+                        top_text = getattr(response, "text", None) or getattr(response, "message", None)
                     if top_text:
-                        text_pieces.append(top_text)
+                        text_pieces.append(str(top_text).strip())

-                # dedupe
+                # dedupe preserving order
                 seen = set()
                 filtered = []
                 for t in text_pieces:
-                    if
+                    if not isinstance(t, str):
+                        continue
+                    if t and t not in seen:
                         filtered.append(t)
                         seen.add(t)
                 out = "\n\n".join(filtered)
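The normalization block added in the last hunk is the bulk of this change. Below is the same idea condensed into a standalone helper for readability; this is a sketch, not the app's code, and it assumes only what the diff itself assumes: the response may be None, a dict, or an object whose list-valued attribute holds the candidate items. The dict shape in the usage example is made up.

def extract_text_pieces(response):
    """Flatten a provider response (None, dict, or object) into unique text pieces."""
    if response is None:
        return []

    # Find a list of candidate items under the usual keys or attributes.
    keys = ("output", "candidates", "items", "responses")
    if isinstance(response, dict):
        outputs = next((response[k] for k in keys if isinstance(response.get(k), list)), [])
        if not outputs:
            outputs = next((v for v in response.values() if isinstance(v, list)), [])
    else:
        outputs = next((getattr(response, k) for k in keys
                        if isinstance(getattr(response, k, None), list)), [])

    pieces = []
    for item in outputs:
        if item is None:
            continue
        # Each item may carry its text directly or inside a "content" bag.
        if isinstance(item, dict):
            contents = item.get("content") or item.get("text") or item.get("message")
        else:
            contents = getattr(item, "content", None) or getattr(item, "text", None)
        if isinstance(contents, str) and contents.strip():
            pieces.append(contents.strip())
        elif isinstance(contents, (list, tuple)):
            for c in contents:
                text = c if isinstance(c, str) else (
                    c.get("text") if isinstance(c, dict) else getattr(c, "text", None))
                if text and str(text).strip():
                    pieces.append(str(text).strip())

    # Fall back to a top-level text field, then dedupe preserving order.
    if not pieces:
        top = response.get("text") if isinstance(response, dict) else getattr(response, "text", None)
        if top:
            pieces.append(str(top).strip())
    return list(dict.fromkeys(pieces))

# Example with a made-up dict shape:
# extract_text_pieces({"candidates": [{"content": [{"text": "A short summary."}]}]})
# returns ['A short summary.']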