CB committed on
Update streamlit_app.py
streamlit_app.py  +102 -177  CHANGED
Old version (original line numbers; removed lines are marked "-", unchanged context is unmarked):

@@ -353,141 +353,85 @@ if generate_now and not st.session_state.get("busy"):

353          est_tokens = max_tokens
354          est_cost_caption = f"Est. max tokens: {est_tokens}"
355
356   -      #
357   -
358   -
359   -      if
360   -
361   -      system_msg = {"role": "system", "content": prompt_text}
362   -      user_msg = {"role": "user", "content": "Please summarize the attached video."}
363   -
364   -      try:
365   -          response = genai.responses.generate(
366   -              model=model_used,
367   -              messages=[system_msg, user_msg],
368   -              files=[{"name": fname}],
369   -              safety_settings=safety_settings,
370   -              max_output_tokens=max_tokens,
371   -          )
372   -      except TypeError:
373   -          response = genai.responses.generate(
374   -              model=model_used,
375   -              input=[{"text": prompt_text, "files": [{"name": fname}]}],
376   -              safety_settings=safety_settings,
377   -              max_output_tokens=max_tokens,
378   -          )
379   -
380   -      # Normalize response into a safe list of output items
381   -      outputs = []
382   -      if response is None:
383   -          outputs = []
384   -      elif isinstance(response, dict):
385   -          for key in ("output", "candidates", "items", "responses"):
386   -              val = response.get(key)
387   -              if isinstance(val, list) and val:
388   -                  outputs = val
389   -                  break
390   -          if not outputs:
391   -              for v in response.values():
392   -                  if isinstance(v, list) and v:
393   -                      outputs = v
394   -                      break
395   -      else:
396   -          for attr in ("output", "candidates", "items", "responses"):
397   -              val = getattr(response, attr, None)
398   -              if isinstance(val, list) and val:
399   -                  outputs = val
400   -                  break
401   -
402   -      # Safely extract text pieces without indexing into empty lists
403   -      text_pieces = []
404   -      for item in outputs:
405   -          if item is None:
406   -              continue
407   -          # item may be dict or object
408   -          if isinstance(item, dict):
409   -              # try common fields
410   -              txt = item.get("text") or item.get("output_text") or item.get("message")
411   -              if isinstance(txt, str) and txt.strip():
412   -                  text_pieces.append(txt.strip())
413   -                  continue
414   -              contents = item.get("content") or item.get("output")
415   -          else:
416   -              txt = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
417   -              if isinstance(txt, str) and txt.strip():
418   -                  text_pieces.append(txt.strip())
419   -                  continue
420   -              contents = getattr(item, "content", None) or getattr(item, "output", None)
421   -
422   -          # contents may be string or list
423   -          if isinstance(contents, str) and contents.strip():
424   -              text_pieces.append(contents.strip())
425   -          elif isinstance(contents, (list, tuple)):
426   -              for c in contents:
427   -                  if c is None:
428   -                      continue
429   -                  if isinstance(c, str) and c.strip():
430   -                      text_pieces.append(c.strip())
431   -                      continue
432   -                  if isinstance(c, dict):
433   -                      t = c.get("text") or c.get("content")
434   -                  else:
435   -                      t = getattr(c, "text", None) or getattr(c, "content", None)
436   -                  if t:
437   -                      text_pieces.append(str(t).strip())
438   -
439   -      # final fallback: top-level text
440   -      if not text_pieces:
441   -          top_text = getattr(response, "text", None) if not isinstance(response, dict) else (response.get("text") or response.get("message"))
442   -          if top_text:
443   -              text_pieces.append(str(top_text).strip())
444   -
445   -      # dedupe preserving order
446   -      seen = set()
447   -      filtered = []
448   -      for t in text_pieces:
449   -          if not isinstance(t, str):
450   -              continue
451   -          if t and t not in seen:
452   -              filtered.append(t)
453   -              seen.add(t)
454   -      out = "\n\n".join(filtered)
455   -
456            try:
457-460  -    (blank lines)
461   -              safety_settings=safety_settings
462-476  -    (blank lines)
477            try:
478                if response is None:
479                    outputs = []
480                elif isinstance(response, dict):
481   -                # typical dict shapes
482                    for key in ("output", "candidates", "items", "responses"):
483                        val = response.get(key)
484                        if isinstance(val, list) and val:
485                            outputs = val
486                            break
487                    if not outputs:
488   -                    # pick first list-valued entry if any
489                        for v in response.values():
490   -                        if isinstance(v, list):
491                                outputs = v
492                                break
493                else:

@@ -496,24 +440,19 @@ if generate_now and not st.session_state.get("busy"):

496                        if isinstance(val, list) and val:
497                            outputs = val
498                            break
499   -        except Exception as e:
500   -            # unexpected structure -> capture for debug and continue with empty outputs
501   -            st.session_state["last_error"] = f"Response parsing error: {e}\n{traceback.format_exc()}"
502   -            outputs = []
503
504-505  -    (blank lines)
506
507-508  -    (blank lines)
509   -        try:
510                for item in outputs:
511                    if item is None:
512                        continue
513   -                #
514                    cand_contents = None
515                    if isinstance(item, dict):
516   -                    # common keys that may hold text
517                        for k in ("content", "text", "message", "output_text", "output"):
518                            if k in item and item[k]:
519                                cand_contents = item[k]

@@ -524,13 +463,11 @@ if generate_now and not st.session_state.get("busy"):

524                            if cand_contents:
525                                break
526
527   -                # handle string content
528                    if isinstance(cand_contents, str):
529                        if cand_contents.strip():
530                            text_pieces.append(cand_contents.strip())
531                        continue
532
533   -                # handle list-like content
534                    if isinstance(cand_contents, (list, tuple)):
535                        for c in cand_contents:
536                            if c is None:

@@ -547,7 +484,6 @@ if generate_now and not st.session_state.get("busy"):

547                                text_pieces.append(str(t).strip())
548                                continue
549
550   -                # fallback to direct text on item
551                    direct = None
552                    if isinstance(item, dict):
553                        direct = item.get("text") or item.get("output_text") or item.get("message")

@@ -555,31 +491,34 @@ if generate_now and not st.session_state.get("busy"):

555                        direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
556                    if direct:
557                        text_pieces.append(str(direct).strip())
558            except Exception as e:
559   -      (removed line; content not captured in this view)
560
561   -
562   -      if not text_pieces:
563   -          top_text = None
564   -          if isinstance(response, dict):
565   -              top_text = response.get("text") or response.get("message")
566   -          else:
567   -              top_text = getattr(response, "text", None) or getattr(response, "message", None)
568   -          if top_text:
569   -              text_pieces.append(str(top_text).strip())
570   -
571   -      # dedupe preserving order
572   -      seen = set()
573   -      filtered = []
574   -      for t in text_pieces:
575   -          if not isinstance(t, str):
576   -              continue
577   -          if t and t not in seen:
578   -              filtered.append(t)
579   -              seen.add(t)
580   -      out = "\n\n".join(filtered)
581   -
582   -      # post-process output to remove prompt echo or placeholders
583        if out:
584            out = remove_prompt_echo(prompt_text, out)
585            p = prompt_text

@@ -597,35 +536,21 @@ if generate_now and not st.session_state.get("busy"):

597            st.session_state["last_error"] = ""
598            st.subheader("Analysis Result")
599            st.markdown(out if out else "No analysis returned.")
600   -        st.caption(
601
602        except Exception as e:
603   -        # Build improved error info to display in the UI
604            tb = traceback.format_exc()
605   -
606   -        dbg = locals().get("debug_info") or {}
607   -        # Save a concise message + trace to last_error so UI shows it
608   -        st.session_state["last_error"] = f"{str(e)}\n\nDebug: {dbg}\n\nTraceback:\n{tb}"
609            st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
610        finally:
611            st.session_state["busy"] = False
612
613   -    # show analysis if present
614        if st.session_state.get("analysis_out"):
615            just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
616            if not just_loaded_same:
617                st.subheader("Analysis Result")
618                st.markdown(st.session_state.get("analysis_out"))
619
620   -    # show last error and debug helper
621        if st.session_state.get("last_error"):
622   -        with st.expander("Last Error
623            st.write(st.session_state.get("last_error"))
624   -        # If we extracted debug_info earlier, show short diagnostics
625   -        try:
626   -            di = locals().get("debug_info") or {}
627   -            if di:
628   -                st.write("Debug info (if available):")
629   -                st.write(di)
630   -        except Exception:
631   -            pass
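The normalization-and-extraction logic appears twice in the old version above, and survives in consolidated form on the fallback path of the new version below. Distilled, the pattern is: normalize the response (dict or attribute-style object) into a list of output items, collect text from a few well-known fields, fall back to a top-level text field, then dedupe preserving order. The following is a minimal runnable sketch of that pattern; extract_text and the helper _get are illustrative names, not functions from the app:

    from typing import Any, List

    _LIST_KEYS = ("output", "candidates", "items", "responses")
    _TEXT_KEYS = ("text", "output_text", "message")

    def _get(obj: Any, key: str) -> Any:
        # Uniform field access for dicts and attribute-style response objects.
        return obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)

    def extract_text(response: Any) -> str:
        # Sketch of the normalize -> collect -> dedupe pipeline used in the diff.
        if response is None:
            return ""
        outputs: List[Any] = []
        for key in _LIST_KEYS:
            val = _get(response, key)
            if isinstance(val, list) and val:
                outputs = val
                break
        pieces: List[str] = []
        for item in outputs:
            if item is None:
                continue
            for key in _TEXT_KEYS:
                txt = _get(item, key)
                if isinstance(txt, str) and txt.strip():
                    pieces.append(txt.strip())
                    break
        if not pieces:  # final fallback: a top-level text field
            top = _get(response, "text") or _get(response, "message")
            if top:
                pieces.append(str(top).strip())
        seen = set()
        deduped = [p for p in pieces if not (p in seen or seen.add(p))]
        return "\n\n".join(deduped)

    # Example: two identical candidates collapse to one block of text.
    print(extract_text({"candidates": [{"text": "A story."}, {"text": "A story."}]}))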
New version (new line numbers; added lines are marked "+", unchanged context is unmarked; "..." marks unchanged lines not shown in the diff):

353          est_tokens = max_tokens
354          est_cost_caption = f"Est. max tokens: {est_tokens}"
355
356   +      # First try Agent, but guard and FALLBACK to direct genai responses if Agent fails or returns empty.
357   +      agent = maybe_create_agent(model_used)
358   +      debug_info = {"agent_attempted": False, "agent_ok": False, "agent_error": None, "agent_response_has_text": False}
359   +      if agent:
360   +          debug_info["agent_attempted"] = True
361            try:
362   +              with st.spinner("Generating description via Agent..."):
363   +                  if not processed:
364   +                      raise RuntimeError("Processed file missing for agent generation")
365   +                  # call agent.run inside try/except to catch library IndexError
366   +                  agent_response = agent.run(prompt_text, videos=[processed], safety_settings=safety_settings)
367   +                  # Try to extract text from common attributes; be defensive
368   +                  agent_text = getattr(agent_response, "content", None) or getattr(agent_response, "outputText", None) or None
369   +                  if not agent_text:
370   +                      # try dict-like access
371   +                      try:
372   +                          if isinstance(agent_response, dict):
373   +                              # check common keys
374   +                              for k in ("content", "outputText", "text"):
375   +                                  if k in agent_response and agent_response[k]:
376   +                                      agent_text = agent_response[k]
377   +                                      break
378   +                      except Exception:
379   +                          pass
380   +                  if agent_text and str(agent_text).strip():
381   +                      out = str(agent_text).strip()
382   +                      debug_info["agent_ok"] = True
383   +                      debug_info["agent_response_has_text"] = True
384   +                  else:
385   +                      # Agent returned but had no usable text; set a marker to fallback
386   +                      debug_info["agent_ok"] = False
387   +          except Exception as ae:
388   +              # Save agent error and continue to fallback path instead of crashing
389   +              debug_info["agent_error"] = f"{ae}"
390   +              # include traceback for debugging
391   +              debug_info["agent_traceback"] = traceback.format_exc()
392   +              # Do not re-raise; we'll fallback to genai.responses.generate below
393   +
394   +      if not out:
395   +          # Fallback to direct Responses API flow
396            try:
397   +            if not HAS_GENAI or genai is None:
398   +                raise RuntimeError("Responses API not available; install google.generativeai SDK.")
399   +            genai.configure(api_key=key_to_use)
400   +            fname = file_name_or_id(processed)
401   +            if not fname:
402   +                raise RuntimeError("Uploaded file missing name/id")
403   +            system_msg = {"role": "system", "content": prompt_text}
404   +            user_msg = {"role": "user", "content": "Please summarize the attached video."}
405   +
406   +            try:
407   +                response = genai.responses.generate(
408   +                    model=model_used,
409   +                    messages=[system_msg, user_msg],
410   +                    files=[{"name": fname}],
411   +                    safety_settings=safety_settings,
412   +                    max_output_tokens=max_tokens,
413   +                )
414   +            except TypeError:
415   +                response = genai.responses.generate(
416   +                    model=model_used,
417   +                    input=[{"text": prompt_text, "files": [{"name": fname}]}],
418   +                    safety_settings=safety_settings,
419   +                    max_output_tokens=max_tokens,
420   +                )
421   +
422   +            # Defensive normalization of response -> outputs list
423   +            outputs = []
424                if response is None:
425                    outputs = []
426                elif isinstance(response, dict):
427                    for key in ("output", "candidates", "items", "responses"):
428                        val = response.get(key)
429                        if isinstance(val, list) and val:
430                            outputs = val
431                            break
432                    if not outputs:
433                        for v in response.values():
434   +                        if isinstance(v, list) and v:
435                                outputs = v
436                                break
437                else:
...
440                        if isinstance(val, list) and val:
441                            outputs = val
442                            break
443
444   +            # ensure list
445   +            if not isinstance(outputs, list):
446   +                outputs = list(outputs) if outputs else []
447
448   +            # extract text pieces safely
449   +            text_pieces = []
450                for item in outputs:
451                    if item is None:
452                        continue
453   +                # item may be dict or object; attempt to find text-rich fields
454                    cand_contents = None
455                    if isinstance(item, dict):
456                        for k in ("content", "text", "message", "output_text", "output"):
457                            if k in item and item[k]:
458                                cand_contents = item[k]
...
463                            if cand_contents:
464                                break
465
466                    if isinstance(cand_contents, str):
467                        if cand_contents.strip():
468                            text_pieces.append(cand_contents.strip())
469                        continue
470
471                    if isinstance(cand_contents, (list, tuple)):
472                        for c in cand_contents:
473                            if c is None:
...
484                                text_pieces.append(str(t).strip())
485                                continue
486
487                    direct = None
488                    if isinstance(item, dict):
489                        direct = item.get("text") or item.get("output_text") or item.get("message")
...
491                        direct = getattr(item, "text", None) or getattr(item, "output_text", None) or getattr(item, "message", None)
492                    if direct:
493                        text_pieces.append(str(direct).strip())
494   +
495   +            if not text_pieces:
496   +                top_text = None
497   +                if isinstance(response, dict):
498   +                    top_text = response.get("text") or response.get("message")
499   +                else:
500   +                    top_text = getattr(response, "text", None) or getattr(response, "message", None)
501   +                if top_text:
502   +                    text_pieces.append(str(top_text).strip())
503   +
504   +            # dedupe preserving order
505   +            seen = set()
506   +            filtered = []
507   +            for t in text_pieces:
508   +                if not isinstance(t, str):
509   +                    continue
510   +                if t and t not in seen:
511   +                    filtered.append(t)
512   +                    seen.add(t)
513   +            out = "\n\n".join(filtered)
514            except Exception as e:
515   +            # Capture clear error to UI and include debug_info
516   +            tb = traceback.format_exc()
517   +            st.session_state["last_error"] = f"Responses API error: {e}\n\nDebug: {debug_info}\n\nTraceback:\n{tb}"
518   +            st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
519   +            out = ""
520
521   +      # post-process output
522          if out:
523              out = remove_prompt_echo(prompt_text, out)
524              p = prompt_text
...
536              st.session_state["last_error"] = ""
537              st.subheader("Analysis Result")
538              st.markdown(out if out else "No analysis returned.")
539   +          st.caption(f"Est. max tokens: {est_tokens}")
540
541        except Exception as e:
542            tb = traceback.format_exc()
543   +        st.session_state["last_error"] = f"{e}\n\nDebug: {locals().get('debug_info', debug_info)}\n\nTraceback:\n{tb}"
544            st.error("An error occurred while generating the story. You can try Generate again; the uploaded video will be reused.")
545        finally:
546            st.session_state["busy"] = False
547
548        if st.session_state.get("analysis_out"):
549            just_loaded_same = (st.session_state.get("last_loaded_path") == st.session_state.get("videos"))
550            if not just_loaded_same:
551                st.subheader("Analysis Result")
552                st.markdown(st.session_state.get("analysis_out"))
553
554        if st.session_state.get("last_error"):
555   +         with st.expander("Last Error", expanded=False):
556                 st.write(st.session_state.get("last_error"))
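The heart of the commit is the guarded two-stage flow: try the Agent first, record diagnostics in debug_info, and fall back to the direct Responses call only when the Agent raises or yields no usable text. Below is a minimal, self-contained sketch of that control flow; generate_via_agent and generate_via_responses are hypothetical stand-ins for agent.run(...) and genai.responses.generate(...), not the app's real API calls:

    import traceback
    from typing import Optional, Tuple

    def generate_via_agent(prompt: str) -> Optional[str]:
        # Hypothetical stand-in for agent.run(...); simulates the kind of
        # library IndexError the commit guards against.
        raise IndexError("simulated agent failure")

    def generate_via_responses(prompt: str) -> str:
        # Hypothetical stand-in for genai.responses.generate(...).
        return f"(fallback) summary for: {prompt!r}"

    def generate(prompt: str) -> Tuple[str, dict]:
        out = ""
        debug_info = {"agent_attempted": True, "agent_ok": False, "agent_error": None}
        try:
            text = generate_via_agent(prompt)
            if text and text.strip():          # accept only non-empty agent text
                out = text.strip()
                debug_info["agent_ok"] = True
        except Exception as ae:                # never let an agent bug crash the UI
            debug_info["agent_error"] = f"{ae}"
            debug_info["agent_traceback"] = traceback.format_exc()
        if not out:                            # agent failed or returned nothing
            out = generate_via_responses(prompt)
        return out, debug_info

    if __name__ == "__main__":
        text, dbg = generate("Please summarize the attached video.")
        print(text)
        print(dbg)

Because the agent path swallows its own exceptions into debug_info, the caller always ends up with either a result or a saved error, which matches how the new version surfaces failures through st.session_state["last_error"] instead of crashing the Streamlit run.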