Video-Analysis-Tool

Sleeping

App Files Files Community

CB committed on Sep 11, 2025

Commit

9783989

verified ·

1 Parent(s): f93879b

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +116 -506

streamlit_app.py CHANGED Viewed

@@ -1,549 +1,159 @@
-import os
-import time
-import hashlib
-from glob import glob
-from pathlib import Path
-from tempfile import NamedTemporaryFile
-import yt_dlp
-import ffmpeg
-import streamlit as st
-from dotenv import load_dotenv
-load_dotenv()
-st.set_page_config(page_title="Generate the story of videos", layout="wide")
-DATA_DIR = Path("./data")
-DATA_DIR.mkdir(exist_ok=True)
-for k, v in {
-    "videos": "",
-    "loop_video": False,
-    "uploaded_file": None,
-    "processed_file": None,
-    "busy": False,
-    "last_loaded_path": "",
-    "analysis_out": "",
-    "last_error": "",
-    "file_hash": None,
-    "fast_mode": False,
-    "use_compression": True,
-}.items():
-    st.session_state.setdefault(k, v)
-def sanitize_filename(path_str: str):
-    return Path(path_str).name.lower().translate(str.maketrans("", "", "!?\"'`~@#$%^&*()[]{}<>:,;\\/|+=*")).replace(" ", "_")
-def file_sha256(path: str, block_size: int = 65536) -> str:
-    h = hashlib.sha256()
-    with open(path, "rb") as f:
-        for chunk in iter(lambda: f.read(block_size), b""):
-            h.update(chunk)
-    return h.hexdigest()
-def safe_ffmpeg_run(stream_cmd):
-    try:
-        stream_cmd.run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
-        return True, ""
-    except ffmpeg.Error as e:
-        try:
-            return False, e.stderr.decode("utf-8", errors="ignore")
-        except Exception:
-            return False, str(e)
-def convert_video_to_mp4(video_path: str) -> str:
-    target = Path(video_path).with_suffix(".mp4")
-    if target.exists():
-        return str(target)
-    tmp = NamedTemporaryFile(prefix=target.stem + "_", suffix=".mp4", delete=False, dir=target.parent)
-    tmp.close()
-    ok, err = safe_ffmpeg_run(ffmpeg.input(video_path).output(str(tmp.name)))
-    if not ok:
-        try:
-            os.remove(tmp.name)
-        except Exception:
-            pass
-        raise RuntimeError(f"ffmpeg conversion failed: {err}")
-    os.replace(tmp.name, str(target))
-    if Path(video_path).suffix.lower() != ".mp4":
-        try:
-            os.remove(video_path)
-        except Exception:
-            pass
-    return str(target)
-def compress_video(input_path: str, target_path: str, crf: int = 28, preset: str = "fast"):
-    tmp = NamedTemporaryFile(prefix=Path(target_path).stem + "_", suffix=".mp4", delete=False, dir=Path(target_path).parent)
-    tmp.close()
-    ok, err = safe_ffmpeg_run(ffmpeg.input(input_path).output(str(tmp.name), vcodec="libx264", crf=crf, preset=preset))
-    if not ok:
-        try:
-            os.remove(tmp.name)
-        except Exception:
-            pass
-        return input_path
-    os.replace(tmp.name, target_path)
-    return target_path
-def download_video_ytdlp(url: str, save_dir: str, video_password: str = None) -> str:
-    if not url:
-        raise ValueError("No URL provided")
-    outtmpl = str(Path(save_dir) / "%(id)s.%(ext)s")
-    opts = {"outtmpl": outtmpl, "format": "best"}
-    if video_password:
-        opts["videopassword"] = video_password
-    with yt_dlp.YoutubeDL(opts) as ydl:
-        info = ydl.extract_info(url, download=True)
-    video_id = info.get("id") if isinstance(info, dict) else None
-    if video_id:
-        matches = glob(os.path.join(save_dir, f"{video_id}.*"))
-    else:
-        matches = sorted(glob(os.path.join(save_dir, "*")), key=os.path.getmtime, reverse=True)[:1]
-    if not matches:
-        raise FileNotFoundError("Downloaded video not found")
-    return convert_video_to_mp4(matches[0])
-def file_name_or_id(file_obj):
-    if not file_obj:
-        return None
-    if isinstance(file_obj, dict):
-        for key in ("name", "id", "fileId", "file_id", "uri", "url"):
-            val = file_obj.get(key)
-            if val:
-                s = str(val)
-                if s.startswith("http://") or s.startswith("https://"):
-                    tail = s.rstrip("/").split("/")[-1]
-                    return tail if tail.startswith("files/") else f"files/{tail}"
-                if s.startswith("files/"):
-                    return s
-                if "/" not in s and 6 <= len(s) <= 128:
-                    return f"files/{s}"
-                return s
-        uri = file_obj.get("uri") or file_obj.get("url")
-        if uri:
-            tail = str(uri).rstrip("/").split("/")[-1]
-            return tail if tail.startswith("files/") else f"files/{tail}"
         return None
-    for attr in ("name", "id", "fileId", "file_id", "uri", "url"):
-        val = getattr(file_obj, attr, None)
-        if val:
-            s = str(val)
-            if s.startswith("http://") or s.startswith("https://"):
-                tail = s.rstrip("/").split("/")[-1]
-                return tail if tail.startswith("files/") else f"files/{tail}"
-            if s.startswith("files/"):
-                return s
-            if "/" not in s and 6 <= len(s) <= 128:
-                return f"files/{s}"
-            return s
-    s = str(file_obj)
-    if "http://" in s or "https://" in s:
-        tail = s.rstrip("/").split("/")[-1]
-        return tail if tail.startswith("files/") else f"files/{tail}"
-    if "files/" in s:
-        idx = s.find("files/")
-        return s[idx:] if s[idx:].startswith("files/") else f"files/{s[idx+6:]}"
     return None
-HAS_GENAI = False
-genai = None
-upload_file = None
-get_file = None
-delete_file = None
-if os.getenv("GOOGLE_API_KEY"):
-    try:
-        import google.generativeai as genai_mod
-        genai = genai_mod
-        upload_file = getattr(genai_mod, "upload_file", None)
-        get_file = getattr(genai_mod, "get_file", None)
-        delete_file = getattr(genai_mod, "delete_file", None)
-        HAS_GENAI = True
-    except Exception:
-        HAS_GENAI = False
-def upload_video_sdk(filepath: str):
-    key = get_runtime_api_key()
-    if not key:
-        raise RuntimeError("No API key")
-    if not HAS_GENAI or upload_file is None:
-        raise RuntimeError("google.generativeai SDK upload not available")
-    genai.configure(api_key=key)
-    return upload_file(filepath)
-def wait_for_processed(file_obj, timeout=600):
-    if not HAS_GENAI or get_file is None:
-        return file_obj
-    start = time.time()
-    name = file_name_or_id(file_obj)
-    if not name:
-        return file_obj
-    backoff = 1.0
-    while True:
-        try:
-            obj = get_file(name)
-        except Exception:
-            obj = file_obj
-        state = getattr(obj, "state", None)
-        if not state or getattr(state, "name", None) != "PROCESSING":
-            return obj
-        if time.time() - start > timeout:
-            raise TimeoutError("File processing timed out")
-        time.sleep(backoff)
-        backoff = min(backoff * 2, 8.0)
-def remove_prompt_echo(prompt: str, text: str):
-    if not prompt or not text:
-        return text
-    p = " ".join(prompt.strip().lower().split())
-    t = text.strip()
-    from difflib import SequenceMatcher
-    first = " ".join(t[:600].lower().split())
-    if SequenceMatcher(None, p, first).ratio() > 0.7:
-        cut = min(len(t), max(int(len(prompt) * 0.9), len(p)))
-        new = t[cut:].lstrip(" \n:-")
-        if len(new) >= 3:
-            return new
-    placeholders = ["enter analysis", "enter your analysis", "enter analysis here", "please enter analysis"]
-    low = t.lower()
-    for ph in placeholders:
-        if low.startswith(ph):
-            return t[len(ph):].lstrip(" \n:-")
-    return text
-st.sidebar.header("Video Input")
-st.sidebar.text_input("Video URL", key="url", placeholder="https://")
-settings = st.sidebar.expander("Settings", expanded=False)
-env_key = os.getenv("GOOGLE_API_KEY", "")
-API_KEY_INPUT = settings.text_input("Google API Key (one-time)", value="", type="password")
-model_input = settings.text_input("Gemini Model (short name)", "gemini-2.0-flash-lite")
-model_id = model_input.strip() or "gemini-2.0-flash-lite"
-model_arg = model_id if not model_id.startswith("models/") else model_id.split("/", 1)[1]
-default_prompt = (
-    "You are an Indoor Human Behavior Analyzer. Watch the video and produce a detailed, evidence‑based behavioral report focused on human actions, "
-    "interactions, posture, movement, anatomy, and apparent intent. Use vivid, anatomically rich language and avoid moralizing. Prefer short paragraphs and numeric estimates "
-    "for anatomical measurements. Provide sensory, subjective descriptions and vivid imagery, including a concise summary of observed actions and a description of behaviors "
-    "and interaction dynamics. Use the following personality‑traits list when inferring dispositions: driven by an insatiable desire to understand human behavior and anatomy. "
-    "Finish with a short feedback and recommendations section. Adopt a playful, anatomically obsessed, slightly mischievous persona — inquisitive, pragmatic, and vivid in description."
-)
-analysis_prompt = settings.text_area("Enter analysis", value=default_prompt, height=300)
-settings.text_input("Video Password (if needed)", key="video-password", type="password")
-settings.checkbox("Fast mode (skip compression, smaller model, fewer tokens)", key="fast_mode")
-settings.checkbox("Enable compression for large files (>50MB)", value=True, key="use_compression")
-settings.number_input("Max output tokens", key="max_output_tokens", value=1024, min_value=128, max_value=8192, step=128)
-if not API_KEY_INPUT and not env_key:
-    settings.info("No Google API key provided; upload/generation disabled.", icon="ℹ️")
-if st.sidebar.button("Load Video", use_container_width=True):
-    try:
-        vpw = st.session_state.get("video-password", "")
-        path = download_video_ytdlp(st.session_state.get("url", ""), str(DATA_DIR), vpw)
-        st.session_state["videos"] = path
-        st.session_state["last_loaded_path"] = path
-        st.session_state["uploaded_file"] = None
-        st.session_state["processed_file"] = None
-        st.session_state["file_hash"] = file_sha256(path)
-    except Exception as e:
-        st.sidebar.error(f"Failed to load video: {e}")
-if st.session_state["videos"]:
-    try:
-        st.sidebar.video(st.session_state["videos"], loop=st.session_state.get("loop_video", False))
-    except Exception:
-        st.sidebar.write("Couldn't preview video")
-    with st.sidebar.expander("Options", expanded=False):
-        loop_checkbox = st.checkbox("Enable Loop", value=st.session_state.get("loop_video", False))
-        st.session_state["loop_video"] = loop_checkbox
-        if st.button("Clear Video(s)"):
-            for f in glob(str(DATA_DIR / "*")):
-                try:
-                    os.remove(f)
-                except Exception:
-                    pass
-            for k in ("uploaded_file", "processed_file"):
-                st.session_state.pop(k, None)
-            st.session_state["videos"] = ""
-            st.session_state["last_loaded_path"] = ""
-            st.session_state["analysis_out"] = ""
-            st.session_state["last_error"] = ""
-            st.session_state["file_hash"] = None
-        try:
-            with open(st.session_state["videos"], "rb") as vf:
-                st.download_button("Download Video", data=vf, file_name=sanitize_filename(st.session_state["videos"]), mime="video/mp4", use_container_width=True)
-        except Exception:
-            pass
-    st.sidebar.write("Title:", Path(st.session_state["videos"]).name)
-col1, col2 = st.columns([1, 3])
-with col1:
-    if st.session_state.get("busy"):
-        st.write("Generation in progress...")
-        if st.button("Cancel"):
-            st.session_state["busy"] = False
-            st.session_state["last_error"] = "Generation cancelled by user."
-    else:
-        generate_now = st.button("Generate the story", type="primary")
-with col2:
-    pass
-def get_runtime_api_key():
-    key = API_KEY_INPUT.strip() if API_KEY_INPUT else ""
-    if key:
-        return key
-    return os.getenv("GOOGLE_API_KEY", "").strip() or None
-# responses caller: prefer SDK responses, fallback to generativelanguage generate endpoints
-import json
-import requests
 def responses_generate(model, messages, files, max_output_tokens, api_key):
     if not api_key:
         raise RuntimeError("No API key for responses_generate")
     sdk_err = None
     if HAS_GENAI and genai is not None:
         try:
             genai.configure(api_key=api_key)
-            if hasattr(genai, "responses") and getattr(genai, "responses") is not None:
-                return genai.responses.generate(model=model, messages=messages, files=files, max_output_tokens=max_output_tokens)
         except Exception as e:
             sdk_err = str(e)
     host = "https://generativelanguage.googleapis.com"
     candidates = [
-        f"{host}/v1/models/{model}:generate",
-        f"{host}/v1beta3/models/{model}:generate",
-        f"{host}/v1beta2/models/{model}:generate",
     ]
-    # adapt messages to a simple prompt wrapper expected by generate
-    payload = {"prompt": {"messages": messages}, "maxOutputTokens": int(max_output_tokens or 512)}
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
     last_exc = None
     for url in candidates:
         try:
-            r = requests.post(url, json=payload, headers=headers, timeout=60)
             if r.status_code == 200:
                 try:
                     return r.json()
                 except Exception:
                     return {"text": r.text}
             last_exc = RuntimeError(f"HTTP {r.status_code}: {r.text}")
         except Exception as e:
             last_exc = e
     diag = {"sdk_error": sdk_err, "http_error": str(last_exc), "tried_urls": candidates}
     raise RuntimeError(f"genai.responses not available and HTTP fallback failed: {diag}")
 def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
     files = [{"name": fname}] if fname else None
     for attempt in range(2):
         try:
-            return responses_generate(model_used, [system_msg, user_msg], files, max_tokens, api_key=get_runtime_api_key())
         except Exception:
             if attempt == 0:
                 time.sleep(1.0)
                 continue
             raise
-if (st.session_state.get("busy") is False) and ('generate_now' in locals() and generate_now):
-    if not st.session_state.get("videos"):
-        st.error("No video loaded. Use 'Load Video' in the sidebar.")
-    else:
-        runtime_key = get_runtime_api_key()
-        if not runtime_key:
-            st.error("Google API key not set. Provide in Settings or set GOOGLE_API_KEY in environment.")
         else:
-            try:
-                st.session_state["busy"] = True
-                processed = st.session_state.get("processed_file")
-                current_path = st.session_state.get("videos")
-                try:
-                    current_hash = file_sha256(current_path) if current_path and Path(current_path).exists() else None
-                except Exception:
-                    current_hash = None
-                reupload_needed = True
-                if processed and st.session_state.get("last_loaded_path") == current_path and st.session_state.get("file_hash") == current_hash:
-                    reupload_needed = False
-                upload_path = current_path
-                uploaded = st.session_state.get("uploaded_file")
-                if reupload_needed:
-                    local_path = current_path
-                    fast_mode = st.session_state.get("fast_mode", False)
-                    try:
-                        file_size_mb = os.path.getsize(local_path) / (1024 * 1024)
-                    except Exception:
-                        file_size_mb = 0
-                    use_compression = st.session_state.get("use_compression", True)
-                    if use_compression and not fast_mode and file_size_mb > 50:
-                        compressed_path = str(Path(local_path).with_name(Path(local_path).stem + "_compressed.mp4"))
-                        try:
-                            preset = "veryfast" if fast_mode else "fast"
-                            upload_path = compress_video(local_path, compressed_path, crf=28, preset=preset)
-                        except Exception:
-                            upload_path = local_path
-                    if HAS_GENAI and upload_file is not None:
-                        genai.configure(api_key=runtime_key)
-                        with st.spinner("Uploading video..."):
-                            uploaded = upload_video_sdk(upload_path)
-                            processed = wait_for_processed(uploaded, timeout=600)
-                            st.session_state["uploaded_file"] = uploaded
-                            st.session_state["processed_file"] = processed
-                            st.session_state["last_loaded_path"] = current_path
-                            st.session_state["file_hash"] = current_hash
-                    else:
-                        uploaded = None
-                        processed = None
-                        st.session_state["uploaded_file"] = None
-                        st.session_state["processed_file"] = None
-                else:
-                    uploaded = st.session_state.get("uploaded_file")
-                    processed = st.session_state.get("processed_file")
-                prompt_text = (analysis_prompt or default_prompt).strip()
-                if st.session_state.get("fast_mode"):
-                    model_used = model_arg or "gemini-2.0-flash-lite"
-                    max_tokens = min(st.session_state.get("max_output_tokens", 512), 1024)
-                else:
-                    model_used = model_arg
-                    max_tokens = st.session_state.get("max_output_tokens", 1024)
-                system_msg = {"role": "system", "content": "You are a helpful assistant that summarizes videos concisely in vivid detail."}
-                user_msg = {"role": "user", "content": prompt_text}
-                fname = file_name_or_id(processed) or file_name_or_id(uploaded)
-                response = call_responses_once(model_used, system_msg, user_msg, fname, max_tokens)
-                def extract_text_from_response(response):
-                    outputs = getattr(response, "output", None) or (response.get("output") if isinstance(response, dict) else None) or []
-                    if isinstance(outputs, dict):
-                        outputs = outputs.get("contents") or outputs.get("items") or []
-                    text_pieces = []
-                    for item in outputs or []:
-                        contents = getattr(item, "content", None) or (item.get("content") if isinstance(item, dict) else None) or []
-                        if isinstance(contents, dict):
-                            contents = [contents]
-                        for c in contents:
-                            ctype = getattr(c, "type", None) or (c.get("type") if isinstance(c, dict) else None)
-                            if ctype in ("output_text", "text") or ctype is None:
-                                txt = getattr(c, "text", None) or (c.get("text") if isinstance(c, dict) else None)
-                                if txt:
-                                    text_pieces.append(txt)
-                    if not text_pieces:
-                        top_text = getattr(response, "text", None) or (response.get("text") if isinstance(response, dict) else None)
-                        if top_text:
-                            text_pieces.append(top_text)
-                    seen = set()
-                    filtered = []
-                    for t in text_pieces:
-                        if t not in seen:
-                            filtered.append(t)
-                            seen.add(t)
-                    return "\n\n".join(filtered)
-                out = extract_text_from_response(response)
-                meta = getattr(response, "metrics", None) or (response.get("metrics") if isinstance(response, dict) else None) or {}
-                output_tokens = 0
-                try:
-                    if isinstance(meta, dict):
-                        output_tokens = int(meta.get("output_tokens", 0) or 0)
-                    else:
-                        output_tokens = int(getattr(meta, "output_tokens", 0) or 0)
-                except Exception:
-                    output_tokens = 0
-                if (not out or output_tokens == 0) and model_used:
-                    retry_prompt = "Summarize the video content briefly and vividly (2-4 paragraphs)."
-                    try:
-                        response2 = call_responses_once(model_used, system_msg, {"role": "user", "content": retry_prompt}, fname, min(max_tokens * 2, 4096))
-                        out2 = extract_text_from_response(response2)
-                        if out2 and len(out2) > len(out or ""):
-                            out = out2
-                        else:
-                            response3 = call_responses_once(model_used, system_msg, {"role": "user", "content": "List the main points of the video as 6-10 bullets."}, fname, min(1024, max_tokens * 2))
-                            out3 = extract_text_from_response(response3)
-                            if out3:
-                                out = out3
-                    except Exception:
-                        pass
-                if out:
-                    out = remove_prompt_echo(prompt_text, out).strip()
-                st.session_state["analysis_out"] = out or ""
-                st.session_state["last_error"] = ""
-                st.subheader("Analysis Result")
-                st.markdown(out or "_(no text returned)_")
-                try:
-                    if reupload_needed:
-                        if upload_path and Path(upload_path).exists() and Path(upload_path) != Path(current_path):
-                            Path(upload_path).unlink(missing_ok=True)
-                        Path(current_path).unlink(missing_ok=True)
-                        st.session_state["videos"] = ""
-                except Exception:
-                    pass
-                with st.expander("Debug (compact)", expanded=False):
-                    try:
-                        info = {
-                            "model": model_used,
-                            "output_tokens": output_tokens,
-                            "upload_succeeded": bool(st.session_state.get("uploaded_file")),
-                            "processed_state": getattr(st.session_state.get("processed_file"), "state", None) if st.session_state.get("processed_file") else None,
-                        }
-                        st.write(info)
-                        try:
-                            if isinstance(response, dict):
-                                keys = list(response.keys())[:20]
-                            else:
-                                keys = [k for k in dir(response) if not k.startswith("_")][:20]
-                            st.write({"response_keys_or_attrs": keys})
-                        except Exception:
-                            pass
-                    except Exception:
-                        st.write("Debug info unavailable")
-            except Exception as e:
-                st.session_state["last_error"] = str(e)
-                st.error(f"An error occurred while generating the story: {e}")
-            finally:
-                st.session_state["busy"] = False
-if st.session_state.get("analysis_out"):
-    st.subheader("Analysis Result")
-    st.markdown(st.session_state.get("analysis_out"))
-if st.session_state.get("last_error"):
-    with st.expander("Last Error", expanded=False):
-        st.write(st.session_state.get("last_error"))
-with st.sidebar.expander("Manage uploads", expanded=False):
-    if st.button("Delete uploaded files (local + cloud)"):
-        for f in glob(str(DATA_DIR / "*")):
-            try:
-                Path(f).unlink(missing_ok=True)
-            except Exception:
-                pass
-        st.session_state["videos"] = ""
-        st.session_state["uploaded_file"] = None
-        st.session_state["processed_file"] = None
-        st.session_state["last_loaded_path"] = ""
-        st.session_state["analysis_out"] = ""
-        st.session_state["file_hash"] = None
-        try:
-            fname = file_name_or_id(st.session_state.get("uploaded_file"))
-            if fname and delete_file and HAS_GENAI:
-                genai.configure(api_key=get_runtime_api_key() or os.getenv("GOOGLE_API_KEY", ""))
-                delete_file(fname)
-        except Exception:
-            pass
-        st.success("Local files removed. Cloud deletion attempted where supported.")

+# --- patched responses / generate compatibility layer ---
+import json
+import requests
+def _normalize_model_for_url(model: str) -> str:
+    # Return the model name to interpolate into the endpoint URLs:
+    # a leading "models/" prefix is removed, any other value is returned as-is.
+    # An empty/None model falls back to the hard-coded name below.
+    # NOTE(review): confirm "gemini-2.0" is a valid model id for the target API.
+    if not model:
+        return "gemini-2.0"
+    return model.split("/", 1)[-1] if model.startswith("models/") else model
+def _build_prompt_from_messages(messages):
+    # Flatten a list of chat messages into a single prompt string.
+    # messages expected as list of {"role":..., "content":...}; objects exposing
+    # .role / .content attributes are read via getattr instead.
+    # Returns "" when messages is empty or None.
+    if not messages:
+        return ""
+    parts = []
+    for m in messages:
+        # A missing or empty role defaults to "user"; missing content defaults to "".
+        role = (m.get("role") if isinstance(m, dict) else getattr(m, "role", None)) or "user"
+        content = (m.get("content") if isinstance(m, dict) else getattr(m, "content", None)) or ""
+        # Each message is rendered as "ROLE:\n<content>\n".
+        # NOTE(review): .upper()/.strip() assume role and content are str — confirm
+        # callers never pass structured (list/dict) content.
+        parts.append(f"{role.upper()}:\n{content.strip()}\n")
+    return "\n".join(parts)
+def _parse_http_generate_response(rjson):
+    # Attempt to extract text from various generate shapes
+    # Returns the extracted value, or None when rjson is empty, is not a dict,
+    # or none of the shapes handled below match.
+    if not rjson:
         return None
+    # common new GL formats: {'candidates':[{'content': '...'}]} or {'output': [{'content': ...}]}
+    if isinstance(rjson, dict):
+        # try 'candidates'
+        # Only the first candidate is inspected, and this branch always returns:
+        # when 'candidates' is a non-empty list the 'output' handling below is
+        # never reached, even if the result here is None.
+        # NOTE(review): the value under 'content' is returned without a type check,
+        # so a non-string (e.g. a nested dict) is passed through as-is, and
+        # cand.get assumes the candidate is a dict — confirm against the real
+        # response schema.
+        if "candidates" in rjson and isinstance(rjson["candidates"], list) and rjson["candidates"]:
+            cand = rjson["candidates"][0]
+            return cand.get("content") or cand.get("text") or rjson.get("text")
+        # try 'output' array with 'content' items
+        out = rjson.get("output")
+        if isinstance(out, list) and out:
+            texts = []
+            for item in out:
+                # Non-dict items are skipped.
+                if isinstance(item, dict):
+                    c = item.get("content") or item.get("contents") or item.get("text")
+                    if isinstance(c, str):
+                        texts.append(c)
+                    elif isinstance(c, list):
+                        # One level of nesting: collect 'text' / 'content' of each dict entry.
+                        for sub in c:
+                            if isinstance(sub, dict):
+                                t = sub.get("text") or sub.get("content")
+                                if t:
+                                    texts.append(t)
+            # Collected pieces are joined with a blank line between them.
+            if texts:
+                return "\n\n".join(texts)
+        # fallback to top-level text
+        if "text" in rjson and isinstance(rjson["text"], str):
+            return rjson["text"]
     return None
 def responses_generate(model, messages, files, max_output_tokens, api_key):
+    # Generate a response for `messages`, first through the SDK's
+    # `responses.generate` when it is present, otherwise by POSTing to a list of
+    # HTTP endpoints in order.
+    # Returns the SDK result object, or the decoded JSON body of the first HTTP
+    # 200 reply ({"text": <raw body>} when that body is not valid JSON).
+    # Raises RuntimeError when api_key is falsy or when every attempt fails.
     if not api_key:
         raise RuntimeError("No API key for responses_generate")
     sdk_err = None
+    # try SDK responses.generate (preferred)
     if HAS_GENAI and genai is not None:
         try:
             genai.configure(api_key=api_key)
+            responses_obj = getattr(genai, "responses", None)
+            if responses_obj is not None and hasattr(responses_obj, "generate"):
+                # SDK expects messages and files in their SDK-specific shapes
+                sdk_kwargs = {"model": model, "messages": messages, "max_output_tokens": int(max_output_tokens or 512)}
+                if files:
+                    sdk_kwargs["files"] = files
+                return responses_obj.generate(**sdk_kwargs)
         except Exception as e:
+            # An SDK failure is kept for the diagnostic message; the HTTP fallback still runs.
             sdk_err = str(e)
+    # HTTP fallback to Generative Language "generate" endpoints.
+    # NOTE(review): `files` is not included in the HTTP payload, so any uploaded
+    # file reference is dropped on this path.
+    # NOTE(review): confirm these ":generate" paths and the {"prompt": {"text": ...}}
+    # payload match the current REST API for the chosen model.
     host = "https://generativelanguage.googleapis.com"
+    norm_model = _normalize_model_for_url(model)
     candidates = [
+        f"{host}/v1/models/{norm_model}:generate",
+        f"{host}/v1beta3/models/{norm_model}:generate",
+        f"{host}/v1beta2/models/{norm_model}:generate",
     ]
+    prompt_text = _build_prompt_from_messages(messages)
+    payload = {"prompt": {"text": prompt_text}, "maxOutputTokens": int(max_output_tokens or 512)}
+    # NOTE(review): the API key is sent as a Bearer token — confirm the service
+    # accepts that form for API keys rather than an API-key header/query parameter.
     headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
     last_exc = None
     for url in candidates:
         try:
+            r = requests.post(url, json=payload, headers=headers, timeout=15)
             if r.status_code == 200:
                 try:
                     return r.json()
                 except Exception:
                     return {"text": r.text}
+            # Any non-200 status is recorded and the next URL is tried; only the
+            # most recent failure is kept.
             last_exc = RuntimeError(f"HTTP {r.status_code}: {r.text}")
         except Exception as e:
             last_exc = e
     diag = {"sdk_error": sdk_err, "http_error": str(last_exc), "tried_urls": candidates}
     raise RuntimeError(f"genai.responses not available and HTTP fallback failed: {diag}")
 def call_responses_once(model_used, system_msg, user_msg, fname, max_tokens):
+    # Call responses_generate with the system and user messages and, when fname
+    # is truthy, a single file reference {"name": fname}.
+    # Despite the name, up to two attempts are made: the first failure is
+    # swallowed after a one-second pause, the second is re-raised to the caller.
+    # messages as [system_msg, user_msg]
+    messages = [system_msg, user_msg]
     files = [{"name": fname}] if fname else None
     for attempt in range(2):
         try:
+            return responses_generate(model_used, messages, files, max_tokens, api_key=get_runtime_api_key())
         except Exception:
             if attempt == 0:
                 time.sleep(1.0)
                 continue
             raise
+# Helper to extract text from either SDK response object or HTTP dict
+# Returns the extracted value, or None when nothing usable is found. Any
+# exception raised while inspecting the response is swallowed and also yields None.
+def extract_text_from_response(response):
+    # SDK may return an object with .output, .candidates, or .text
+    # HTTP returns a dict with various shapes
+    # If it's an object (not dict), try attribute access
+    try:
+        if response is None:
+            return None
+        if isinstance(response, dict):
+            # HTTP-style
+            # NOTE(review): the helper's result is returned whenever it is truthy,
+            # without checking that it is a str.
+            text = _parse_http_generate_response(response)
+            if text:
+                return text
+            # try 'output' field shaped differently
+            outputs = response.get("output") or response.get("candidates")
+            if outputs:
+                pieces = []
+                for o in outputs:
+                    # Only dict items whose 'content' / 'text' is a plain string are kept.
+                    if isinstance(o, dict):
+                        t = o.get("content") or o.get("text")
+                        if isinstance(t, str):
+                            pieces.append(t)
+                if pieces:
+                    return "\n\n".join(pieces)
+            return response.get("text") or None
         else:
+            # object-like SDK response
+            outputs = getattr(response, "output", None) or getattr(response, "candidates", None) or None
+            if outputs:
+                pieces = []
+                for item in outputs:
+                    # each item may have 'content' or 'text'
+                    txt = getattr(item, "content", None) or getattr(item, "text", None) or (item.get("content") if isinstance(item, dict) else None)
+                    # NOTE(review): txt is not checked to be a str here; a non-string
+                    # piece makes the join below raise, which the except turns into None.
+                    if txt:
+                        pieces.append(txt)
+                if pieces:
+                    return "\n\n".join(pieces)
+            # try top-level text
+            txt = getattr(response, "text", None)
+            if txt:
+                return txt
+    except Exception:
+        # Best-effort extraction: failures are deliberately ignored.
+        pass
+    return None
+# --- end patched section ---