mila2030 commited on
Commit
a029b6f
·
verified ·
1 Parent(s): cc24ffb

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +40 -114
handler.py CHANGED
@@ -1,17 +1,8 @@
1
- # handler.py — custom Hugging Face handler compatible with your Flask shape
2
- # Env needed in Endpoint Settings → Environment Variables:
3
- # GEMINI_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxx
4
- # Optional:
5
- # GEMINI_MODEL=gemini-2.5-pro
6
- # TEMPERATURE=0.7
7
- # TOP_P=0.95
8
- # MAX_OUTPUT_TOKENS=1024
9
- # SYSTEM_PROMPT=You are a helpful assistant.
10
-
11
  from typing import Any, Dict, List, Optional
12
- import os, time
13
  import google.generativeai as genai
14
 
 
15
  API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
16
  MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-pro")
17
  TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
@@ -19,125 +10,60 @@ TOP_P = float(os.getenv("TOP_P", "0.95"))
19
  MAX_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "1024"))
20
  SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant.")
21
 
22
- GEN_CFG = {"temperature": TEMPERATURE, "top_p": TOP_P, "max_output_tokens": MAX_TOKENS}
23
 
24
  def _extract_text(resp: Any) -> str:
25
  if getattr(resp, "text", None):
26
  return resp.text
27
  try:
28
- for c in getattr(resp, "candidates", []) or []:
29
- content = getattr(c, "content", None)
30
- for p in getattr(content, "parts", []) or []:
31
- t = getattr(p, "text", None)
32
- if t: return t
33
  except Exception:
34
  pass
35
  return ""
36
 
37
- def _to_gemini_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
38
- """
39
- Supports your Flask history: [{role:'user'|'model', parts:[{text:'...'}]}]
40
- and OpenAI-style: [{role:'user'|'assistant', content:'...'}]
41
- """
42
- out: List[Dict[str, Any]] = []
43
- for m in history or []:
44
- role = (m.get("role") or "").lower()
45
- if "parts" in m: # already Gemini-like
46
- parts = m.get("parts") or []
47
- role = role if role in ("user","model") else "user"
48
- out.append({"role": role, "parts": parts})
49
- else:
50
- content = m.get("content", "")
51
- if not isinstance(content, str):
52
- content = str(content)
53
- role = "model" if role == "assistant" else ("user" if role != "model" else "model")
54
- out.append({"role": role, "parts": [{"text": content}]})
55
- return out
56
-
57
- def _pick_user_text(data: Dict[str, Any]) -> Optional[str]:
58
- # Supports: {"message":"..."}, or {"inputs":"..."}, or {"inputs":{"messages":[...]}}
59
- if isinstance(data.get("message"), str) and data["message"].strip():
60
- return data["message"].strip()
61
- if isinstance(data.get("inputs"), str) and data["inputs"].strip():
62
- return data["inputs"].strip()
63
- x = data.get("inputs")
64
- if isinstance(x, dict):
65
- msgs = x.get("messages") or []
66
- for m in reversed(msgs):
67
- if (m.get("role","user").lower()) == "user":
68
- c = m.get("content","")
69
- return c if isinstance(c,str) else str(c)
70
- return None
71
-
72
  class EndpointHandler:
73
  def __init__(self, path: str = ""):
74
- if not API_KEY:
75
- self._init_error = "Missing GEMINI_API_KEY/GOOGLE_API_KEY."
76
- print("[handler:init] ERROR:", self._init_error, flush=True)
77
- return
 
 
78
 
79
- self._init_error = None
80
- genai.configure(api_key=API_KEY)
81
 
82
- # Try model name, then a prefixed fallback
83
- try:
84
- self.model = genai.GenerativeModel(MODEL_NAME, system_instruction=SYSTEM_PROMPT)
85
- except Exception as e1:
86
- alt = MODEL_NAME.replace("models/", "") if MODEL_NAME.startswith("models/") else f"models/{MODEL_NAME}"
87
- try:
88
- self.model = genai.GenerativeModel(alt, system_instruction=SYSTEM_PROMPT)
89
- print(f"[handler:init] Fallback model name used: {alt}", flush=True)
90
- except Exception as e2:
91
- self._init_error = f"Model init failed: {repr(e1)} | fallback: {repr(e2)}"
92
- print("[handler:init] ERROR:", self._init_error, flush=True)
93
- return
94
 
95
- # Warm-up (non-fatal if it fails)
96
- try:
97
- t0 = time.time()
98
- _ = self.model.generate_content("ping", generation_config={"max_output_tokens": 4})
99
- print("[handler:init] warm-up OK in", round((time.time()-t0)*1000), "ms", flush=True)
100
- except Exception as e:
101
- print("[handler:init] warm-up failed (non-fatal):", repr(e), flush=True)
102
 
103
- def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
104
- if getattr(self, "_init_error", None):
105
- return {"text": "", "debug": {"where": "init", "error": self._init_error}}
106
-
107
- # Per-request overrides
108
- params = data.get("parameters") or {}
109
  gen_cfg = {
110
- "temperature": float(params.get("temperature", GEN_CFG["temperature"])),
111
- "top_p": float(params.get("top_p", GEN_CFG["top_p"])),
112
- "max_output_tokens": int(params.get("max_output_tokens", GEN_CFG["max_output_tokens"])),
113
  }
114
 
115
- user_text = _pick_user_text(data) or ""
116
- if not user_text:
117
- return {"text": "", "debug": {"where": "input", "error": "Empty prompt"}}
118
-
119
- gemini_history = _to_gemini_history(data.get("history") or [])
120
 
121
- # Try once; tiny retry handles cold-start/transient
122
- last_err = None
123
- for attempt in range(2):
124
- try:
125
- if gemini_history:
126
- chat = self.model.start_chat(history=gemini_history)
127
- resp = chat.send_message(user_text, generation_config=gen_cfg)
128
- else:
129
- resp = self.model.generate_content(user_text, generation_config=gen_cfg)
130
- txt = _extract_text(resp)
131
- if txt:
132
- return {"text": txt}
133
- fin = []
134
- try:
135
- fin = [getattr(c, "finish_reason", None) for c in (resp.candidates or [])]
136
- except Exception:
137
- pass
138
- return {"text": "", "debug": {"where": "empty_text", "finish_reasons": fin}}
139
- except Exception as e:
140
- last_err = repr(e)
141
- time.sleep(0.35)
142
-
143
- return {"text": "", "debug": {"where": "exception", "exception": last_err}}
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Any, Dict, List, Optional
import os

import google.generativeai as genai

# === Load environment variables ===
# All tunables come from the endpoint's Environment Variables; each has a
# safe default so the handler boots without any optional settings.
API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-pro")
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
# TOP_P is read by EndpointHandler.__call__; it was dropped from this block
# in the last edit (it lived at line 9 previously) — without it every
# request dies with a NameError.
TOP_P = float(os.getenv("TOP_P", "0.95"))
MAX_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "1024"))
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant.")

# NOTE(review): configure() is invoked even when API_KEY is None; in that
# case every generate call fails later with an auth error. Consider failing
# fast here with an explicit message — confirm desired startup behavior.
genai.configure(api_key=API_KEY)
14
 
15
  def _extract_text(resp: Any) -> str:
16
  if getattr(resp, "text", None):
17
  return resp.text
18
  try:
19
+ for c in getattr(resp, "candidates", []):
20
+ for p in getattr(c, "content", {}).get("parts", []):
21
+ if p.get("text"):
22
+ return p["text"]
 
23
  except Exception:
24
  pass
25
  return ""
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
class EndpointHandler:
    """Hugging Face Inference Endpoint handler backed by the Gemini API.

    Request payload (any of): ``{"message": str}``, ``{"inputs": str}``, or
    ``{"inputs": {"content": str}}``; optional ``{"parameters": {...}}``
    generation overrides and ``{"history": [...]}`` for multi-turn chat.
    Response shape is always ``{"text": str}``.
    """

    def __init__(self, path: str = ""):
        # `path` is the local repo path HF passes to custom handlers; unused
        # here because the model is remote (Gemini API).
        print("[handler:init] Loading Gemini model...", flush=True)
        self.model = genai.GenerativeModel(
            MODEL_NAME,
            system_instruction=SYSTEM_PROMPT
        )
        print("[handler:init] Model ready ✅", flush=True)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        print("[handler:call] Incoming data:", data, flush=True)

        # 1️⃣ Flexible input parsing: "message", plain "inputs", or a
        # dict-shaped {"inputs": {"content": ...}}.
        text = data.get("message") or data.get("inputs") or ""
        if isinstance(text, dict):
            text = text.get("content") or ""
        text = str(text).strip()

        if not text:
            return {"text": "(empty input)"}

        # `or {}` (not a .get default): an explicit {"parameters": null} in
        # the payload passed straight through .get(..., {}) as None and
        # crashed on params.get() below — outside the try, so it surfaced as
        # an unhandled 500 instead of an in-band error.
        params = data.get("parameters") or {}
        gen_cfg = {
            "temperature": float(params.get("temperature", TEMPERATURE)),
            "top_p": float(params.get("top_p", TOP_P)),
            "max_output_tokens": int(params.get("max_output_tokens", MAX_TOKENS))
        }

        # History is forwarded verbatim; assumed to already be Gemini-shaped
        # [{role, parts}] — TODO confirm callers no longer send OpenAI-style.
        history = data.get("history") or []

        try:
            if history:
                chat = self.model.start_chat(history=history)
                resp = chat.send_message(text, generation_config=gen_cfg)
            else:
                resp = self.model.generate_content(text, generation_config=gen_cfg)
            reply = _extract_text(resp)
            if not reply:
                reply = "(no response)"
            return {"text": reply}
        except Exception as e:
            # Report errors in-band so the endpoint answers 200 with a
            # diagnosable message rather than an opaque 5xx.
            print("[handler:error]", e, flush=True)
            return {"text": f"Error: {e}"}