mila2030 committed on
Commit
0248d33
·
verified ·
1 Parent(s): aacab3f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +82 -80
handler.py CHANGED
@@ -1,21 +1,20 @@
1
- # handler.py — HF-compliant, stateless Gemini proxy
2
 
3
- from typing import Any, Dict, List, Union
4
- import os
 
5
  import google.generativeai as genai
6
 
7
- # Config via HF Endpoint → Settings → Environment Variables
8
- MODEL = os.getenv("GEMINI_MODEL", "gemini-1.5-flash") # safe default
9
- TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
10
- TOP_P = float(os.getenv("TOP_P", "0.95"))
11
- MAX_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "512"))
12
- SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant.")
13
 
14
  def _extract_text(resp: Any) -> str:
15
- # 1) standard property
16
  if getattr(resp, "text", None):
17
  return resp.text
18
- # 2) candidates/parts
19
  try:
20
  for c in getattr(resp, "candidates", []) or []:
21
  content = getattr(c, "content", None)
@@ -27,93 +26,96 @@ def _extract_text(resp: Any) -> str:
27
  pass
28
  return ""
29
 
30
- def _last_user_from_messages(msgs: List[Dict[str, Any]]) -> str:
31
- for m in reversed(msgs or []):
32
- if (m.get("role") or "user").lower() == "user":
33
- return str(m.get("content", "")).strip()
34
- return ""
35
-
36
  class EndpointHandler:
37
  def __init__(self, path: str = ""):
38
- api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
39
- if not api_key:
40
- self._init_error = "Missing GEMINI_API_KEY/GOOGLE_API_KEY in Endpoint Environment Variables."
41
- print("[handler:init] ERROR:", self._init_error, flush=True)
 
 
42
  return
43
- self._init_error = None
44
- genai.configure(api_key=api_key)
45
 
46
- # Proper system prompt
47
- self.model = genai.GenerativeModel(MODEL, system_instruction=SYSTEM_PROMPT)
48
-
49
- # Optional: slightly relaxed safety to avoid silent blocks of normal prompts
50
- self.safety_settings = None
51
  try:
52
- from google.generativeai.types import HarmBlockThreshold, HarmCategory
53
- self.safety_settings = {
54
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
55
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
56
- HarmCategory.HARM_CATEGORY_SEXUAL: HarmBlockThreshold.BLOCK_ONLY_HIGH,
57
- HarmCategory.HARM_CATEGORY_DANGEROUS: HarmBlockThreshold.BLOCK_ONLY_HIGH,
58
- }
59
  except Exception as e:
60
- print("[handler:init] safety settings skipped:", repr(e), flush=True)
61
-
62
- print(f"[handler:init] OK MODEL={MODEL}", flush=True)
63
-
64
- def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
65
- if self._init_error:
66
- return {"text": "", "debug": {"error": self._init_error}}
67
 
 
68
  try:
69
- print("[handler:call] payload:", data, flush=True)
70
- # HF guarantees top-level `inputs`
71
- inputs = data.get("inputs")
 
 
 
72
 
73
- # Accept either:
74
- # A) {"inputs":"plain text"}
75
- # B) {"inputs":{"messages":[{"role":"user","content":"..."}]}}
76
- # (compat) Also accept top-level "messages" if present.
77
- if isinstance(inputs, str):
78
- user_text = inputs.strip()
79
- elif isinstance(inputs, dict) and "messages" in inputs:
80
- user_text = _last_user_from_messages(inputs.get("messages"))
81
- elif "messages" in data:
82
- user_text = _last_user_from_messages(data.get("messages"))
83
- else:
84
- user_text = ""
85
 
86
- if not user_text:
87
- return {"text": "", "debug": {"note": "Empty prompt."}}
 
 
 
 
 
 
88
 
89
- gen_cfg = {
90
- "temperature": TEMPERATURE,
91
- "top_p": TOP_P,
92
- "max_output_tokens": MAX_TOKENS,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  }
94
 
95
- print("[handler:call] generate_content:", repr(user_text[:160]), flush=True)
 
 
96
  resp = self.model.generate_content(
97
  user_text,
98
- generation_config=gen_cfg,
99
- safety_settings=self.safety_settings
 
 
 
100
  )
101
- print("[handler:call] raw resp:", repr(resp), flush=True)
102
-
103
- text = _extract_text(resp)
104
- if text:
105
- return {"text": text}
106
-
107
- # Diagnostics if empty
108
- debug = {}
109
  try:
110
  fr = [getattr(c, "finish_reason", None) for c in (resp.candidates or [])]
111
- if fr:
112
- debug["finish_reasons"] = fr
113
  except Exception:
114
  pass
115
- return {"text": "", "debug": debug or {"note": "Empty model text"}}
116
-
117
  except Exception as e:
118
- print("[handler:call] EXC:", repr(e), flush=True)
119
- return {"text": "", "debug": {"exception": str(e)}}
 
1
+ # handler.py — self-diagnostic, always returns a clear result
2
 
3
+ from typing import Any, Dict, Union
4
+ import os, socket, json, time
5
+ import requests
6
  import google.generativeai as genai
7
 
8
+ MODEL = os.getenv("GEMINI_MODEL", "gemini-1.5-flash") # safe default
9
+ TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
10
+ TOP_P = float(os.getenv("TOP_P", "0.95"))
11
+ MAX_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "512"))
12
+ SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant.")
13
+ GOOGLE_HOST = "generativelanguage.googleapis.com"
14
 
15
  def _extract_text(resp: Any) -> str:
 
16
  if getattr(resp, "text", None):
17
  return resp.text
 
18
  try:
19
  for c in getattr(resp, "candidates", []) or []:
20
  content = getattr(c, "content", None)
 
26
  pass
27
  return ""
28
 
 
 
 
 
 
 
29
  class EndpointHandler:
30
  def __init__(self, path: str = ""):
31
+ self.diag = {"stage":"init", "ok":True, "notes":[]}
32
+ self.api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
33
+ if not self.api_key:
34
+ self.diag["ok"] = False
35
+ self.diag["error"] = "Missing GEMINI_API_KEY (Endpoint → Settings → Environment Variables)."
36
+ print("[diag] ENV KEY MISSING", flush=True)
37
  return
 
 
38
 
39
+ # DNS test
 
 
 
 
40
  try:
41
+ ip = socket.gethostbyname(GOOGLE_HOST)
42
+ self.diag["dns_ip"] = ip
 
 
 
 
 
43
  except Exception as e:
44
+ self.diag["ok"] = False
45
+ self.diag["dns_error"] = repr(e)
46
+ print("[diag] DNS FAIL:", repr(e), flush=True)
 
 
 
 
47
 
48
+ # HTTPS reachability (no auth) — should return 404/403 but prove TLS/egress works
49
  try:
50
+ r = requests.get(f"https://{GOOGLE_HOST}/", timeout=5)
51
+ self.diag["https_status"] = r.status_code
52
+ except Exception as e:
53
+ self.diag["ok"] = False
54
+ self.diag["https_error"] = repr(e)
55
+ print("[diag] HTTPS FAIL:", repr(e), flush=True)
56
 
57
+ if not self.diag["ok"]:
58
+ return
 
 
 
 
 
 
 
 
 
 
59
 
60
+ try:
61
+ genai.configure(api_key=self.api_key)
62
+ self.model = genai.GenerativeModel(MODEL, system_instruction=SYSTEM_PROMPT)
63
+ self.diag["model_ready"] = True
64
+ except Exception as e:
65
+ self.diag["ok"] = False
66
+ self.diag["model_init_error"] = repr(e)
67
+ print("[diag] MODEL INIT FAIL:", repr(e), flush=True)
68
 
69
+ def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
70
+ # Normalize input
71
+ inputs = data.get("inputs")
72
+ if isinstance(inputs, str):
73
+ user_text = inputs.strip()
74
+ elif isinstance(inputs, dict) and "messages" in inputs:
75
+ # accept chat shape, use last user content
76
+ user_text = ""
77
+ for m in reversed(inputs.get("messages") or []):
78
+ if (m.get("role") or "user").lower() == "user":
79
+ user_text = str(m.get("content", "")).strip()
80
+ break
81
+ else:
82
+ user_text = ""
83
+
84
+ if not user_text:
85
+ return {"text":"", "debug":{"where":"handler","note":"Empty prompt received at endpoint."}}
86
+
87
+ # If init failed, return diagnostics + echo so frontend proves it works
88
+ if not getattr(self, "model", None):
89
+ return {
90
+ "text":"",
91
+ "debug":{
92
+ "where":"init",
93
+ "diag": self.diag,
94
+ "echo": user_text[:160]
95
+ }
96
  }
97
 
98
+ # Try Gemini
99
+ try:
100
+ t0 = time.time()
101
  resp = self.model.generate_content(
102
  user_text,
103
+ generation_config={
104
+ "temperature": TEMPERATURE,
105
+ "top_p": TOP_P,
106
+ "max_output_tokens": MAX_TOKENS
107
+ },
108
  )
109
+ dt = round((time.time() - t0)*1000)
110
+ txt = _extract_text(resp)
111
+ if txt:
112
+ return {"text": txt, "debug":{"latency_ms": dt}}
113
+ # no text -> return finish reasons for clarity
114
+ fr = []
 
 
115
  try:
116
  fr = [getattr(c, "finish_reason", None) for c in (resp.candidates or [])]
 
 
117
  except Exception:
118
  pass
119
+ return {"text":"", "debug":{"where":"gemini_empty","finish_reasons":fr,"latency_ms":dt,"echo":user_text[:160]}}
 
120
  except Exception as e:
121
+ return {"text":"", "debug":{"where":"gemini_exception","exception":repr(e),"echo":user_text[:160]}}