mila2030 commited on
Commit
a029b6f
·
verified ·
1 Parent(s): cc24ffb

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +40 -114
handler.py CHANGED
@@ -1,17 +1,8 @@
1
- # handler.py — custom Hugging Face handler compatible with your Flask shape
2
- # Env needed in Endpoint Settings → Environment Variables:
3
- # GEMINI_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxx
4
- # Optional:
5
- # GEMINI_MODEL=gemini-2.5-pro
6
- # TEMPERATURE=0.7
7
- # TOP_P=0.95
8
- # MAX_OUTPUT_TOKENS=1024
9
- # SYSTEM_PROMPT=You are a helpful assistant.
10
-
11
  from typing import Any, Dict, List, Optional
12
- import os, time
13
  import google.generativeai as genai
14
 
 
15
  API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
16
  MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-pro")
17
  TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
@@ -19,125 +10,60 @@ TOP_P = float(os.getenv("TOP_P", "0.95"))
19
  MAX_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "1024"))
20
  SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant.")
21
 
22
- GEN_CFG = {"temperature": TEMPERATURE, "top_p": TOP_P, "max_output_tokens": MAX_TOKENS}
23
 
24
  def _extract_text(resp: Any) -> str:
25
  if getattr(resp, "text", None):
26
  return resp.text
27
  try:
28
- for c in getattr(resp, "candidates", []) or []:
29
- content = getattr(c, "content", None)
30
- for p in getattr(content, "parts", []) or []:
31
- t = getattr(p, "text", None)
32
- if t: return t
33
  except Exception:
34
  pass
35
  return ""
36
 
37
- def _to_gemini_history(history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
38
- """
39
- Supports your Flask history: [{role:'user'|'model', parts:[{text:'...'}]}]
40
- and OpenAI-style: [{role:'user'|'assistant', content:'...'}]
41
- """
42
- out: List[Dict[str, Any]] = []
43
- for m in history or []:
44
- role = (m.get("role") or "").lower()
45
- if "parts" in m: # already Gemini-like
46
- parts = m.get("parts") or []
47
- role = role if role in ("user","model") else "user"
48
- out.append({"role": role, "parts": parts})
49
- else:
50
- content = m.get("content", "")
51
- if not isinstance(content, str):
52
- content = str(content)
53
- role = "model" if role == "assistant" else ("user" if role != "model" else "model")
54
- out.append({"role": role, "parts": [{"text": content}]})
55
- return out
56
-
57
- def _pick_user_text(data: Dict[str, Any]) -> Optional[str]:
58
- # Supports: {"message":"..."}, or {"inputs":"..."}, or {"inputs":{"messages":[...]}}
59
- if isinstance(data.get("message"), str) and data["message"].strip():
60
- return data["message"].strip()
61
- if isinstance(data.get("inputs"), str) and data["inputs"].strip():
62
- return data["inputs"].strip()
63
- x = data.get("inputs")
64
- if isinstance(x, dict):
65
- msgs = x.get("messages") or []
66
- for m in reversed(msgs):
67
- if (m.get("role","user").lower()) == "user":
68
- c = m.get("content","")
69
- return c if isinstance(c,str) else str(c)
70
- return None
71
-
72
  class EndpointHandler:
73
  def __init__(self, path: str = ""):
74
- if not API_KEY:
75
- self._init_error = "Missing GEMINI_API_KEY/GOOGLE_API_KEY."
76
- print("[handler:init] ERROR:", self._init_error, flush=True)
77
- return
 
 
78
 
79
- self._init_error = None
80
- genai.configure(api_key=API_KEY)
81
 
82
- # Try model name, then a prefixed fallback
83
- try:
84
- self.model = genai.GenerativeModel(MODEL_NAME, system_instruction=SYSTEM_PROMPT)
85
- except Exception as e1:
86
- alt = MODEL_NAME.replace("models/", "") if MODEL_NAME.startswith("models/") else f"models/{MODEL_NAME}"
87
- try:
88
- self.model = genai.GenerativeModel(alt, system_instruction=SYSTEM_PROMPT)
89
- print(f"[handler:init] Fallback model name used: {alt}", flush=True)
90
- except Exception as e2:
91
- self._init_error = f"Model init failed: {repr(e1)} | fallback: {repr(e2)}"
92
- print("[handler:init] ERROR:", self._init_error, flush=True)
93
- return
94
 
95
- # Warm-up (non-fatal if it fails)
96
- try:
97
- t0 = time.time()
98
- _ = self.model.generate_content("ping", generation_config={"max_output_tokens": 4})
99
- print("[handler:init] warm-up OK in", round((time.time()-t0)*1000), "ms", flush=True)
100
- except Exception as e:
101
- print("[handler:init] warm-up failed (non-fatal):", repr(e), flush=True)
102
 
103
- def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
104
- if getattr(self, "_init_error", None):
105
- return {"text": "", "debug": {"where": "init", "error": self._init_error}}
106
-
107
- # Per-request overrides
108
- params = data.get("parameters") or {}
109
  gen_cfg = {
110
- "temperature": float(params.get("temperature", GEN_CFG["temperature"])),
111
- "top_p": float(params.get("top_p", GEN_CFG["top_p"])),
112
- "max_output_tokens": int(params.get("max_output_tokens", GEN_CFG["max_output_tokens"])),
113
  }
114
 
115
- user_text = _pick_user_text(data) or ""
116
- if not user_text:
117
- return {"text": "", "debug": {"where": "input", "error": "Empty prompt"}}
118
-
119
- gemini_history = _to_gemini_history(data.get("history") or [])
120
 
121
- # Try once; tiny retry handles cold-start/transient
122
- last_err = None
123
- for attempt in range(2):
124
- try:
125
- if gemini_history:
126
- chat = self.model.start_chat(history=gemini_history)
127
- resp = chat.send_message(user_text, generation_config=gen_cfg)
128
- else:
129
- resp = self.model.generate_content(user_text, generation_config=gen_cfg)
130
- txt = _extract_text(resp)
131
- if txt:
132
- return {"text": txt}
133
- fin = []
134
- try:
135
- fin = [getattr(c, "finish_reason", None) for c in (resp.candidates or [])]
136
- except Exception:
137
- pass
138
- return {"text": "", "debug": {"where": "empty_text", "finish_reasons": fin}}
139
- except Exception as e:
140
- last_err = repr(e)
141
- time.sleep(0.35)
142
-
143
- return {"text": "", "debug": {"where": "exception", "exception": last_err}}
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Any, Dict, List, Optional
import os

import google.generativeai as genai

# === Load environment variables ===
# All tunables come from the endpoint's Environment Variables; each has a
# safe default so the handler boots without any optional settings.
API_KEY = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-2.5-pro")
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
# TOP_P is read by EndpointHandler.__call__; it was dropped from this block
# in the last edit (it lived at line 9 previously) — without it every
# request dies with a NameError.
TOP_P = float(os.getenv("TOP_P", "0.95"))
MAX_TOKENS = int(os.getenv("MAX_OUTPUT_TOKENS", "1024"))
SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "You are a helpful assistant.")

# NOTE(review): configure() is invoked even when API_KEY is None; in that
# case every generate call fails later with an auth error. Consider failing
# fast here with an explicit message — confirm desired startup behavior.
genai.configure(api_key=API_KEY)
14
 
15
  def _extract_text(resp: Any) -> str:
16
  if getattr(resp, "text", None):
17
  return resp.text
18
  try:
19
+ for c in getattr(resp, "candidates", []):
20
+ for p in getattr(c, "content", {}).get("parts", []):
21
+ if p.get("text"):
22
+ return p["text"]
 
23
  except Exception:
24
  pass
25
  return ""
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
class EndpointHandler:
    """Hugging Face Inference Endpoint handler backed by the Gemini API.

    Request payload (any of): ``{"message": str}``, ``{"inputs": str}``, or
    ``{"inputs": {"content": str}}``; optional ``{"parameters": {...}}``
    generation overrides and ``{"history": [...]}`` for multi-turn chat.
    Response shape is always ``{"text": str}``.
    """

    def __init__(self, path: str = ""):
        # `path` is the local repo path HF passes to custom handlers; unused
        # here because the model is remote (Gemini API).
        print("[handler:init] Loading Gemini model...", flush=True)
        self.model = genai.GenerativeModel(
            MODEL_NAME,
            system_instruction=SYSTEM_PROMPT
        )
        print("[handler:init] Model ready ✅", flush=True)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        print("[handler:call] Incoming data:", data, flush=True)

        # 1️⃣ Flexible input parsing: "message", plain "inputs", or a
        # dict-shaped {"inputs": {"content": ...}}.
        text = data.get("message") or data.get("inputs") or ""
        if isinstance(text, dict):
            text = text.get("content") or ""
        text = str(text).strip()

        if not text:
            return {"text": "(empty input)"}

        # `or {}` (not a .get default): an explicit {"parameters": null} in
        # the payload passed straight through .get(..., {}) as None and
        # crashed on params.get() below — outside the try, so it surfaced as
        # an unhandled 500 instead of an in-band error.
        params = data.get("parameters") or {}
        gen_cfg = {
            "temperature": float(params.get("temperature", TEMPERATURE)),
            "top_p": float(params.get("top_p", TOP_P)),
            "max_output_tokens": int(params.get("max_output_tokens", MAX_TOKENS))
        }

        # History is forwarded verbatim; assumed to already be Gemini-shaped
        # [{role, parts}] — TODO confirm callers no longer send OpenAI-style.
        history = data.get("history") or []

        try:
            if history:
                chat = self.model.start_chat(history=history)
                resp = chat.send_message(text, generation_config=gen_cfg)
            else:
                resp = self.model.generate_content(text, generation_config=gen_cfg)
            reply = _extract_text(resp)
            if not reply:
                reply = "(no response)"
            return {"text": reply}
        except Exception as e:
            # Report errors in-band so the endpoint answers 200 with a
            # diagnosable message rather than an opaque 5xx.
            print("[handler:error]", e, flush=True)
            return {"text": f"Error: {e}"}