Raiff1982 committed on
Commit
c835dfc
·
verified ·
1 Parent(s): a0d48a1

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +216 -0
handler.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ try:
6
+ import torch
7
+ except Exception:
8
+ torch = None
9
+
10
+ try:
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer
12
+ except Exception:
13
+ AutoModelForCausalLM = None
14
+ AutoTokenizer = None
15
+
16
+ # Prefer advanced core when available; fall back to the CLI Codette path
17
+ try:
18
+ from src.components.ai_core import AICore
19
+ except Exception:
20
+ try:
21
+ from components.ai_core import AICore # type: ignore
22
+ except Exception:
23
+ AICore = None # type: ignore
24
+
25
+ try:
26
+ from codette_new import Codette
27
+ except Exception:
28
+ Codette = None # type: ignore
29
+
30
+
31
class EndpointHandler:
    """
    Hugging Face Inference Toolkit handler for Codette.

    The handler prefers the multi-perspective AICore path (authoritative entry point for advanced
    usage) and falls back to the lightweight Codette CLI path if the core cannot be initialized.
    """

    def __init__(self, path: str = ""):
        """Resolve the model location and bring up the first available backend.

        Args:
            path: Local model directory or hub model id. When empty, falls back
                to the CODETTE_MODEL_PATH / CODETTE_MODEL_ID environment variables.

        Raises:
            RuntimeError: If neither AICore nor Codette can be initialized.
        """
        self.logger = logging.getLogger(__name__)
        self.model_path = path or os.getenv("CODETTE_MODEL_PATH") or os.getenv("CODETTE_MODEL_ID", "")
        self.device = "cpu"
        self.ai_core: Optional["AICore"] = None
        self.codette: Optional["Codette"] = None
        self.model = None
        self.tokenizer = None
        # Records which backend answered initialization ("ai_core" or "codette");
        # surfaced to clients in every response under the "engine" key.
        self.initialized_with = "uninitialized"

        self._initialize_core()

    def _initialize_core(self) -> None:
        """Initialize the preferred AICore backend, then fall back to Codette.

        Raises:
            RuntimeError: If no backend could be brought up.
        """
        # AICore needs the transformers classes as well; only attempt it when
        # all three optional imports resolved at module load.
        if AICore and AutoTokenizer and AutoModelForCausalLM:
            try:
                self.ai_core = AICore()
                self._load_model_into_core()
                self.initialized_with = "ai_core"
                return
            except Exception as exc:
                self.logger.warning("AICore initialization failed, falling back to Codette: %s", exc)
                self.ai_core = None

        if Codette:
            try:
                self.codette = Codette(user_name="EndpointUser")
                self.initialized_with = "codette"
                return
            except Exception as exc:
                self.logger.error("Failed to initialize Codette fallback: %s", exc)

        raise RuntimeError("No available inference backend for EndpointHandler.")

    def _load_model_into_core(self) -> None:
        """Load tokenizer/model from the provided path and attach them to AICore."""
        assert self.ai_core is not None, "AICore must be initialized before loading the model."

        model_id = self.model_path or self.ai_core.model_id or "gpt2"
        self.logger.info("Loading model for AICore from path: %s", model_id)

        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        if self.tokenizer.pad_token is None:
            # Causal LMs (e.g. GPT-2) often ship without a pad token; reuse an
            # existing special token so padded/batched generation does not fail.
            self.tokenizer.pad_token = self.tokenizer.eos_token or self.tokenizer.unk_token

        pad_token_id = self.tokenizer.pad_token_id
        self.model = AutoModelForCausalLM.from_pretrained(model_id, pad_token_id=pad_token_id)

        if torch and torch.cuda.is_available():
            self.device = "cuda"
            self.model = self.model.to(self.device)
        else:
            self.device = "cpu"

        self.model.eval()

        # Hand the loaded artifacts to the core so its generate_text path uses them.
        self.ai_core.model = self.model
        self.ai_core.tokenizer = self.tokenizer
        self.ai_core.model_id = model_id

        self._initialize_cocoons()

    def _initialize_cocoons(self) -> None:
        """Attach the cocoon manager so quantum state persistence remains traceable."""
        assert self.ai_core is not None, "AICore must be initialized before configuring cocoons."

        # CocoonManager is an optional project dependency; try both package layouts.
        try:
            from src.utils.cocoon_manager import CocoonManager
        except Exception:
            try:
                from utils.cocoon_manager import CocoonManager  # type: ignore
            except Exception:
                # Persistence is a nice-to-have, not a serving requirement.
                self.logger.info("CocoonManager unavailable; continuing without persisted cocoons.")
                return

        try:
            manager = CocoonManager("./cocoons")
            manager.load_cocoons()
            self.ai_core.cocoon_manager = manager
            latest_state = manager.get_latest_quantum_state()
            if isinstance(latest_state, dict):
                self.ai_core.quantum_state = latest_state
        except Exception as exc:
            self.logger.warning("CocoonManager initialization failed: %s", exc)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one inference request.

        Args:
            data: Payload with required `inputs` (str or list of str), optional
                `parameters` dict, optional `user_name`/`user`, and optional
                top-level generation overrides (`max_new_tokens`, `temperature`,
                `use_aegis`). `parameters` values take precedence over top-level ones.

        Returns:
            One dict per prompt with `generated_text`, `engine`, and `user` keys.

        Raises:
            ValueError: On malformed payloads.
            RuntimeError: If no backend can produce a response.
        """
        if not isinstance(data, dict):
            raise ValueError("Request payload must be a dictionary.")

        raw_inputs = data.get("inputs", None)
        parameters = data.get("parameters", {}) or {}
        if not isinstance(parameters, dict):
            # Fix: a non-dict `parameters` previously surfaced as an opaque
            # AttributeError on .get(); validate it like the rest of the payload.
            raise ValueError("`parameters` must be a dictionary when provided.")

        user_name = (
            data.get("user_name")
            or data.get("user")
            or parameters.get("user_name")
            or "EndpointUser"
        )
        max_new_tokens = int(parameters.get("max_new_tokens", data.get("max_new_tokens", 150)))
        temperature = float(parameters.get("temperature", data.get("temperature", 0.3)))
        use_aegis = bool(parameters.get("use_aegis", data.get("use_aegis", True)))

        inputs = self._normalize_inputs(raw_inputs)
        responses: List[Dict[str, Any]] = []

        for prompt in inputs:
            generated_text = self._generate_response(
                prompt=prompt,
                user_name=user_name,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                use_aegis=use_aegis,
            )
            responses.append(
                {
                    "generated_text": generated_text,
                    "engine": self.initialized_with,
                    "user": user_name,
                }
            )

        return responses

    def _normalize_inputs(self, raw_inputs: Any) -> List[str]:
        """Coerce `inputs` into a non-empty list of stripped, non-empty strings.

        Raises:
            ValueError: If `inputs` is missing, empty, or the wrong type.
        """
        if raw_inputs is None:
            raise ValueError("`inputs` field is required.")

        if isinstance(raw_inputs, str):
            candidate = raw_inputs.strip()
            if not candidate:
                raise ValueError("`inputs` cannot be an empty string.")
            return [candidate]

        if isinstance(raw_inputs, list):
            cleaned: List[str] = []
            for entry in raw_inputs:
                if not isinstance(entry, str):
                    raise ValueError("All entries in `inputs` list must be strings.")
                item = entry.strip()
                if not item:
                    raise ValueError("Entries in `inputs` list cannot be empty.")
                cleaned.append(item)
            if not cleaned:
                raise ValueError("`inputs` list must contain at least one non-empty string.")
            return cleaned

        raise ValueError("`inputs` must be a string or list of strings.")

    def _generate_response(
        self,
        prompt: str,
        user_name: str,
        max_new_tokens: int,
        temperature: float,
        use_aegis: bool,
    ) -> str:
        """Generate text for one prompt, preferring AICore and retrying via Codette.

        Raises:
            RuntimeError: If every available backend fails.
        """
        if self.ai_core:
            try:
                # NOTE(review): max_length appears to be a total-length budget
                # (prompt + continuation) — derived from max_new_tokens with
                # headroom and clamped to [64, 1024]; confirm against AICore.
                max_length = max(64, min(max_new_tokens + 64, 1024))
                return self.ai_core.generate_text(
                    prompt=prompt,
                    max_length=max_length,
                    temperature=temperature,
                    perspective=None,
                    use_aegis=use_aegis,
                )
            except Exception as exc:
                self.logger.warning("AICore generation failed; retrying with Codette: %s", exc)

        if self.codette is None and Codette:
            # Fix: when the handler came up on the AICore path, no Codette
            # instance exists, so the "retrying with Codette" promise above
            # could never be kept. Build the fallback lazily on first need.
            try:
                self.codette = Codette(user_name=user_name)
            except Exception as exc:
                self.logger.error("Failed to initialize Codette fallback: %s", exc)

        if self.codette:
            try:
                if hasattr(self.codette, "user_name"):
                    self.codette.user_name = user_name
                return self.codette.respond(prompt)
            except Exception as exc:
                self.logger.error("Codette fallback failed: %s", exc)

        raise RuntimeError("No available backend to generate a response.")