gnai-creator committed on
Commit
8470b3a
·
verified ·
1 Parent(s): f06ec25

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +651 -651
handler.py CHANGED
@@ -1,651 +1,651 @@
1
- """Custom inference handler for Hugging Face Inference Endpoints.
2
-
3
- This module exposes :class:`EndpointHandler`, the entrypoint used by the
4
- Hugging Face serving stack when ``--task custom`` is selected. The handler
5
- loads the exported Noesis decoder ONNX graph and accepts symbolic intent
6
- vectors (``psi``) along with an optional ``slow_state`` memory tensor. The
7
- outputs mirror the values produced by the training runtime:
8
-
9
- * ``z_out`` – semantic embedding projected back into symbolic space.
10
- * ``choice``, ``pain``, ``memory`` and ``quality`` – diagnostic scalars.
11
- * ``slow_state`` – updated slow memory tensor suitable for recurrent usage.
12
-
13
- The handler is intentionally lightweight so it can run without the rest of the
14
- AletheiaEngine Python package being installed.
15
- """
16
-
17
- from __future__ import annotations
18
-
19
- import importlib
20
- import importlib.util
21
- from dataclasses import dataclass
22
- from pathlib import Path
23
- import hashlib
24
- import re
25
- from typing import Any, Mapping, MutableMapping, Optional, Sequence, Tuple
26
-
27
- import numpy as np
28
-
29
-
30
# Word-token matcher used by the fallback text encoder; re.UNICODE is the
# default for str patterns in Python 3 but is kept explicit for clarity.
_WORD_RE = re.compile(r"\w+", re.UNICODE)

# Human-readable labels for symbolic dimensions.  _summarise_intent maps a
# dimension index onto this list modulo its length, so order is significant.
_INTENT_VOCAB = [
    "clarity",
    "empathy",
    "analysis",
    "evidence",
    "caution",
    "curiosity",
    "context",
    "precision",
    "ethics",
    "resilience",
    "coherence",
    "safety",
    "humility",
    "breadth",
    "depth",
    "innovation",
    "structure",
    "rigour",
    "balance",
    "confidence",
]

# Provenance identifiers echoed verbatim in every response payload.
_DEFAULT_PROVIDER = "aletheia-noesis"
_DEFAULT_MODEL = "noesis-transformer-onnx"
57
-
58
-
59
class _TextEncoder:
    """Deterministic text → vector encoder.

    Requests frequently arrive with a plain-text prompt in ``inputs`` even
    though the Noesis decoder consumes a symbolic ``psi`` vector.  This class
    provides a graceful fallback: every word token is hashed with FNV-1a to
    seed a reproducible random direction on the unit hypersphere, and the
    renormalised mean of those directions becomes the prompt vector.  It
    mirrors the lightweight ``TextEncoder256`` from the full AletheiaEngine
    package without importing it into the endpoint container.
    """

    def __init__(self, dim: int) -> None:
        self.dim = dim

    @staticmethod
    def _tokens(text: str) -> list[str]:
        # Lower-cased runs of word characters.
        return [word.lower() for word in _WORD_RE.findall(text)]

    @staticmethod
    def _seed(tok: str) -> int:
        # 32-bit FNV-1a keeps seeds stable across processes and platforms.
        acc = 2166136261
        for octet in tok.encode("utf-8"):
            acc = ((acc ^ octet) * 16777619) & 0xFFFFFFFF
        return int(acc)

    def encode(self, text: str) -> np.ndarray:
        """Return a unit-norm ``(1, dim)`` float32 embedding of *text*."""

        words = self._tokens(text)
        if not words:
            # Nothing recognisable: the zero vector is returned as-is.
            return np.zeros((1, self.dim), dtype=np.float32)

        directions = []
        for word in words:
            raw = np.random.RandomState(self._seed(word)).normal(
                0.0, 1.0, size=(self.dim,)
            ).astype(np.float32)
            directions.append(raw / (float(np.linalg.norm(raw)) or 1.0))

        pooled = np.stack(directions, axis=0).mean(
            axis=0, dtype=np.float32, keepdims=True
        )
        return pooled / (float(np.linalg.norm(pooled)) or 1.0)
104
-
105
-
106
- class _SimpleTokenizer:
107
- """Minimal tokenizer mirroring the reference Noesis runtime."""
108
-
109
- def __init__(self) -> None:
110
- special_tokens = ["<pad>", "<bos>", "<eos>", "<unk>"]
111
- alphabet = list("abcdefghijklmnopqrstuvwxyz0123456789 .,;:'\"!?-\n")
112
- self._tokens = special_tokens + alphabet
113
- self._token_to_id = {token: idx for idx, token in enumerate(self._tokens)}
114
-
115
- @property
116
- def pad_token_id(self) -> int:
117
- return 0
118
-
119
- @property
120
- def bos_token_id(self) -> int:
121
- return 1
122
-
123
- @property
124
- def eos_token_id(self) -> int:
125
- return 2
126
-
127
- @property
128
- def unk_token_id(self) -> int:
129
- return 3
130
-
131
- def encode(self, text: str) -> list[int]:
132
- tokens = [self.bos_token_id]
133
- for char in text:
134
- tokens.append(self._token_to_id.get(char.lower(), self.unk_token_id))
135
- tokens.append(self.eos_token_id)
136
- return tokens
137
-
138
- def decode(self, token_ids: Sequence[int]) -> str:
139
- """Convert token IDs back into a text string."""
140
-
141
- characters: list[str] = []
142
- for idx in token_ids:
143
- if idx == self.eos_token_id:
144
- break
145
- if idx in {self.pad_token_id, self.bos_token_id}:
146
- continue
147
-
148
- if 0 <= idx < len(self._tokens):
149
- token = self._tokens[idx]
150
- if token not in {"<pad>", "<bos>", "<eos>", "<unk>"}:
151
- characters.append(token)
152
- else:
153
- characters.append("?")
154
- else:
155
- characters.append("?")
156
-
157
- return "".join(characters)
158
-
159
-
160
def _summarise_intent(psi: Sequence[float], top_k: int = 4) -> list[str]:
    """Describe the strongest symbolic dimensions of *psi* in words.

    Returns up to *top_k* strings of the form
    ``"<descriptor> (<elevated|attenuated>, |ψ|=…)"`` ordered by decreasing
    magnitude; an empty list for an empty vector.
    """

    values = np.asarray(list(psi), dtype=np.float32).reshape(-1)
    if not values.size:
        return []

    strengths = np.abs(values)
    ranked = strengths.argsort()[::-1][: min(top_k, values.size)]
    lines: list[str] = []
    for idx in ranked.tolist():
        # Indices wrap around the vocabulary so any dimensionality works.
        label = _INTENT_VOCAB[idx % len(_INTENT_VOCAB)]
        trend = "attenuated" if values[idx] < 0 else "elevated"
        lines.append(f"{label} ({trend}, |ψ|={strengths[idx]:.2f})")
    return lines
176
-
177
-
178
- @dataclass(frozen=True)
179
- class _DecodingParams:
180
- beam_size: int = 6
181
- temperature: float = 0.8
182
- top_p: float = 0.9
183
- max_new_tokens: int = 256
184
- min_new_tokens: int = 16 # Minimum tokens before allowing EOS
185
- stop_quality: float = 0.6
186
-
187
- @classmethod
188
- def from_payload(cls, payload: Mapping[str, Any]) -> "_DecodingParams":
189
- source: Mapping[str, Any] | None = None
190
- if "decoding" in payload and isinstance(payload["decoding"], Mapping):
191
- source = payload["decoding"]
192
- elif "parameters" in payload and isinstance(payload["parameters"], Mapping):
193
- candidate = payload["parameters"].get("decoding")
194
- if isinstance(candidate, Mapping):
195
- source = candidate
196
-
197
- if not source:
198
- return cls()
199
-
200
- kwargs: dict[str, Any] = {}
201
- for field in cls.__dataclass_fields__.keys(): # type: ignore[attr-defined]
202
- if field in source:
203
- try:
204
- kwargs[field] = type(getattr(cls(), field))(source[field])
205
- except (TypeError, ValueError):
206
- continue
207
- return cls(**kwargs)
208
-
209
- def to_dict(self) -> dict[str, Any]:
210
- return {field: getattr(self, field) for field in self.__dataclass_fields__.keys()} # type: ignore[attr-defined]
211
-
212
-
213
@dataclass(frozen=True)
class _ModelIO:
    """Snapshot of ONNX input and output metadata.

    Captured once at start-up (see ``EndpointHandler._capture_io``) so the
    handler does not re-query the session on every request.
    """

    # Ordered node descriptors as returned by session.get_inputs().
    inputs: tuple[Any, ...]
    # Ordered node descriptors as returned by session.get_outputs().
    outputs: tuple[Any, ...]
219
-
220
-
221
class EndpointHandler:
    """Callable endpoint used by Hugging Face to drive inference.

    Wraps an ONNX Runtime CPU session over the exported Noesis decoder.  At
    construction it inspects the graph's declared inputs and pre-computes
    zero-filled defaults, so each request only needs to supply ``psi`` (or a
    text prompt) plus optional ``slow_state`` and decoding parameters.
    """

    def __init__(self, path: str | None = None) -> None:
        # Default to this file's directory — the layout used by HF custom
        # handler repositories.
        self.model_dir = Path(path or Path(__file__).parent)
        self.session = self._load_session()
        self.io = self._capture_io()

        # The first graph input is treated as the symbolic "psi" vector;
        # optional inputs are located by case-insensitive name.
        self.primary_input = self.io.inputs[0].name
        self.slow_input = self._find_input("slow_state")
        self.tokens_input = self._find_input("tokens")
        self._primary_dim = self._infer_primary_dim()
        self._text_encoder = _TextEncoder(self._primary_dim)
        self._tokenizer = _SimpleTokenizer()
        # Zero tensors for every remaining graph input the caller never sets.
        self._defaults: dict[str, np.ndarray] = {}
        skip_inputs = {self.primary_input}
        if self.slow_input is not None:
            skip_inputs.add(self.slow_input)
        if self.tokens_input is not None:
            skip_inputs.add(self.tokens_input)
        for node in self.io.inputs:
            if node.name in skip_inputs:
                continue
            self._defaults[node.name] = self._zeros_like(node)
        # Fallback slow-state tensor used when the payload omits one.
        if self.slow_input is not None:
            self._slow_fallback = self._zeros_like(self._input_map[self.slow_input])
        else:
            self._slow_fallback = None
        if self.tokens_input is not None:
            token_node = self._input_map[self.tokens_input]
            self._token_sequence_length = self._infer_sequence_length(token_node)
            self._token_dtype = self._dtype_for(token_node)
        else:
            # No token input: generation is disabled and __call__ falls back
            # to a single forward pass with a textual synopsis.
            self._token_sequence_length = 0
            self._token_dtype = np.int64

    def _load_session(self):
        """Load the ONNX session, tolerating alternate filenames."""

        ort = self._import_onnxruntime()
        preferred_names = ("model.onnx", "model_infer.onnx")
        for name in preferred_names:
            candidate = self.model_dir / name
            if candidate.exists():
                return ort.InferenceSession(str(candidate), providers=["CPUExecutionProvider"])

        available = sorted(str(p.name) for p in self.model_dir.glob("*.onnx"))
        if len(available) == 1:
            # Fall back to the lone ONNX artefact if it has a non-standard name.
            return ort.InferenceSession(str(self.model_dir / available[0]), providers=["CPUExecutionProvider"])

        choices = ", ".join(available) or "<none>"
        raise FileNotFoundError(
            "Could not locate any of %s in %s (available: %s)"
            % (", ".join(preferred_names), self.model_dir, choices)
        )

    @staticmethod
    def _import_onnxruntime():
        """Import :mod:`onnxruntime`, providing a helpful error if unavailable."""

        spec = importlib.util.find_spec("onnxruntime")
        if spec is None:
            raise ModuleNotFoundError(
                "onnxruntime is required to load Noesis decoder ONNX graphs. "
                "Install it with 'pip install onnxruntime'."
            )
        return importlib.import_module("onnxruntime")

    @property
    def _input_map(self) -> Mapping[str, Any]:
        # Rebuilt on every access; cheap for the handful of graph inputs.
        return {node.name: node for node in self.io.inputs}

    def _capture_io(self) -> _ModelIO:
        """Snapshot the session's input/output node metadata once."""

        return _ModelIO(inputs=tuple(self.session.get_inputs()), outputs=tuple(self.session.get_outputs()))

    def _find_input(self, target: str) -> Optional[str]:
        """Return the graph input whose name matches *target* (case-insensitive)."""

        target = target.lower()
        for node in self.io.inputs:
            if node.name.lower() == target:
                return node.name
        return None

    def _infer_primary_dim(self) -> int:
        """Infer the feature width of the primary input from its static shape."""

        node = self._input_map[self.primary_input]
        # Walk from the trailing axis: the last concrete dimension is taken
        # as the feature width (dynamic axes appear as strings/None).
        for dim in reversed(node.shape):
            if isinstance(dim, int) and dim > 0:
                return dim
        # Conservative default matching TextEncoder256.
        return 256

    def _infer_sequence_length(self, node: Any) -> int:
        """Infer a fixed token-sequence length, or 1 if fully dynamic."""

        for dim in reversed(getattr(node, "shape", [])):
            if isinstance(dim, int) and dim > 0:
                return dim
        return 1

    @staticmethod
    def _onnx_type_to_numpy(type_str: str | None) -> np.dtype:
        """Map an ONNX tensor type string to a numpy dtype (float32 default)."""

        mapping = {
            "tensor(float)": np.float32,
            "tensor(float16)": np.float16,
            "tensor(double)": np.float64,
            "tensor(int64)": np.int64,
            "tensor(int32)": np.int32,
            "tensor(int16)": np.int16,
            "tensor(int8)": np.int8,
            "tensor(uint8)": np.uint8,
            "tensor(bool)": np.bool_,
        }
        return mapping.get(type_str, np.float32)

    def _dtype_for(self, node: Any) -> np.dtype:
        """Numpy dtype for a graph node, via its declared ONNX type string."""

        return self._onnx_type_to_numpy(getattr(node, "type", None))

    def _zeros_like(self, node: Any) -> np.ndarray:
        """Zero tensor matching *node*'s shape; dynamic axes collapse to 1."""

        shape: list[int] = []
        for dim in node.shape:
            if isinstance(dim, int) and dim > 0:
                shape.append(dim)
            else:
                shape.append(1)
        dtype = self._dtype_for(node)
        return np.zeros(shape, dtype=dtype)

    def _coerce_array(self, value: Any, *, node: Any, allow_empty: bool = False) -> np.ndarray:
        """Convert *value* to a batched 2D array of *node*'s dtype.

        Raises ValueError for empty input (unless *allow_empty*) and for
        arrays with more than two dimensions.
        """

        dtype = self._dtype_for(node)
        array = np.asarray(value, dtype=dtype)
        if array.size == 0 and not allow_empty:
            raise ValueError("Received an empty array; provide at least one value.")
        if array.ndim == 1:
            # Promote a flat vector to a batch of one.
            array = np.expand_dims(array, axis=0)
        elif array.ndim > 2:
            raise ValueError("Expected a 1D or batched 2D array; received shape %s" % (array.shape,))
        if array.dtype != dtype:
            array = array.astype(dtype, copy=False)
        return array

    def _prepare_inputs(self, payload: Mapping[str, Any]) -> MutableMapping[str, np.ndarray]:
        """Build the full ONNX feed dict from a request payload.

        Accepts several aliases for the symbolic vector (``psi``, ``vector``,
        ``psi_s``, ``inputs``, ``prompt``, ``text``) and falls back to the
        zero slow-state / zero defaults captured at start-up.
        """

        psi = payload.get("psi")
        if psi is None:
            # NOTE(review): `or`-chaining treats falsy aliases ("" / [] / 0)
            # as missing, and would raise on a multi-element numpy array.
            # Fine for JSON payloads (lists/strings) — confirm callers never
            # pass ndarrays here.
            psi = (
                payload.get("vector")
                or payload.get("psi_s")
                or payload.get("inputs")
                or payload.get("prompt")
                or payload.get("text")
            )
        if psi is None:
            raise KeyError("Payload must include a 'psi' field containing the symbolic vector.")

        primary_node = self._input_map[self.primary_input]
        inputs: MutableMapping[str, np.ndarray] = {
            self.primary_input: self._vector_from_payload(psi, node=primary_node)
        }

        if self.slow_input is not None:
            # Same falsy-alias caveat as above applies to the slow state.
            slow_value = payload.get("slow_state") or payload.get("slow") or payload.get("state")
            if slow_value is None:
                inputs[self.slow_input] = self._slow_fallback.copy()
            else:
                inputs[self.slow_input] = self._coerce_array(
                    slow_value,
                    node=self._input_map[self.slow_input],
                    allow_empty=True,
                )

        # Fill every other declared graph input with its zero default.
        for name, default in self._defaults.items():
            inputs[name] = default.copy()

        return inputs

    def _vector_from_payload(self, value: Any, *, node: Any) -> np.ndarray:
        """Coerce *value* to the primary input: text is hash-encoded, numeric
        data is converted directly."""

        if isinstance(value, str):
            encoded = self._text_encoder.encode(value)
            return self._coerce_array(encoded, node=node)

        # A list of strings is joined and encoded as one prompt.
        if isinstance(value, (list, tuple)) and value and all(isinstance(v, str) for v in value):
            encoded = self._text_encoder.encode(" ".join(value))
            return self._coerce_array(encoded, node=node)

        return self._coerce_array(value, node=node)

    @staticmethod
    def _candidate_seed(psi: np.ndarray) -> int:
        """Deterministic RNG seed derived from the psi bytes (SHA-1 prefix)."""

        digest = hashlib.sha1(psi.tobytes()).digest()
        return int.from_bytes(digest[:4], "little", signed=False)

    def _token_array_from_ids(self, token_ids: Sequence[int]) -> np.ndarray:
        """Shape *token_ids* into the graph's token input, padding/truncating
        to the fixed sequence length when one was inferred."""

        ids = list(token_ids)
        if self._token_sequence_length <= 0:
            # Dynamic sequence axis: feed the IDs as-is.
            return np.asarray([ids], dtype=self._token_dtype)

        padded = np.full(
            (1, self._token_sequence_length),
            fill_value=self._tokenizer.pad_token_id,
            dtype=self._token_dtype,
        )
        length = min(len(ids), self._token_sequence_length)
        if length > 0:
            padded[0, :length] = np.asarray(ids[:length], dtype=self._token_dtype)
        return padded

    def _run_candidate(self, base_feed: Mapping[str, np.ndarray], tokens: Sequence[int]) -> list[tuple[Any, np.ndarray]]:
        """Run one forward pass with *tokens* spliced into a copy of the feed.

        Returns (output-node, value) pairs in graph order.
        """

        # Copy arrays so the session can never mutate the caller's feed.
        feed = {
            name: (value.copy() if isinstance(value, np.ndarray) else value)
            for name, value in base_feed.items()
        }
        if self.tokens_input is not None:
            feed[self.tokens_input] = self._token_array_from_ids(tokens)
        outputs = self.session.run(None, feed)
        return list(zip(self.io.outputs, outputs))

    @staticmethod
    def _extract_logits(outputs: Sequence[tuple[Any, np.ndarray]]) -> Optional[np.ndarray]:
        """Return the output named ``logits``, else the first output, else None."""

        for node, value in outputs:
            if getattr(node, "name", "").lower() == "logits":
                return np.asarray(value, dtype=np.float32)
        if outputs:
            return np.asarray(outputs[0][1], dtype=np.float32)
        return None

    @staticmethod
    def _sample_next_token(
        logits: np.ndarray,
        decoding: _DecodingParams,
        rng: np.random.Generator,
    ) -> int:
        """Sample a token ID from *logits* with temperature + nucleus (top-p)
        filtering; degrades to greedy argmax for ~zero temperature or
        non-finite logits."""

        vector = np.asarray(logits, dtype=np.float64).reshape(-1)
        temperature = max(float(decoding.temperature), 1e-5)
        top_p = float(decoding.top_p)

        if temperature <= 1e-5 or not np.isfinite(vector).any():
            return int(int(np.argmax(vector)))

        # Temperature-scaled, max-subtracted softmax for numerical stability.
        stabilized = vector / temperature
        stabilized -= np.max(stabilized)
        probs = np.exp(stabilized)
        probs = np.nan_to_num(probs, nan=0.0, posinf=0.0, neginf=0.0)
        total = probs.sum()
        if total <= 0.0:
            return int(np.argmax(vector))
        probs /= total

        if top_p <= 0.0:
            # Degenerate nucleus: greedy.
            return int(np.argmax(probs))

        if 0.0 < top_p < 1.0:
            # Nucleus sampling: keep the smallest prefix of the sorted
            # distribution whose cumulative mass is <= top_p (always at
            # least the single most likely token).
            sorted_indices = np.argsort(-probs)
            sorted_probs = probs[sorted_indices]
            cumulative = np.cumsum(sorted_probs)
            mask = cumulative <= top_p
            if mask.size > 0:
                mask[0] = True
            filtered_indices = sorted_indices[mask]
            filtered_probs = sorted_probs[mask]
            filtered_total = filtered_probs.sum()
            if filtered_total <= 0.0:
                # Numerical edge case: fall back to the full distribution.
                filtered_indices = sorted_indices
                filtered_probs = sorted_probs
                filtered_total = filtered_probs.sum()
            filtered_probs = filtered_probs / filtered_total
            choice = rng.choice(len(filtered_indices), p=filtered_probs)
            return int(filtered_indices[int(choice)])

        # top_p >= 1.0: plain temperature sampling.
        choice = rng.choice(len(probs), p=probs)
        return int(choice)

    def _generate_sequence(
        self,
        base_feed: Mapping[str, np.ndarray],
        *,
        decoding: _DecodingParams,
        seed: int,
    ) -> Optional[Tuple[str, list[int], float, list[tuple[Any, np.ndarray]], int]]:
        """Autoregressively sample one candidate sequence.

        Returns ``(text, token_ids, quality, outputs, steps)`` or ``None``
        when the graph has no token input / produced no logits.  Each step
        runs the model twice: once to sample the next token and once more to
        read the quality (q_hat) of the sequence including that token.
        """

        if self.tokens_input is None:
            return None

        rng = np.random.default_rng(seed)
        token_ids: list[int] = [self._tokenizer.bos_token_id]
        quality = float("-inf")
        formatted_outputs: list[tuple[Any, np.ndarray]] | None = None
        steps = 0

        max_steps = max(decoding.max_new_tokens, 1)
        for _ in range(max_steps):
            outputs = self._run_candidate(base_feed, token_ids)
            logits = self._extract_logits(outputs)
            if logits is None:
                break
            # Clamp in case the exported graph emits fewer positions than we
            # have tokens (fixed-length graphs).
            last_index = min(len(token_ids) - 1, logits.shape[1] - 1)
            next_logits = logits[0, last_index].copy()

            # Apply strong penalty to EOS token if we haven't reached
            # min_new_tokens — reduces premature EOS probability.
            if steps < decoding.min_new_tokens:
                next_logits[self._tokenizer.eos_token_id] -= 10.0

            next_token = self._sample_next_token(next_logits, decoding, rng)
            token_ids.append(int(next_token))
            steps += 1

            # If EOS slipped through the penalty before min_new_tokens,
            # replace it with a space (or 'a'/<unk> as last resorts).
            if token_ids[-1] == self._tokenizer.eos_token_id and steps < decoding.min_new_tokens:
                space_token_id = self._tokenizer._token_to_id.get(" ", self._tokenizer._token_to_id.get("a", self._tokenizer.unk_token_id))
                token_ids[-1] = space_token_id
                # Note: In production, add logging here to track how often this happens

            # Second pass: refresh outputs/quality for the extended sequence.
            outputs = self._run_candidate(base_feed, token_ids)
            formatted_outputs = outputs
            quality = self._extract_q_hat(outputs)

            # Only allow EOS break once min_new_tokens have been generated
            # (BOS excluded).
            if token_ids[-1] == self._tokenizer.eos_token_id and steps >= decoding.min_new_tokens:
                break
            if self._token_sequence_length > 0 and len(token_ids) >= self._token_sequence_length:
                break

        if formatted_outputs is None:
            return None

        text = self._tokenizer.decode(token_ids)
        return text, token_ids, float(quality), formatted_outputs, steps

    @staticmethod
    def _extract_q_hat(outputs: Sequence[tuple[Any, np.ndarray]]) -> float:
        """Read the scalar quality estimate from the model outputs."""

        for node, value in outputs:
            if getattr(node, "name", "").lower() == "q_hat":
                return float(np.squeeze(np.asarray(value, dtype=np.float32)))
        # Fallback if the node name differs slightly.
        for node, value in outputs:
            if "q" in getattr(node, "name", "").lower():
                return float(np.squeeze(np.asarray(value, dtype=np.float32)))
        return float("-inf")

    @staticmethod
    def _format_output(name: str, value: np.ndarray) -> Any:
        """JSON-safe rendering of one model output (scalar or nested list).

        *name* is accepted for call-site symmetry but is currently unused.
        """

        value = np.asarray(value, dtype=np.float32)
        # Scrub NaN/inf so the JSON response is always serialisable.
        value = np.nan_to_num(value, nan=0.0, posinf=0.0, neginf=0.0)
        squeezed = np.squeeze(value)
        if squeezed.ndim == 0:
            return float(squeezed)
        return squeezed.tolist()

    def __call__(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        """Handle one inference request and return the JSON-ready response.

        Runs up to ``beam_size`` independently-seeded sampling passes and
        keeps the candidate with the highest q_hat, stopping early once
        ``stop_quality`` is reached.  Without a token input (or when no
        candidate materialises) a single forward pass plus a textual intent
        synopsis is returned instead.
        """

        payload = data.get("inputs", data)
        if not isinstance(payload, Mapping):
            # Bare payloads (string / list) are treated as the psi field.
            payload = {"psi": payload}

        feed = self._prepare_inputs(payload)
        psi_vector = np.asarray(feed[self.primary_input], dtype=np.float32).reshape(-1)
        state_constraints = payload.get("constraints")
        if not isinstance(state_constraints, Mapping):
            state_constraints = None
        decoding = _DecodingParams.from_payload(payload)
        system_prompt = payload.get("system_prompt")
        user_prompt = payload.get("user_prompt")

        descriptors = _summarise_intent(psi_vector)
        summary = ", ".join(descriptors) if descriptors else "balanced intent"

        best_candidate: Optional[Tuple[str, list[int], float, list[tuple[Any, np.ndarray]], int]] = None
        seeds: list[int] = []

        if self.tokens_input is not None:
            beams = max(decoding.beam_size, 1)
            # Seeds are derived from psi so identical requests reproduce
            # identical candidates.
            base_seed = self._candidate_seed(psi_vector)
            for beam_idx in range(beams):
                seed = base_seed + beam_idx
                seeds.append(seed)
                candidate = self._generate_sequence(
                    feed,
                    decoding=decoding,
                    seed=seed,
                )
                if candidate is None:
                    continue
                text, token_ids, quality, outputs, steps = candidate
                if (
                    best_candidate is None
                    or quality > best_candidate[2]
                ):
                    best_candidate = candidate
                # Early exit once a candidate clears the quality bar.
                if quality >= decoding.stop_quality:
                    break

        if best_candidate is None:
            # No generation possible: single forward pass + synopsis text.
            outputs = self.session.run(None, feed)
            formatted_outputs = list(zip(self.io.outputs, outputs))
            quality = self._extract_q_hat(formatted_outputs)
            text = f"Symbolic synopsis → {summary}."
            token_ids: list[int] = []
            steps = 0
        else:
            text, token_ids, quality, formatted_outputs, steps = best_candidate

        formatted = {
            node.name: self._format_output(node.name, value)
            for node, value in formatted_outputs
        }

        if not np.isfinite(quality):
            quality = 0.0
        quality = float(quality)

        metadata = {
            "summary": summary,
            "descriptors": descriptors,
            "constraints": state_constraints or {},
            "decoding": decoding.to_dict(),
            "seeds": seeds,
            "steps": steps,
            "system_prompt": system_prompt if isinstance(system_prompt, str) else None,
            "user_prompt": user_prompt if isinstance(user_prompt, str) else None,
        }

        response = {
            "text": text,
            "tokens": token_ids,
            "quality": quality,
            "q_hat": quality,
            "provider": _DEFAULT_PROVIDER,
            "model": _DEFAULT_MODEL,
            "metadata": metadata,
        }
        # Raw model outputs are merged at the top level of the response.
        response.update(formatted)
        return response
649
-
650
-
651
- __all__ = ["EndpointHandler"]
 
1
+ """Custom inference handler for Hugging Face Inference Endpoints.
2
+
3
+ This module exposes :class:`EndpointHandler`, the entrypoint used by the
4
+ Hugging Face serving stack when ``--task custom`` is selected. The handler
5
+ loads the exported Noesis decoder ONNX graph and accepts symbolic intent
6
+ vectors (``psi``) along with an optional ``slow_state`` memory tensor. The
7
+ outputs mirror the values produced by the training runtime:
8
+
9
+ * ``z_out`` – semantic embedding projected back into symbolic space.
10
+ * ``choice``, ``pain``, ``memory`` and ``quality`` – diagnostic scalars.
11
+ * ``slow_state`` – updated slow memory tensor suitable for recurrent usage.
12
+
13
+ The handler is intentionally lightweight so it can run without the rest of the
14
+ AletheiaEngine Python package being installed.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import importlib
20
+ import importlib.util
21
+ from dataclasses import dataclass
22
+ from pathlib import Path
23
+ import hashlib
24
+ import re
25
+ from typing import Any, Mapping, MutableMapping, Optional, Sequence, Tuple
26
+
27
+ import numpy as np
28
+
29
+
30
+ _WORD_RE = re.compile(r"\w+", re.UNICODE)
31
+
32
+ _INTENT_VOCAB = [
33
+ "clarity",
34
+ "empathy",
35
+ "analysis",
36
+ "evidence",
37
+ "caution",
38
+ "curiosity",
39
+ "context",
40
+ "precision",
41
+ "ethics",
42
+ "resilience",
43
+ "coherence",
44
+ "safety",
45
+ "humility",
46
+ "breadth",
47
+ "depth",
48
+ "innovation",
49
+ "structure",
50
+ "rigour",
51
+ "balance",
52
+ "confidence",
53
+ ]
54
+
55
+ _DEFAULT_PROVIDER = "aletheia-noesis"
56
+ _DEFAULT_MODEL = "noesis-transformer-onnx"
57
+
58
+
59
class _TextEncoder:
    """Deterministic text → vector encoder.

    The Hugging Face Inference Endpoints frequently pass user prompts as
    strings via the ``inputs`` field. The Noesis decoder, however, expects a
    symbolic vector (``psi``) as input. To provide a graceful fallback the
    handler lazily converts short text prompts into a stable float32 vector by
    hashing tokens onto a hypersphere. This mirrors the lightweight
    ``TextEncoder256`` implementation bundled with the full AletheiaEngine
    package while avoiding a heavy import dependency inside the endpoint
    container.
    """

    def __init__(self, dim: int) -> None:
        # Target width; matches the graph's primary input dimension.
        self.dim = dim

    @staticmethod
    def _tokens(text: str) -> list[str]:
        # Lower-cased runs of word characters.
        return [tok.lower() for tok in _WORD_RE.findall(text)]

    @staticmethod
    def _seed(tok: str) -> int:
        # FNV-1a hash for determinism across processes/platforms.
        value = 2166136261
        for byte in tok.encode("utf-8"):
            value ^= byte
            value = (value * 16777619) & 0xFFFFFFFF
        return int(value)

    def encode(self, text: str) -> np.ndarray:
        """Return a unit-norm (1, dim) float32 embedding of *text* (zeros
        when no word tokens are found)."""

        tokens = self._tokens(text)
        if not tokens:
            return np.zeros((1, self.dim), dtype=np.float32)

        vecs = []
        for tok in tokens:
            # Seeded RandomState gives each token a reproducible direction.
            rs = np.random.RandomState(self._seed(tok))
            embedding = rs.normal(0.0, 1.0, size=(self.dim,)).astype(np.float32)
            norm = float(np.linalg.norm(embedding)) or 1.0
            vecs.append(embedding / norm)

        stacked = np.stack(vecs, axis=0)
        pooled = stacked.mean(axis=0, dtype=np.float32, keepdims=True)
        pooled_norm = float(np.linalg.norm(pooled)) or 1.0
        return pooled / pooled_norm
104
+
105
+
106
class _SimpleTokenizer:
    """Minimal tokenizer mirroring the reference Noesis runtime.

    IDs 0–3 are ``<pad>``/``<bos>``/``<eos>``/``<unk>``; the lower-case
    alphabet, digits and basic punctuation occupy IDs 4 onward.
    """

    def __init__(self) -> None:
        special_tokens = ["<pad>", "<bos>", "<eos>", "<unk>"]
        alphabet = list("abcdefghijklmnopqrstuvwxyz0123456789 .,;:'\"!?-\n")
        self._tokens = special_tokens + alphabet
        self._token_to_id = {token: idx for idx, token in enumerate(self._tokens)}

    @property
    def pad_token_id(self) -> int:
        return 0

    @property
    def bos_token_id(self) -> int:
        return 1

    @property
    def eos_token_id(self) -> int:
        return 2

    @property
    def unk_token_id(self) -> int:
        return 3

    def encode(self, text: str) -> list[int]:
        """Wrap *text* in BOS/EOS; unknown characters map to ``<unk>``."""

        tokens = [self.bos_token_id]
        for char in text:
            tokens.append(self._token_to_id.get(char.lower(), self.unk_token_id))
        tokens.append(self.eos_token_id)
        return tokens

    def decode(self, token_ids: Sequence[int]) -> str:
        """Convert token IDs back into a text string."""

        characters: list[str] = []
        for idx in token_ids:
            # Stop at the first EOS; silently drop padding and BOS.
            if idx == self.eos_token_id:
                break
            if idx in {self.pad_token_id, self.bos_token_id}:
                continue

            if 0 <= idx < len(self._tokens):
                token = self._tokens[idx]
                if token not in {"<pad>", "<bos>", "<eos>", "<unk>"}:
                    characters.append(token)
                else:
                    # Only <unk> can reach here; render it as "?".
                    characters.append("?")
            else:
                # Out-of-range IDs also render as "?".
                characters.append("?")

        return "".join(characters)
158
+
159
+
160
def _summarise_intent(psi: Sequence[float], top_k: int = 4) -> list[str]:
    """Convert strongest symbolic dimensions into descriptors.

    Returns up to *top_k* strings ordered by decreasing magnitude, each of
    the form ``"<descriptor> (<elevated|attenuated>, |ψ|=…)"``; an empty
    list for an empty vector.
    """

    vector = np.asarray(list(psi), dtype=np.float32).reshape(-1)
    if vector.size == 0:
        return []

    k = min(top_k, vector.size)
    magnitudes = np.abs(vector)
    top_indices = magnitudes.argsort()[::-1][:k]
    summary: list[str] = []
    for index in top_indices.tolist():
        # Indices wrap around the vocabulary, so any dimensionality works.
        descriptor = _INTENT_VOCAB[index % len(_INTENT_VOCAB)]
        direction = "elevated" if vector[index] >= 0 else "attenuated"
        summary.append(f"{descriptor} ({direction}, |ψ|={magnitudes[index]:.2f})")
    return summary
176
+
177
+
178
@dataclass(frozen=True)
class _DecodingParams:
    """Sampling hyper-parameters, overridable via the request payload."""

    beam_size: int = 6
    temperature: float = 0.8
    top_p: float = 0.9
    # Raised from 256 to 1024 in this revision to allow longer generations.
    max_new_tokens: int = 1024
    min_new_tokens: int = 16  # Minimum tokens before allowing EOS
    stop_quality: float = 0.6

    @classmethod
    def from_payload(cls, payload: Mapping[str, Any]) -> "_DecodingParams":
        """Build params from ``payload['decoding']`` or
        ``payload['parameters']['decoding']``; overrides are coerced to the
        default's type and dropped silently when conversion fails."""

        source: Mapping[str, Any] | None = None
        if "decoding" in payload and isinstance(payload["decoding"], Mapping):
            source = payload["decoding"]
        elif "parameters" in payload and isinstance(payload["parameters"], Mapping):
            candidate = payload["parameters"].get("decoding")
            if isinstance(candidate, Mapping):
                source = candidate

        if not source:
            return cls()

        kwargs: dict[str, Any] = {}
        for field in cls.__dataclass_fields__.keys():  # type: ignore[attr-defined]
            if field in source:
                try:
                    # Coerce using the type of the field's default value.
                    kwargs[field] = type(getattr(cls(), field))(source[field])
                except (TypeError, ValueError):
                    continue
        return cls(**kwargs)

    def to_dict(self) -> dict[str, Any]:
        """Expose the parameters as a plain dict (used in response metadata)."""

        return {field: getattr(self, field) for field in self.__dataclass_fields__.keys()}  # type: ignore[attr-defined]
211
+
212
+
213
@dataclass(frozen=True)
class _ModelIO:
    """Snapshot of ONNX input and output metadata.

    Captured once at start-up so the handler avoids re-querying the session
    on every request.
    """

    # Ordered node descriptors as returned by session.get_inputs().
    inputs: tuple[Any, ...]
    # Ordered node descriptors as returned by session.get_outputs().
    outputs: tuple[Any, ...]
219
+
220
+
221
+ class EndpointHandler:
222
+ """Callable endpoint used by Hugging Face to drive inference."""
223
+
224
+ def __init__(self, path: str | None = None) -> None:
225
+ self.model_dir = Path(path or Path(__file__).parent)
226
+ self.session = self._load_session()
227
+ self.io = self._capture_io()
228
+
229
+ self.primary_input = self.io.inputs[0].name
230
+ self.slow_input = self._find_input("slow_state")
231
+ self.tokens_input = self._find_input("tokens")
232
+ self._primary_dim = self._infer_primary_dim()
233
+ self._text_encoder = _TextEncoder(self._primary_dim)
234
+ self._tokenizer = _SimpleTokenizer()
235
+ self._defaults = {}
236
+ skip_inputs = {self.primary_input}
237
+ if self.slow_input is not None:
238
+ skip_inputs.add(self.slow_input)
239
+ if self.tokens_input is not None:
240
+ skip_inputs.add(self.tokens_input)
241
+ for node in self.io.inputs:
242
+ if node.name in skip_inputs:
243
+ continue
244
+ self._defaults[node.name] = self._zeros_like(node)
245
+ if self.slow_input is not None:
246
+ self._slow_fallback = self._zeros_like(self._input_map[self.slow_input])
247
+ else:
248
+ self._slow_fallback = None
249
+ if self.tokens_input is not None:
250
+ token_node = self._input_map[self.tokens_input]
251
+ self._token_sequence_length = self._infer_sequence_length(token_node)
252
+ self._token_dtype = self._dtype_for(token_node)
253
+ else:
254
+ self._token_sequence_length = 0
255
+ self._token_dtype = np.int64
256
+
257
def _load_session(self):
    """Load the ONNX session, tolerating alternate filenames.

    Returns:
        An ``onnxruntime.InferenceSession`` bound to the CPU provider.

    Raises:
        FileNotFoundError: If no usable ``*.onnx`` file can be located.
    """

    ort = self._import_onnxruntime()
    preferred_names = ("model.onnx", "model_infer.onnx")
    for name in preferred_names:
        candidate = self.model_dir / name
        if candidate.exists():
            return ort.InferenceSession(str(candidate), providers=["CPUExecutionProvider"])

    # ``Path.name`` is already a str, so no extra conversion is needed.
    available = sorted(p.name for p in self.model_dir.glob("*.onnx"))
    if len(available) == 1:
        # Fall back to the lone ONNX artefact if it has a non-standard name.
        return ort.InferenceSession(str(self.model_dir / available[0]), providers=["CPUExecutionProvider"])

    choices = ", ".join(available) or "<none>"
    raise FileNotFoundError(
        "Could not locate any of %s in %s (available: %s)"
        % (", ".join(preferred_names), self.model_dir, choices)
    )
@staticmethod
def _import_onnxruntime():
    """Import :mod:`onnxruntime`, raising a helpful error if it is missing."""

    if importlib.util.find_spec("onnxruntime") is not None:
        return importlib.import_module("onnxruntime")
    raise ModuleNotFoundError(
        "onnxruntime is required to load Noesis decoder ONNX graphs. "
        "Install it with 'pip install onnxruntime'."
    )
@property
def _input_map(self) -> Mapping[str, Any]:
    """Name → NodeArg lookup rebuilt from the captured input metadata."""

    lookup: dict[str, Any] = {}
    for input_node in self.io.inputs:
        lookup[input_node.name] = input_node
    return lookup
def _capture_io(self) -> _ModelIO:
    """Freeze the session's input/output node metadata into a snapshot."""

    graph_inputs = tuple(self.session.get_inputs())
    graph_outputs = tuple(self.session.get_outputs())
    return _ModelIO(inputs=graph_inputs, outputs=graph_outputs)
def _find_input(self, target: str) -> Optional[str]:
    """Return the first graph input whose name matches *target*, case-insensitively."""

    wanted = target.lower()
    matches = (node.name for node in self.io.inputs if node.name.lower() == wanted)
    return next(matches, None)
def _infer_primary_dim(self) -> int:
    """Infer the feature dimension of the primary (psi) input."""

    node = self._input_map[self.primary_input]
    concrete_dims = (
        dim for dim in reversed(node.shape) if isinstance(dim, int) and dim > 0
    )
    # Conservative default matching TextEncoder256 when every axis is dynamic.
    return next(concrete_dims, 256)
def _infer_sequence_length(self, node: Any) -> int:
    """Infer the fixed token-sequence length from *node*'s shape (1 if fully dynamic)."""

    for axis in reversed(getattr(node, "shape", [])):
        if isinstance(axis, int) and axis > 0:
            return axis
    # No concrete axis found: fall back to a single position.
    return 1
@staticmethod
def _onnx_type_to_numpy(type_str: str | None) -> np.dtype:
    """Translate an ONNX tensor type string into a NumPy dtype (float32 fallback)."""

    known_types = {
        "tensor(float)": np.float32,
        "tensor(float16)": np.float16,
        "tensor(double)": np.float64,
        "tensor(int64)": np.int64,
        "tensor(int32)": np.int32,
        "tensor(int16)": np.int16,
        "tensor(int8)": np.int8,
        "tensor(uint8)": np.uint8,
        "tensor(bool)": np.bool_,
    }
    try:
        return known_types[type_str]
    except KeyError:
        # Unrecognised (or missing) type strings default to float32.
        return np.float32
def _dtype_for(self, node: Any) -> np.dtype:
    """Resolve the NumPy dtype declared by an ONNX graph node."""

    onnx_type = getattr(node, "type", None)
    return self._onnx_type_to_numpy(onnx_type)
def _zeros_like(self, node: Any) -> np.ndarray:
    """Build a zero tensor matching *node*'s declared shape, using 1 for dynamic axes."""

    concrete_shape = [
        dim if isinstance(dim, int) and dim > 0 else 1 for dim in node.shape
    ]
    return np.zeros(concrete_shape, dtype=self._dtype_for(node))
def _coerce_array(self, value: Any, *, node: Any, allow_empty: bool = False) -> np.ndarray:
    """Convert *value* into a batched 2D array with *node*'s dtype.

    Args:
        value: Array-like payload field (list, nested list, ndarray, ...).
        node: ONNX input node whose dtype the result must match.
        allow_empty: Permit zero-sized arrays (used for optional state).

    Raises:
        ValueError: On empty input (unless allowed) or rank greater than 2.
    """

    dtype = self._dtype_for(node)
    array = np.asarray(value, dtype=dtype)
    if array.size == 0 and not allow_empty:
        raise ValueError("Received an empty array; provide at least one value.")
    # Promote a single vector to a batch of one.
    if array.ndim == 1:
        array = np.expand_dims(array, axis=0)
    elif array.ndim > 2:
        raise ValueError("Expected a 1D or batched 2D array; received shape %s" % (array.shape,))
    # NOTE(review): np.asarray above already enforced ``dtype``, so this cast
    # looks redundant — harmless, but worth confirming before removing.
    if array.dtype != dtype:
        array = array.astype(dtype, copy=False)
    return array
def _prepare_inputs(self, payload: Mapping[str, Any]) -> MutableMapping[str, np.ndarray]:
    """Build the ONNX feed dict from a request payload.

    Accepts ``psi`` under several aliases, threads an optional
    ``slow_state`` tensor through, and zero-fills every remaining input.

    Raises:
        KeyError: If no psi-like field is present in the payload.
    """

    # Use explicit ``is not None`` checks instead of ``or``-chaining the
    # ``payload.get`` calls: ``or`` raises ValueError on multi-element numpy
    # arrays (ambiguous truth value) and silently skips falsy-but-present
    # values such as an empty list.
    psi = payload.get("psi")
    if psi is None:
        for alias in ("vector", "psi_s", "inputs", "prompt", "text"):
            psi = payload.get(alias)
            if psi is not None:
                break
    if psi is None:
        raise KeyError("Payload must include a 'psi' field containing the symbolic vector.")

    primary_node = self._input_map[self.primary_input]
    inputs: MutableMapping[str, np.ndarray] = {
        self.primary_input: self._vector_from_payload(psi, node=primary_node)
    }

    if self.slow_input is not None:
        slow_value = None
        for alias in ("slow_state", "slow", "state"):
            slow_value = payload.get(alias)
            if slow_value is not None:
                break
        if slow_value is None:
            inputs[self.slow_input] = self._slow_fallback.copy()
        else:
            inputs[self.slow_input] = self._coerce_array(
                slow_value,
                node=self._input_map[self.slow_input],
                allow_empty=True,
            )

    # Copy the cached defaults so recurrent callers cannot mutate them.
    for name, default in self._defaults.items():
        inputs[name] = default.copy()

    return inputs
def _vector_from_payload(self, value: Any, *, node: Any) -> np.ndarray:
    """Encode textual payloads into vectors; pass numeric payloads through."""

    text: Optional[str] = None
    if isinstance(value, str):
        text = value
    elif isinstance(value, (list, tuple)) and value and all(isinstance(item, str) for item in value):
        text = " ".join(value)

    if text is not None:
        return self._coerce_array(self._text_encoder.encode(text), node=node)
    return self._coerce_array(value, node=node)
@staticmethod
def _candidate_seed(psi: np.ndarray) -> int:
    """Derive a deterministic RNG seed from the psi vector's raw bytes."""

    fingerprint = hashlib.sha1(psi.tobytes()).digest()[:4]
    return int.from_bytes(fingerprint, "little", signed=False)
def _token_array_from_ids(self, token_ids: Sequence[int]) -> np.ndarray:
    """Pack *token_ids* into a ``(1, seq_len)`` batch, padding or truncating as needed."""

    ids = list(token_ids)
    if self._token_sequence_length <= 0:
        # Dynamic sequence axis: feed the ids verbatim as a batch of one.
        return np.asarray([ids], dtype=self._token_dtype)

    batch = np.full(
        (1, self._token_sequence_length),
        fill_value=self._tokenizer.pad_token_id,
        dtype=self._token_dtype,
    )
    usable = min(len(ids), self._token_sequence_length)
    if usable > 0:
        batch[0, :usable] = np.asarray(ids[:usable], dtype=self._token_dtype)
    return batch
def _run_candidate(self, base_feed: Mapping[str, np.ndarray], tokens: Sequence[int]) -> list[tuple[Any, np.ndarray]]:
    """Run one forward pass over *base_feed* with *tokens* substituted in."""

    feed: dict[str, Any] = {}
    for name, value in base_feed.items():
        # Defensive copies keep the shared base feed immutable across beams.
        feed[name] = value.copy() if isinstance(value, np.ndarray) else value
    if self.tokens_input is not None:
        feed[self.tokens_input] = self._token_array_from_ids(tokens)
    raw_outputs = self.session.run(None, feed)
    return list(zip(self.io.outputs, raw_outputs))
@staticmethod
def _extract_logits(outputs: Sequence[tuple[Any, np.ndarray]]) -> Optional[np.ndarray]:
    """Locate the logits tensor among *outputs*, falling back to the first output."""

    if not outputs:
        return None
    for node, value in outputs:
        if getattr(node, "name", "").lower() == "logits":
            return np.asarray(value, dtype=np.float32)
    return np.asarray(outputs[0][1], dtype=np.float32)
@staticmethod
def _sample_next_token(
    logits: np.ndarray,
    decoding: _DecodingParams,
    rng: np.random.Generator,
) -> int:
    """Sample one token id from *logits* using temperature + nucleus (top-p) sampling.

    Falls back to greedy argmax when the temperature is effectively zero,
    the logits contain no finite values, or the softmax mass collapses.
    """

    vector = np.asarray(logits, dtype=np.float64).reshape(-1)
    temperature = max(float(decoding.temperature), 1e-5)
    top_p = float(decoding.top_p)

    # Greedy path: degenerate temperature or no finite logits at all.
    # (Was ``int(int(np.argmax(...)))`` — one conversion suffices.)
    if temperature <= 1e-5 or not np.isfinite(vector).any():
        return int(np.argmax(vector))

    # Temperature-scaled, max-subtracted softmax for numerical stability.
    stabilized = vector / temperature
    stabilized -= np.max(stabilized)
    probs = np.exp(stabilized)
    probs = np.nan_to_num(probs, nan=0.0, posinf=0.0, neginf=0.0)
    total = probs.sum()
    if total <= 0.0:
        return int(np.argmax(vector))
    probs /= total

    if top_p <= 0.0:
        return int(np.argmax(probs))

    if 0.0 < top_p < 1.0:
        # Nucleus filtering: keep the smallest prefix of the sorted
        # distribution whose cumulative mass stays within top_p.
        sorted_indices = np.argsort(-probs)
        sorted_probs = probs[sorted_indices]
        cumulative = np.cumsum(sorted_probs)
        mask = cumulative <= top_p
        if mask.size > 0:
            mask[0] = True  # always keep the most likely token
        filtered_indices = sorted_indices[mask]
        filtered_probs = sorted_probs[mask]
        filtered_total = filtered_probs.sum()
        if filtered_total <= 0.0:
            # Degenerate mass after filtering: fall back to the full set.
            filtered_indices = sorted_indices
            filtered_probs = sorted_probs
            filtered_total = filtered_probs.sum()
        filtered_probs = filtered_probs / filtered_total
        choice = rng.choice(len(filtered_indices), p=filtered_probs)
        return int(filtered_indices[int(choice)])

    choice = rng.choice(len(probs), p=probs)
    return int(choice)
def _generate_sequence(
    self,
    base_feed: Mapping[str, np.ndarray],
    *,
    decoding: _DecodingParams,
    seed: int,
) -> Optional[Tuple[str, list[int], float, list[tuple[Any, np.ndarray]], int]]:
    """Autoregressively sample one candidate sequence.

    Returns ``(text, token_ids, quality, outputs, steps)``, or ``None``
    when the graph has no token input or no usable outputs were produced.
    """

    if self.tokens_input is None:
        return None

    rng = np.random.default_rng(seed)
    token_ids: list[int] = [self._tokenizer.bos_token_id]
    quality = float("-inf")
    formatted_outputs: list[tuple[Any, np.ndarray]] | None = None
    steps = 0

    max_steps = max(decoding.max_new_tokens, 1)
    for _ in range(max_steps):
        outputs = self._run_candidate(base_feed, token_ids)
        logits = self._extract_logits(outputs)
        if logits is None:
            break
        # Clamp to the last available position in case the exported graph
        # uses a fixed sequence axis shorter than the generated prefix.
        last_index = min(len(token_ids) - 1, logits.shape[1] - 1)
        next_logits = logits[0, last_index].copy()

        # Apply strong penalty to EOS token if we haven't reached min_new_tokens
        # This reduces the probability of generating EOS prematurely
        if steps < decoding.min_new_tokens:
            next_logits[self._tokenizer.eos_token_id] -= 10.0

        next_token = self._sample_next_token(next_logits, decoding, rng)
        token_ids.append(int(next_token))
        steps += 1

        # Check if we generated EOS prematurely and replace with space
        if token_ids[-1] == self._tokenizer.eos_token_id and steps < decoding.min_new_tokens:
            # Find space token ID (fallback to 'a' if space not found)
            # NOTE(review): reaches into the tokenizer's private
            # ``_token_to_id`` table — confirm no public lookup exists.
            space_token_id = self._tokenizer._token_to_id.get(" ", self._tokenizer._token_to_id.get("a", self._tokenizer.unk_token_id))
            token_ids[-1] = space_token_id
            # Note: In production, add logging here to track how often this happens

        # Second forward pass per step: re-scores the extended prefix so
        # ``q_hat`` reflects the just-appended token. NOTE(review): this
        # doubles the inference cost per generated token.
        outputs = self._run_candidate(base_feed, token_ids)
        formatted_outputs = outputs
        quality = self._extract_q_hat(outputs)

        # Only allow EOS break if we've generated at least min_new_tokens (excluding BOS)
        if token_ids[-1] == self._tokenizer.eos_token_id and steps >= decoding.min_new_tokens:
            break
        if self._token_sequence_length > 0 and len(token_ids) >= self._token_sequence_length:
            break

    if formatted_outputs is None:
        return None

    text = self._tokenizer.decode(token_ids)
    return text, token_ids, float(quality), formatted_outputs, steps
@staticmethod
def _extract_q_hat(outputs: Sequence[tuple[Any, np.ndarray]]) -> float:
    """Pull the scalar quality estimate (``q_hat``) out of *outputs*."""

    def _scalar(raw: np.ndarray) -> float:
        # Collapse a 0-d / singleton tensor to a plain Python float.
        return float(np.squeeze(np.asarray(raw, dtype=np.float32)))

    for node, value in outputs:
        if getattr(node, "name", "").lower() == "q_hat":
            return _scalar(value)
    # Fallback if the node name differs slightly.
    for node, value in outputs:
        if "q" in getattr(node, "name", "").lower():
            return _scalar(value)
    return float("-inf")
@staticmethod
def _format_output(name: str, value: np.ndarray) -> Any:
    """Sanitise *value* into a JSON-friendly float or nested list.

    ``name`` is accepted for call-site symmetry but is not consulted.
    """

    cleaned = np.nan_to_num(
        np.asarray(value, dtype=np.float32), nan=0.0, posinf=0.0, neginf=0.0
    )
    collapsed = np.squeeze(cleaned)
    if collapsed.ndim == 0:
        return float(collapsed)
    return collapsed.tolist()
def __call__(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
    """Handle one inference request and return a JSON-serialisable mapping.

    ``data`` follows the HF Inference Endpoints convention: the request
    body, optionally wrapped under an ``inputs`` key.
    """

    payload = data.get("inputs", data)
    if not isinstance(payload, Mapping):
        # Bare payloads (a string or a vector) are treated as psi directly.
        payload = {"psi": payload}

    feed = self._prepare_inputs(payload)
    psi_vector = np.asarray(feed[self.primary_input], dtype=np.float32).reshape(-1)
    state_constraints = payload.get("constraints")
    if not isinstance(state_constraints, Mapping):
        state_constraints = None
    decoding = _DecodingParams.from_payload(payload)
    system_prompt = payload.get("system_prompt")
    user_prompt = payload.get("user_prompt")

    descriptors = _summarise_intent(psi_vector)
    summary = ", ".join(descriptors) if descriptors else "balanced intent"

    best_candidate: Optional[Tuple[str, list[int], float, list[tuple[Any, np.ndarray]], int]] = None
    seeds: list[int] = []

    if self.tokens_input is not None:
        # Best-of-N sampling with seeds derived deterministically from psi,
        # so identical requests reproduce identical candidates.
        beams = max(decoding.beam_size, 1)
        base_seed = self._candidate_seed(psi_vector)
        for beam_idx in range(beams):
            seed = base_seed + beam_idx
            seeds.append(seed)
            candidate = self._generate_sequence(
                feed,
                decoding=decoding,
                seed=seed,
            )
            if candidate is None:
                continue
            text, token_ids, quality, outputs, steps = candidate
            if (
                best_candidate is None
                or quality > best_candidate[2]
            ):
                best_candidate = candidate
            # Early exit once a candidate clears the configured quality bar.
            if quality >= decoding.stop_quality:
                break

    if best_candidate is None:
        # No token head (or generation produced nothing): run one plain
        # forward pass and synthesise a symbolic summary instead.
        outputs = self.session.run(None, feed)
        formatted_outputs = list(zip(self.io.outputs, outputs))
        quality = self._extract_q_hat(formatted_outputs)
        text = f"Symbolic synopsis → {summary}."
        token_ids: list[int] = []
        steps = 0
    else:
        text, token_ids, quality, formatted_outputs, steps = best_candidate

    # Mirror every raw graph output as a JSON-friendly field.
    formatted = {
        node.name: self._format_output(node.name, value)
        for node, value in formatted_outputs
    }

    # ``-inf`` sentinel (no q_hat found) is reported as a neutral 0.0.
    if not np.isfinite(quality):
        quality = 0.0
    quality = float(quality)

    metadata = {
        "summary": summary,
        "descriptors": descriptors,
        "constraints": state_constraints or {},
        "decoding": decoding.to_dict(),
        "seeds": seeds,
        "steps": steps,
        "system_prompt": system_prompt if isinstance(system_prompt, str) else None,
        "user_prompt": user_prompt if isinstance(user_prompt, str) else None,
    }

    response = {
        "text": text,
        "tokens": token_ids,
        "quality": quality,
        "q_hat": quality,
        "provider": _DEFAULT_PROVIDER,
        "model": _DEFAULT_MODEL,
        "metadata": metadata,
    }
    # Raw output fields intentionally override any same-named response keys.
    response.update(formatted)
    return response
# Public API: only the handler class is exported.
__all__ = ["EndpointHandler"]