Upload folder using huggingface_hub
- handler.py +144 -0
- requirements.txt +2 -0
handler.py
ADDED
@@ -0,0 +1,144 @@
"""Custom inference handler for Hugging Face Inference Endpoints.

This module exposes :class:`EndpointHandler`, the entrypoint used by the
Hugging Face serving stack when ``--task custom`` is selected. The handler
loads the exported Noesis decoder ONNX graph and accepts symbolic intent
vectors (``psi``) along with an optional ``slow_state`` memory tensor. The
outputs mirror the values produced by the training runtime:

* ``z_out`` – semantic embedding projected back into symbolic space.
* ``choice``, ``pain``, ``memory`` and ``quality`` – diagnostic scalars.
* ``slow_state`` – updated slow memory tensor suitable for recurrent usage.

The handler is intentionally lightweight so it can run without the rest of the
AletheiaEngine Python package being installed.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Any, Mapping, MutableMapping, Optional

import numpy as np
import onnxruntime as ort


@dataclass(frozen=True)
class _ModelIO:
    """Snapshot of ONNX input and output metadata."""

    inputs: tuple[ort.NodeArg, ...]
    outputs: tuple[ort.NodeArg, ...]


class EndpointHandler:
    """Callable endpoint used by Hugging Face to drive inference."""

    def __init__(self, path: str | None = None) -> None:
        self.model_dir = Path(path or Path(__file__).parent)
        self.session = self._load_session()
        self.io = self._capture_io()

        self.primary_input = self.io.inputs[0].name
        self.slow_input = self._find_input("slow_state")
        # Zero-filled placeholders for any auxiliary inputs beyond psi and slow_state.
        self._defaults = {
            node.name: self._zeros_like(node)
            for node in self.io.inputs
            if node.name not in {self.primary_input, self.slow_input}
        }
        if self.slow_input is not None:
            self._slow_fallback = self._zeros_like(self._input_map[self.slow_input])
        else:
            self._slow_fallback = None

    def _load_session(self) -> ort.InferenceSession:
        model_path = self.model_dir / "model.onnx"
        if not model_path.exists():
            available = ", ".join(sorted(str(p.name) for p in self.model_dir.glob("*.onnx"))) or "<none>"
            raise FileNotFoundError(
                "Could not locate 'model.onnx' in %s (available: %s)" % (self.model_dir, available)
            )
        return ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])

    @property
    def _input_map(self) -> Mapping[str, ort.NodeArg]:
        return {node.name: node for node in self.io.inputs}

    def _capture_io(self) -> _ModelIO:
        return _ModelIO(inputs=tuple(self.session.get_inputs()), outputs=tuple(self.session.get_outputs()))

    def _find_input(self, target: str) -> Optional[str]:
        target = target.lower()
        for node in self.io.inputs:
            if node.name.lower() == target:
                return node.name
        return None

    @staticmethod
    def _zeros_like(node: ort.NodeArg) -> np.ndarray:
        # Dynamic or unknown dimensions are replaced with 1 so a valid
        # placeholder tensor can always be constructed.
        shape: list[int] = []
        for dim in node.shape:
            if isinstance(dim, int) and dim > 0:
                shape.append(dim)
            else:
                shape.append(1)
        return np.zeros(shape, dtype=np.float32)

    @staticmethod
    def _coerce_array(value: Any, *, allow_empty: bool = False) -> np.ndarray:
        array = np.asarray(value, dtype=np.float32)
        if array.size == 0 and not allow_empty:
            raise ValueError("Received an empty array; provide at least one value.")
        if array.ndim == 1:
            array = np.expand_dims(array, axis=0)
        elif array.ndim > 2:
            raise ValueError("Expected a 1D or batched 2D array; received shape %s" % (array.shape,))
        return array

    def _prepare_inputs(self, payload: Mapping[str, Any]) -> MutableMapping[str, np.ndarray]:
        psi = payload.get("psi")
        if psi is None:
            psi = payload.get("vector") or payload.get("psi_s") or payload.get("inputs")
        if psi is None:
            raise KeyError("Payload must include a 'psi' field containing the symbolic vector.")

        inputs: MutableMapping[str, np.ndarray] = {self.primary_input: self._coerce_array(psi)}

        if self.slow_input is not None:
            slow_value = payload.get("slow_state") or payload.get("slow") or payload.get("state")
            if slow_value is None:
                inputs[self.slow_input] = self._slow_fallback.copy()
            else:
                inputs[self.slow_input] = self._coerce_array(slow_value, allow_empty=True)

        for name, default in self._defaults.items():
            inputs[name] = default.copy()

        return inputs

    @staticmethod
    def _format_output(name: str, value: np.ndarray) -> Any:
        value = np.asarray(value, dtype=np.float32)
        value = np.nan_to_num(value, nan=0.0, posinf=0.0, neginf=0.0)
        squeezed = np.squeeze(value)
        if squeezed.ndim == 0:
            return float(squeezed)
        return squeezed.tolist()

    def __call__(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
        # Accept both {"inputs": {...}} envelopes and raw payloads.
        payload = data.get("inputs", data)
        if not isinstance(payload, Mapping):
            payload = {"psi": payload}

        feed = self._prepare_inputs(payload)
        outputs = self.session.run(None, feed)

        result = {
            node.name: self._format_output(node.name, value)
            for node, value in zip(self.io.outputs, outputs)
        }
        return result


__all__ = ["EndpointHandler"]
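For quick local validation, the handler can be instantiated directly against a checkout of this repository. A minimal sketch, assuming ``model.onnx`` sits next to ``handler.py``; the ``psi`` values and their length are illustrative placeholders, since the real dimensionality is fixed by the exported graph:

    # Local smoke test; psi dimensionality here is a placeholder.
    from handler import EndpointHandler

    handler = EndpointHandler(".")  # directory containing model.onnx
    result = handler({"inputs": {"psi": [0.1, -0.2, 0.3, 0.4]}})
    print(result)  # keys mirror the ONNX output names, e.g. z_out, choice, slow_state

Because ``__call__`` falls back to a zero-filled ``slow_state`` when none is supplied, the first call can omit it and subsequent calls can feed back the ``slow_state`` returned in the previous result for recurrent usage.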
requirements.txt
CHANGED
@@ -3,3 +3,5 @@ uvicorn[standard]>=0.23
 pydantic>=2.6
 torch>=2.1
 numpy>=1.24
+onnxruntime
+numpy