gnai-creator committed on
Commit
2a9a820
·
verified ·
1 Parent(s): d99d36a

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. handler.py +144 -144
  3. noesis_model.onnx.data +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ noesis_model.onnx.data filter=lfs diff=lfs merge=lfs -text
handler.py CHANGED
@@ -1,144 +1,144 @@
1
- """Custom inference handler for Hugging Face Inference Endpoints.
2
-
3
- This module exposes :class:`EndpointHandler`, the entrypoint used by the
4
- Hugging Face serving stack when ``--task custom`` is selected. The handler
5
- loads the exported Noesis decoder ONNX graph and accepts symbolic intent
6
- vectors (``psi``) along with an optional ``slow_state`` memory tensor. The
7
- outputs mirror the values produced by the training runtime:
8
-
9
- * ``z_out`` – semantic embedding projected back into symbolic space.
10
- * ``choice``, ``pain``, ``memory`` and ``quality`` – diagnostic scalars.
11
- * ``slow_state`` – updated slow memory tensor suitable for recurrent usage.
12
-
13
- The handler is intentionally lightweight so it can run without the rest of the
14
- AletheiaEngine Python package being installed.
15
- """
16
-
17
- from __future__ import annotations
18
-
19
- from dataclasses import dataclass
20
- from pathlib import Path
21
- from typing import Any, Mapping, MutableMapping, Optional
22
-
23
- import numpy as np
24
- import onnxruntime as ort
25
-
26
-
27
- @dataclass(frozen=True)
28
- class _ModelIO:
29
- """Snapshot of ONNX input and output metadata."""
30
-
31
- inputs: tuple[ort.NodeArg, ...]
32
- outputs: tuple[ort.NodeArg, ...]
33
-
34
-
35
- class EndpointHandler:
36
- """Callable endpoint used by Hugging Face to drive inference."""
37
-
38
- def __init__(self, path: str | None = None) -> None:
39
- self.model_dir = Path(path or Path(__file__).parent)
40
- self.session = self._load_session()
41
- self.io = self._capture_io()
42
-
43
- self.primary_input = self.io.inputs[0].name
44
- self.slow_input = self._find_input("slow_state")
45
- self._defaults = {
46
- node.name: self._zeros_like(node)
47
- for node in self.io.inputs
48
- if node.name not in {self.primary_input, self.slow_input}
49
- }
50
- if self.slow_input is not None:
51
- self._slow_fallback = self._zeros_like(self._input_map[self.slow_input])
52
- else:
53
- self._slow_fallback = None
54
-
55
- def _load_session(self) -> ort.InferenceSession:
56
- model_path = self.model_dir / "model_infer.onnx"
57
- if not model_path.exists():
58
- available = ", ".join(sorted(str(p.name) for p in self.model_dir.glob("*.onnx"))) or "<none>"
59
- raise FileNotFoundError(
60
- "Could not locate 'model.onnx' in %s (available: %s)" % (self.model_dir, available)
61
- )
62
- return ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
63
-
64
- @property
65
- def _input_map(self) -> Mapping[str, ort.NodeArg]:
66
- return {node.name: node for node in self.io.inputs}
67
-
68
- def _capture_io(self) -> _ModelIO:
69
- return _ModelIO(inputs=tuple(self.session.get_inputs()), outputs=tuple(self.session.get_outputs()))
70
-
71
- def _find_input(self, target: str) -> Optional[str]:
72
- target = target.lower()
73
- for node in self.io.inputs:
74
- if node.name.lower() == target:
75
- return node.name
76
- return None
77
-
78
- @staticmethod
79
- def _zeros_like(node: ort.NodeArg) -> np.ndarray:
80
- shape: list[int] = []
81
- for dim in node.shape:
82
- if isinstance(dim, int) and dim > 0:
83
- shape.append(dim)
84
- else:
85
- shape.append(1)
86
- return np.zeros(shape, dtype=np.float32)
87
-
88
- @staticmethod
89
- def _coerce_array(value: Any, *, allow_empty: bool = False) -> np.ndarray:
90
- array = np.asarray(value, dtype=np.float32)
91
- if array.size == 0 and not allow_empty:
92
- raise ValueError("Received an empty array; provide at least one value.")
93
- if array.ndim == 1:
94
- array = np.expand_dims(array, axis=0)
95
- elif array.ndim > 2:
96
- raise ValueError("Expected a 1D or batched 2D array; received shape %s" % (array.shape,))
97
- return array
98
-
99
- def _prepare_inputs(self, payload: Mapping[str, Any]) -> MutableMapping[str, np.ndarray]:
100
- psi = payload.get("psi")
101
- if psi is None:
102
- psi = payload.get("vector") or payload.get("psi_s") or payload.get("inputs")
103
- if psi is None:
104
- raise KeyError("Payload must include a 'psi' field containing the symbolic vector.")
105
-
106
- inputs: MutableMapping[str, np.ndarray] = {self.primary_input: self._coerce_array(psi)}
107
-
108
- if self.slow_input is not None:
109
- slow_value = payload.get("slow_state") or payload.get("slow") or payload.get("state")
110
- if slow_value is None:
111
- inputs[self.slow_input] = self._slow_fallback.copy()
112
- else:
113
- inputs[self.slow_input] = self._coerce_array(slow_value, allow_empty=True)
114
-
115
- for name, default in self._defaults.items():
116
- inputs[name] = default.copy()
117
-
118
- return inputs
119
-
120
- @staticmethod
121
- def _format_output(name: str, value: np.ndarray) -> Any:
122
- value = np.asarray(value, dtype=np.float32)
123
- value = np.nan_to_num(value, nan=0.0, posinf=0.0, neginf=0.0)
124
- squeezed = np.squeeze(value)
125
- if squeezed.ndim == 0:
126
- return float(squeezed)
127
- return squeezed.tolist()
128
-
129
- def __call__(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
130
- payload = data.get("inputs", data)
131
- if not isinstance(payload, Mapping):
132
- payload = {"psi": payload}
133
-
134
- feed = self._prepare_inputs(payload)
135
- outputs = self.session.run(None, feed)
136
-
137
- result = {
138
- node.name: self._format_output(node.name, value)
139
- for node, value in zip(self.io.outputs, outputs)
140
- }
141
- return result
142
-
143
-
144
- __all__ = ["EndpointHandler"]
 
1
+ """Custom inference handler for Hugging Face Inference Endpoints.
2
+
3
+ This module exposes :class:`EndpointHandler`, the entrypoint used by the
4
+ Hugging Face serving stack when ``--task custom`` is selected. The handler
5
+ loads the exported Noesis decoder ONNX graph and accepts symbolic intent
6
+ vectors (``psi``) along with an optional ``slow_state`` memory tensor. The
7
+ outputs mirror the values produced by the training runtime:
8
+
9
+ * ``z_out`` – semantic embedding projected back into symbolic space.
10
+ * ``choice``, ``pain``, ``memory`` and ``quality`` – diagnostic scalars.
11
+ * ``slow_state`` – updated slow memory tensor suitable for recurrent usage.
12
+
13
+ The handler is intentionally lightweight so it can run without the rest of the
14
+ AletheiaEngine Python package being installed.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Any, Mapping, MutableMapping, Optional
22
+
23
+ import numpy as np
24
+ import onnxruntime as ort
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class _ModelIO:
29
+ """Snapshot of ONNX input and output metadata."""
30
+
31
+ inputs: tuple[ort.NodeArg, ...]
32
+ outputs: tuple[ort.NodeArg, ...]
33
+
34
+
35
+ class EndpointHandler:
36
+ """Callable endpoint used by Hugging Face to drive inference."""
37
+
38
+ def __init__(self, path: str | None = None) -> None:
39
+ self.model_dir = Path(path or Path(__file__).parent)
40
+ self.session = self._load_session()
41
+ self.io = self._capture_io()
42
+
43
+ self.primary_input = self.io.inputs[0].name
44
+ self.slow_input = self._find_input("slow_state")
45
+ self._defaults = {
46
+ node.name: self._zeros_like(node)
47
+ for node in self.io.inputs
48
+ if node.name not in {self.primary_input, self.slow_input}
49
+ }
50
+ if self.slow_input is not None:
51
+ self._slow_fallback = self._zeros_like(self._input_map[self.slow_input])
52
+ else:
53
+ self._slow_fallback = None
54
+
55
+ def _load_session(self) -> ort.InferenceSession:
56
+ model_path = self.model_dir / "model.onnx"
57
+ if not model_path.exists():
58
+ available = ", ".join(sorted(str(p.name) for p in self.model_dir.glob("*.onnx"))) or "<none>"
59
+ raise FileNotFoundError(
60
+ "Could not locate 'model.onnx' in %s (available: %s)" % (self.model_dir, available)
61
+ )
62
+ return ort.InferenceSession(str(model_path), providers=["CPUExecutionProvider"])
63
+
64
+ @property
65
+ def _input_map(self) -> Mapping[str, ort.NodeArg]:
66
+ return {node.name: node for node in self.io.inputs}
67
+
68
+ def _capture_io(self) -> _ModelIO:
69
+ return _ModelIO(inputs=tuple(self.session.get_inputs()), outputs=tuple(self.session.get_outputs()))
70
+
71
+ def _find_input(self, target: str) -> Optional[str]:
72
+ target = target.lower()
73
+ for node in self.io.inputs:
74
+ if node.name.lower() == target:
75
+ return node.name
76
+ return None
77
+
78
+ @staticmethod
79
+ def _zeros_like(node: ort.NodeArg) -> np.ndarray:
80
+ shape: list[int] = []
81
+ for dim in node.shape:
82
+ if isinstance(dim, int) and dim > 0:
83
+ shape.append(dim)
84
+ else:
85
+ shape.append(1)
86
+ return np.zeros(shape, dtype=np.float32)
87
+
88
+ @staticmethod
89
+ def _coerce_array(value: Any, *, allow_empty: bool = False) -> np.ndarray:
90
+ array = np.asarray(value, dtype=np.float32)
91
+ if array.size == 0 and not allow_empty:
92
+ raise ValueError("Received an empty array; provide at least one value.")
93
+ if array.ndim == 1:
94
+ array = np.expand_dims(array, axis=0)
95
+ elif array.ndim > 2:
96
+ raise ValueError("Expected a 1D or batched 2D array; received shape %s" % (array.shape,))
97
+ return array
98
+
99
+ def _prepare_inputs(self, payload: Mapping[str, Any]) -> MutableMapping[str, np.ndarray]:
100
+ psi = payload.get("psi")
101
+ if psi is None:
102
+ psi = payload.get("vector") or payload.get("psi_s") or payload.get("inputs")
103
+ if psi is None:
104
+ raise KeyError("Payload must include a 'psi' field containing the symbolic vector.")
105
+
106
+ inputs: MutableMapping[str, np.ndarray] = {self.primary_input: self._coerce_array(psi)}
107
+
108
+ if self.slow_input is not None:
109
+ slow_value = payload.get("slow_state") or payload.get("slow") or payload.get("state")
110
+ if slow_value is None:
111
+ inputs[self.slow_input] = self._slow_fallback.copy()
112
+ else:
113
+ inputs[self.slow_input] = self._coerce_array(slow_value, allow_empty=True)
114
+
115
+ for name, default in self._defaults.items():
116
+ inputs[name] = default.copy()
117
+
118
+ return inputs
119
+
120
+ @staticmethod
121
+ def _format_output(name: str, value: np.ndarray) -> Any:
122
+ value = np.asarray(value, dtype=np.float32)
123
+ value = np.nan_to_num(value, nan=0.0, posinf=0.0, neginf=0.0)
124
+ squeezed = np.squeeze(value)
125
+ if squeezed.ndim == 0:
126
+ return float(squeezed)
127
+ return squeezed.tolist()
128
+
129
+ def __call__(self, data: Mapping[str, Any]) -> Mapping[str, Any]:
130
+ payload = data.get("inputs", data)
131
+ if not isinstance(payload, Mapping):
132
+ payload = {"psi": payload}
133
+
134
+ feed = self._prepare_inputs(payload)
135
+ outputs = self.session.run(None, feed)
136
+
137
+ result = {
138
+ node.name: self._format_output(node.name, value)
139
+ for node, value in zip(self.io.outputs, outputs)
140
+ }
141
+ return result
142
+
143
+
144
+ __all__ = ["EndpointHandler"]
noesis_model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09208897eb94632e675cf7ea7aa2e5b30fbb18014cebd26faa8a96ed99bd600f
3
+ size 508690432