Ship bundle-native Open Structure runtime in public release
Browse files- README.md +8 -4
- docs/AETHON_OPEN_STRUCTURE_HF_MODEL_CARD.md +5 -3
- docs/AETHON_OPEN_STRUCTURE_RUNTIME.md +6 -2
- examples/aethon_open_structure_python.py +47 -487
- run_aethon.py +36 -0
- runtime/aethon/__init__.py +1 -0
- runtime/aethon/rfi_abstraction.py +78 -0
- runtime/aethon/rfi_bundle.py +101 -0
- runtime/aethon/rfi_document_filter.py +147 -0
- runtime/aethon/rfi_graph.py +769 -0
- runtime/aethon/rfi_ingest.py +445 -0
- runtime/aethon/rfi_interpreter.py +144 -0
- runtime/aethon/rfi_lexicon.py +182 -0
- runtime/aethon/rfi_math.py +228 -0
- runtime/aethon/rfi_metrics.py +78 -0
- runtime/aethon/rfi_ontology.py +242 -0
- runtime/aethon/rfi_query.py +1079 -0
- runtime/aethon/rfi_query_forms.py +67 -0
- runtime/aethon/rfi_reasoner.py +245 -0
- runtime/aethon/rfi_reasoning_rules.py +59 -0
- runtime/aethon/rfi_runtime.py +359 -0
- runtime/aethon/rfi_semantics.py +81 -0
- runtime/aethon/rfi_surface.py +546 -0
- runtime/aethon/rfi_surface_lexicon.py +43 -0
README.md
CHANGED
|
@@ -208,7 +208,7 @@ Aethon generalizes by reusing learned structure across:
|
|
| 208 |
The intended public experience is model-like:
|
| 209 |
|
| 210 |
- load the bundle
|
| 211 |
-
- create a runtime object
|
| 212 |
- call `ask(...)`
|
| 213 |
- get natural text back
|
| 214 |
|
|
@@ -229,10 +229,12 @@ finally:
|
|
| 229 |
Portable runtime included in the release:
|
| 230 |
|
| 231 |
- `examples/aethon_open_structure_python.py`
|
|
|
|
|
|
|
| 232 |
|
| 233 |
-
|
| 234 |
|
| 235 |
-
It should be treated as a real runnable Open Structure entry point, not as a storage demo.
|
| 236 |
|
| 237 |
## Prompt Examples
|
| 238 |
|
|
@@ -393,6 +395,8 @@ Additional docs in this release:
|
|
| 393 |
- `docs/aethon_n1_bundle_schema.json`
|
| 394 |
- `docs/AETHON_OPEN_STRUCTURE_RUNTIME.md`
|
| 395 |
|
| 396 |
-
|
| 397 |
|
| 398 |
- `examples/aethon_open_structure_python.py`
|
|
|
|
|
|
|
|
|
The intended public experience is model-like:

- load the bundle
- create a runtime object from the shipped release
- call `ask(...)`
- get natural text back
|
| 214 |
|
|
|
|
Portable runtime included in the release:

- `examples/aethon_open_structure_python.py`
- `run_aethon.py`
- `runtime/aethon/...`

This release now ships a portable bundle-native runtime pack.

It should be treated as a real runnable Open Structure entry point, not as a storage demo or thin adapter.

## Prompt Examples
|
| 240 |
|
|
|
|
- `docs/aethon_n1_bundle_schema.json`
- `docs/AETHON_OPEN_STRUCTURE_RUNTIME.md`

Portable runtime entry points:

- `examples/aethon_open_structure_python.py`
- `run_aethon.py`
- `runtime/aethon/...`
|
docs/AETHON_OPEN_STRUCTURE_HF_MODEL_CARD.md
CHANGED
|
@@ -208,7 +208,7 @@ Aethon generalizes by reusing learned structure across:
|
|
| 208 |
The intended public experience is model-like:
|
| 209 |
|
| 210 |
- load the bundle
|
| 211 |
-
- create a runtime object
|
| 212 |
- call `ask(...)`
|
| 213 |
- get natural text back
|
| 214 |
|
|
@@ -229,10 +229,12 @@ finally:
|
|
| 229 |
Portable runtime included in the release:
|
| 230 |
|
| 231 |
- `examples/aethon_open_structure_python.py`
|
|
|
|
|
|
|
| 232 |
|
| 233 |
-
|
| 234 |
|
| 235 |
-
It should be treated as a real runnable Open Structure entry point, not as a storage demo.
|
| 236 |
|
| 237 |
## Prompt Examples
|
| 238 |
|
|
|
|
The intended public experience is model-like:

- load the bundle
- create a runtime object from the shipped release
- call `ask(...)`
- get natural text back
|
| 214 |
|
|
|
|
Portable runtime included in the release:

- `examples/aethon_open_structure_python.py`
- `run_aethon.py`
- `runtime/aethon/...`

This release now ships a portable bundle-native runtime pack.

It should be treated as a real runnable Open Structure entry point, not as a storage demo or thin adapter.

## Prompt Examples
|
| 240 |
|
docs/AETHON_OPEN_STRUCTURE_RUNTIME.md
CHANGED
|
@@ -51,15 +51,19 @@ Then load the persistent bundle memory from `graph.sqlite3`.
|
|
| 51 |
The recommended public shape is:
|
| 52 |
|
| 53 |
1. pull the bundle
|
| 54 |
-
2. construct a runtime object
|
| 55 |
3. call `ask(...)`
|
| 56 |
4. receive natural text back
|
| 57 |
|
| 58 |
Starter example in this repo:
|
| 59 |
|
| 60 |
- `examples/aethon_open_structure_python.py`
|
|
|
|
|
|
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
| 63 |
|
| 64 |
## Minimum Read Path
|
| 65 |
|
|
|
|
The recommended public shape is:

1. pull the bundle
2. construct a runtime object from the shipped release
3. call `ask(...)`
4. receive natural text back

Starter example in this repo:

- `examples/aethon_open_structure_python.py`
- `run_aethon.py`
- `runtime/aethon/...`

The release now ships a portable bundle-native runtime pack.

That runtime hides storage details behind a model-facing interface so developers interact with Aethon as a model rather than as a data store.

## Minimum Read Path
|
| 69 |
|
examples/aethon_open_structure_python.py
CHANGED
|
@@ -1,532 +1,92 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
-
import
|
| 4 |
-
import re
|
| 5 |
-
import sqlite3
|
| 6 |
-
from collections import deque
|
| 7 |
from dataclasses import dataclass
|
| 8 |
from pathlib import Path
|
| 9 |
|
| 10 |
-
from huggingface_hub import
|
| 11 |
|
| 12 |
|
| 13 |
@dataclass(frozen=True)
|
| 14 |
class AethonOpenStructureResponse:
|
| 15 |
answer: str
|
| 16 |
text: str
|
|
|
|
|
|
|
|
|
|
| 17 |
mode: str
|
| 18 |
|
| 19 |
|
| 20 |
class AethonOpenStructureModel:
|
| 21 |
-
"""Portable
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
"lives": "lives_in",
|
| 26 |
-
"stay": "lives_in",
|
| 27 |
-
"sleep": "lives_in",
|
| 28 |
-
"located": "located_in",
|
| 29 |
-
"depend": "depend_on",
|
| 30 |
-
"depends": "depend_on",
|
| 31 |
-
"return": "return",
|
| 32 |
-
"returns": "return",
|
| 33 |
-
"watch": "watch",
|
| 34 |
-
"watches": "watch",
|
| 35 |
-
"buy": "bought",
|
| 36 |
-
"bought": "bought",
|
| 37 |
-
"prefer": "prefer",
|
| 38 |
-
"prefers": "prefer",
|
| 39 |
-
"like": "like",
|
| 40 |
-
"likes": "like",
|
| 41 |
-
"chase": "chase",
|
| 42 |
-
"chases": "chase",
|
| 43 |
-
"keep": "keeps",
|
| 44 |
-
"keeps": "keeps",
|
| 45 |
-
"carry": "carrying",
|
| 46 |
-
"carries": "carrying",
|
| 47 |
-
"carrying": "carrying",
|
| 48 |
-
"study": "study",
|
| 49 |
-
"studies": "study",
|
| 50 |
-
"use": "use",
|
| 51 |
-
"uses": "use",
|
| 52 |
-
"call": "call",
|
| 53 |
-
"calls": "call",
|
| 54 |
-
"import": "import",
|
| 55 |
-
"imports": "import",
|
| 56 |
-
}
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
self.
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
|
| 69 |
@classmethod
|
| 70 |
def from_hub(
|
| 71 |
cls,
|
| 72 |
repo_id: str,
|
| 73 |
*,
|
| 74 |
-
local_dir: str | Path = "
|
| 75 |
) -> "AethonOpenStructureModel":
|
| 76 |
-
|
| 77 |
-
local_root.mkdir(parents=True, exist_ok=True)
|
| 78 |
-
metadata_path = hf_hub_download(
|
| 79 |
-
repo_id=repo_id,
|
| 80 |
-
filename="bundle/metadata.json",
|
| 81 |
-
local_dir=local_root,
|
| 82 |
-
local_dir_use_symlinks=False,
|
| 83 |
-
)
|
| 84 |
-
hf_hub_download(
|
| 85 |
repo_id=repo_id,
|
| 86 |
-
|
| 87 |
-
local_dir=local_root,
|
| 88 |
local_dir_use_symlinks=False,
|
| 89 |
)
|
| 90 |
-
return cls(
|
| 91 |
|
| 92 |
def ask(self, question: str) -> AethonOpenStructureResponse:
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
text
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
self.conn.close()
|
| 103 |
-
|
| 104 |
-
def _ask_one(self, question: str) -> AethonOpenStructureResponse:
|
| 105 |
-
normalized = self._normalize(question)
|
| 106 |
-
tokens = normalized.split()
|
| 107 |
-
match = self._match_query_form(tokens)
|
| 108 |
-
if match is not None:
|
| 109 |
-
response = self._answer_query_form(question, normalized, tokens, match)
|
| 110 |
-
if response is not None:
|
| 111 |
-
return response
|
| 112 |
-
return self._unknown(question, normalized)
|
| 113 |
-
|
| 114 |
-
def _split_prompt(self, question: str) -> list[str]:
|
| 115 |
-
chunks = re.split(r"\?\s+|\?\s*$|\.\s+(?=[A-Z])|\bthen\b|;", question)
|
| 116 |
-
return [chunk.strip(" ?.") for chunk in chunks if chunk.strip(" ?.")]
|
| 117 |
-
|
| 118 |
-
def _match_query_form(self, tokens: list[str]) -> dict | None:
|
| 119 |
-
best: dict | None = None
|
| 120 |
-
best_score = -1
|
| 121 |
-
for form in self.query_forms:
|
| 122 |
-
prefix = [str(token).lower() for token in form.get("prefix", [])]
|
| 123 |
-
requires = [str(token).lower() for token in form.get("requires", [])]
|
| 124 |
-
if len(tokens) < len(prefix):
|
| 125 |
-
continue
|
| 126 |
-
if tokens[: len(prefix)] != prefix:
|
| 127 |
-
continue
|
| 128 |
-
if any(req not in tokens for req in requires):
|
| 129 |
-
continue
|
| 130 |
-
score = len(prefix) * 10 + len(requires)
|
| 131 |
-
if score > best_score:
|
| 132 |
-
best = form
|
| 133 |
-
best_score = score
|
| 134 |
-
return best
|
| 135 |
-
|
| 136 |
-
def _answer_query_form(
|
| 137 |
-
self,
|
| 138 |
-
question: str,
|
| 139 |
-
normalized: str,
|
| 140 |
-
tokens: list[str],
|
| 141 |
-
form: dict,
|
| 142 |
-
) -> AethonOpenStructureResponse | None:
|
| 143 |
-
intent = str(form.get("intent", ""))
|
| 144 |
-
if intent == "where_entity":
|
| 145 |
-
subject = self._extract_subject(tokens, form)
|
| 146 |
-
if subject:
|
| 147 |
-
answer = self._latest_object(subject, ("located_in", "lives_in"))
|
| 148 |
-
if answer:
|
| 149 |
-
return AethonOpenStructureResponse(
|
| 150 |
-
answer=self._display(answer),
|
| 151 |
-
text=f"{self._display(subject)} is currently in {self._display(answer)}.",
|
| 152 |
-
mode="direct",
|
| 153 |
-
)
|
| 154 |
-
return self._unknown(question, normalized)
|
| 155 |
-
|
| 156 |
-
if intent == "what_changed":
|
| 157 |
-
subject = self._extract_subject(tokens, form)
|
| 158 |
-
if subject:
|
| 159 |
-
row = self.conn.execute(
|
| 160 |
-
"""
|
| 161 |
-
SELECT relation, previous_object, new_object
|
| 162 |
-
FROM contradictions
|
| 163 |
-
WHERE subject = ?
|
| 164 |
-
ORDER BY contradiction_id DESC
|
| 165 |
-
LIMIT 1
|
| 166 |
-
""",
|
| 167 |
-
(subject,),
|
| 168 |
-
).fetchone()
|
| 169 |
-
if row is not None:
|
| 170 |
-
relation = self._display_relation(str(row["relation"]))
|
| 171 |
-
previous_value = self._display(str(row["previous_object"]))
|
| 172 |
-
new_value = self._display(str(row["new_object"]))
|
| 173 |
-
text = f"{self._display(subject)} changed in {relation} from {previous_value} to {new_value}."
|
| 174 |
-
return AethonOpenStructureResponse(
|
| 175 |
-
answer=f"{previous_value} -> {new_value}",
|
| 176 |
-
text=text,
|
| 177 |
-
mode="revision",
|
| 178 |
-
)
|
| 179 |
-
return self._unknown(question, normalized)
|
| 180 |
-
|
| 181 |
-
if intent == "has_contradiction":
|
| 182 |
-
subject = self._extract_subject(tokens, form)
|
| 183 |
-
if subject:
|
| 184 |
-
row = self.conn.execute(
|
| 185 |
-
"""
|
| 186 |
-
SELECT 1
|
| 187 |
-
FROM contradictions
|
| 188 |
-
WHERE subject = ?
|
| 189 |
-
LIMIT 1
|
| 190 |
-
""",
|
| 191 |
-
(subject,),
|
| 192 |
-
).fetchone()
|
| 193 |
-
if row is not None:
|
| 194 |
-
return AethonOpenStructureResponse(
|
| 195 |
-
answer="yes",
|
| 196 |
-
text=f"Yes, I know conflicting or revised information about {self._display(subject)}.",
|
| 197 |
-
mode="contradiction",
|
| 198 |
-
)
|
| 199 |
-
return AethonOpenStructureResponse(
|
| 200 |
-
answer="no",
|
| 201 |
-
text=f"I do not currently see a contradiction about {self._display(subject)}.",
|
| 202 |
-
mode="contradiction",
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
if intent == "relation_path":
|
| 206 |
-
subject = self._extract_subject(tokens, form)
|
| 207 |
-
object_value = self._extract_object(tokens, form)
|
| 208 |
-
if subject and object_value:
|
| 209 |
-
path = self._find_relation_path(subject, object_value)
|
| 210 |
-
if path:
|
| 211 |
-
path_text = " -> ".join(self._display_relation(step) for step in path)
|
| 212 |
-
text = f"{self._display(subject)} connects to {self._display(object_value)} through {path_text}."
|
| 213 |
-
return AethonOpenStructureResponse(answer=path_text, text=text, mode="path")
|
| 214 |
-
return self._unknown(question, normalized)
|
| 215 |
-
|
| 216 |
-
if intent in {"relation_object", "keep_location_lookup"}:
|
| 217 |
-
subject = self._extract_subject(tokens, form)
|
| 218 |
-
relation = self._relation_from_form(tokens, form)
|
| 219 |
-
if subject and relation:
|
| 220 |
-
if relation == "keeps":
|
| 221 |
-
kept = self._latest_object(subject, ("keeps",))
|
| 222 |
-
if kept:
|
| 223 |
-
answer = self._latest_object(kept, ("located_in", "lives_in"))
|
| 224 |
-
if answer:
|
| 225 |
-
text = f"{self._display(subject)} keeps it in {self._display(answer)}."
|
| 226 |
-
return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="derived")
|
| 227 |
-
else:
|
| 228 |
-
answer = self._latest_object(subject, (relation,))
|
| 229 |
-
if answer:
|
| 230 |
-
text = self._compose_relation_answer(subject, relation, answer)
|
| 231 |
-
return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="direct")
|
| 232 |
-
return self._unknown(question, normalized)
|
| 233 |
-
|
| 234 |
-
if intent == "relation_subject":
|
| 235 |
-
relation = self._relation_from_form(tokens, form)
|
| 236 |
-
object_value = self._extract_object(tokens, form)
|
| 237 |
-
if relation and object_value:
|
| 238 |
-
answer = self._latest_subject(object_value, relation)
|
| 239 |
-
if answer:
|
| 240 |
-
text = self._compose_reverse_relation_answer(answer, relation, object_value)
|
| 241 |
-
return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="direct")
|
| 242 |
-
return self._unknown(question, normalized)
|
| 243 |
-
|
| 244 |
-
if intent == "classify":
|
| 245 |
-
subject = self._extract_subject(tokens, form)
|
| 246 |
-
if subject:
|
| 247 |
-
answer = self._classify(subject)
|
| 248 |
-
if answer:
|
| 249 |
-
text = f"{self._display(subject)} is {self._article_for(self._display(answer))} {self._display(answer)}."
|
| 250 |
-
return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="classification")
|
| 251 |
-
return self._unknown(question, normalized)
|
| 252 |
-
|
| 253 |
-
if intent in {"plan_first", "plan_next", "plan_previous"}:
|
| 254 |
-
target = self._extract_subject(tokens, form)
|
| 255 |
-
if target:
|
| 256 |
-
relation = {
|
| 257 |
-
"plan_first": "plan_first",
|
| 258 |
-
"plan_next": "plan_next",
|
| 259 |
-
"plan_previous": "plan_previous",
|
| 260 |
-
}[intent]
|
| 261 |
-
answer = self._latest_object(target, (relation,))
|
| 262 |
-
if answer:
|
| 263 |
-
text = self._plan_text(intent, target, answer)
|
| 264 |
-
return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="plan")
|
| 265 |
-
return self._unknown(question, normalized)
|
| 266 |
-
|
| 267 |
-
if intent == "story_query":
|
| 268 |
-
subject = self._extract_subject(tokens, form)
|
| 269 |
-
anchor = self._extract_object(tokens, form)
|
| 270 |
-
story = self._story_for(subject, anchor)
|
| 271 |
-
if story:
|
| 272 |
-
return AethonOpenStructureResponse(answer=story, text=story, mode="story")
|
| 273 |
-
return self._unknown(question, normalized)
|
| 274 |
-
|
| 275 |
-
return None
|
| 276 |
-
|
| 277 |
-
def _normalize(self, text: str) -> str:
|
| 278 |
-
lowered = text.lower()
|
| 279 |
-
lowered = re.sub(r"[^\w\s]", " ", lowered)
|
| 280 |
-
lowered = re.sub(r"\s+", " ", lowered).strip()
|
| 281 |
-
for entry in self.semantic_lexicon:
|
| 282 |
-
symbol = str(entry.get("symbol", "")).strip().lower()
|
| 283 |
-
meaning = str(entry.get("meaning", "")).strip().lower()
|
| 284 |
-
if symbol and meaning:
|
| 285 |
-
lowered = re.sub(rf"\b{re.escape(symbol)}\b", meaning, lowered)
|
| 286 |
-
return lowered
|
| 287 |
-
|
| 288 |
-
def _extract_subject(self, tokens: list[str], form: dict) -> str | None:
|
| 289 |
-
mode = str(form.get("subject_mode", ""))
|
| 290 |
-
prefix_len = len(form.get("prefix", []))
|
| 291 |
-
end_anchor = str(form.get("subject_end_anchor", "")).lower()
|
| 292 |
-
if mode == "tail":
|
| 293 |
-
return self._join_tokens(tokens[prefix_len:])
|
| 294 |
-
if mode == "tail_without_last":
|
| 295 |
-
return self._join_tokens(tokens[prefix_len:-1])
|
| 296 |
-
if mode == "single_after_prefix":
|
| 297 |
-
return tokens[prefix_len] if len(tokens) > prefix_len else None
|
| 298 |
-
if mode == "between_indexes":
|
| 299 |
-
subject_start = int(form.get("subject_start", prefix_len))
|
| 300 |
-
if subject_start < prefix_len:
|
| 301 |
-
subject_start = prefix_len
|
| 302 |
-
end_index = len(tokens)
|
| 303 |
-
if end_anchor and end_anchor in tokens[subject_start:]:
|
| 304 |
-
end_index = tokens.index(end_anchor, subject_start)
|
| 305 |
-
return self._join_tokens(tokens[subject_start:end_index])
|
| 306 |
-
if mode == "between_anchors":
|
| 307 |
-
subject_start = int(form.get("subject_start", prefix_len))
|
| 308 |
-
end_index = len(tokens)
|
| 309 |
-
if end_anchor and end_anchor in tokens[subject_start:]:
|
| 310 |
-
end_index = tokens.index(end_anchor, subject_start)
|
| 311 |
-
return self._join_tokens(tokens[subject_start:end_index])
|
| 312 |
-
return self._join_tokens(tokens[prefix_len:])
|
| 313 |
-
|
| 314 |
-
def _extract_object(self, tokens: list[str], form: dict) -> str | None:
|
| 315 |
-
object_mode = str(form.get("object_mode", ""))
|
| 316 |
-
prefix_len = len(form.get("prefix", []))
|
| 317 |
-
anchor = str(form.get("object_start_anchor", "")).lower()
|
| 318 |
-
if object_mode == "after_anchor" and anchor:
|
| 319 |
-
if anchor in tokens:
|
| 320 |
-
start = tokens.index(anchor) + 1
|
| 321 |
-
return self._join_tokens(tokens[start:])
|
| 322 |
-
if object_mode == "tail_after_prefix":
|
| 323 |
-
return self._join_tokens(tokens[prefix_len:])
|
| 324 |
-
if object_mode == "from_relation_words":
|
| 325 |
-
relation = self._relation_from_form(tokens, form)
|
| 326 |
-
if relation is None:
|
| 327 |
-
return None
|
| 328 |
-
for index, token in enumerate(tokens):
|
| 329 |
-
if self._RELATION_WORDS.get(token) == relation:
|
| 330 |
-
return self._join_tokens(tokens[index + 1 :])
|
| 331 |
-
return None
|
| 332 |
-
|
| 333 |
-
def _relation_from_form(self, tokens: list[str], form: dict) -> str | None:
|
| 334 |
-
relation_mode = str(form.get("relation_mode", "fixed"))
|
| 335 |
-
if relation_mode == "fixed":
|
| 336 |
-
relation = str(form.get("relation", "")).strip()
|
| 337 |
-
return relation or None
|
| 338 |
-
if relation_mode == "from_words":
|
| 339 |
-
for token in tokens:
|
| 340 |
-
relation = self._RELATION_WORDS.get(token)
|
| 341 |
-
if relation is not None:
|
| 342 |
-
return relation
|
| 343 |
-
return None
|
| 344 |
-
|
| 345 |
-
def _latest_object(self, subject: str, relations: tuple[str, ...]) -> str | None:
|
| 346 |
-
if not relations:
|
| 347 |
-
return None
|
| 348 |
-
placeholders = ", ".join("?" for _ in relations)
|
| 349 |
-
row = self.conn.execute(
|
| 350 |
-
f"""
|
| 351 |
-
SELECT object
|
| 352 |
-
FROM edges
|
| 353 |
-
WHERE subject = ?
|
| 354 |
-
AND relation IN ({placeholders})
|
| 355 |
-
AND is_active = 1
|
| 356 |
-
ORDER BY edge_id DESC
|
| 357 |
-
LIMIT 1
|
| 358 |
-
""",
|
| 359 |
-
(subject, *relations),
|
| 360 |
-
).fetchone()
|
| 361 |
-
return None if row is None else str(row["object"])
|
| 362 |
-
|
| 363 |
-
def _latest_subject(self, object_value: str, relation: str) -> str | None:
|
| 364 |
-
row = self.conn.execute(
|
| 365 |
-
"""
|
| 366 |
-
SELECT subject
|
| 367 |
-
FROM edges
|
| 368 |
-
WHERE object = ?
|
| 369 |
-
AND relation = ?
|
| 370 |
-
AND is_active = 1
|
| 371 |
-
ORDER BY edge_id DESC
|
| 372 |
-
LIMIT 1
|
| 373 |
-
""",
|
| 374 |
-
(object_value, relation),
|
| 375 |
-
).fetchone()
|
| 376 |
-
return None if row is None else str(row["subject"])
|
| 377 |
-
|
| 378 |
-
def _classify(self, subject: str) -> str | None:
|
| 379 |
-
direct = self._latest_object(subject, ("is_a", "equals"))
|
| 380 |
-
if direct is None:
|
| 381 |
-
return None
|
| 382 |
-
promoted = self._latest_object(direct, ("is_a",))
|
| 383 |
-
return promoted or direct
|
| 384 |
-
|
| 385 |
-
def _find_relation_path(self, start: str, goal: str, max_depth: int = 4) -> list[str] | None:
|
| 386 |
-
queue: deque[tuple[str, list[str], int]] = deque([(start, [], 0)])
|
| 387 |
-
seen = {start}
|
| 388 |
-
while queue:
|
| 389 |
-
node, path, depth = queue.popleft()
|
| 390 |
-
if depth >= max_depth:
|
| 391 |
-
continue
|
| 392 |
-
rows = self.conn.execute(
|
| 393 |
-
"""
|
| 394 |
-
SELECT relation, object
|
| 395 |
-
FROM edges
|
| 396 |
-
WHERE subject = ?
|
| 397 |
-
AND is_active = 1
|
| 398 |
-
ORDER BY edge_id DESC
|
| 399 |
-
""",
|
| 400 |
-
(node,),
|
| 401 |
-
).fetchall()
|
| 402 |
-
for row in rows:
|
| 403 |
-
relation = str(row["relation"])
|
| 404 |
-
nxt = str(row["object"])
|
| 405 |
-
new_path = path + [relation]
|
| 406 |
-
if nxt == goal:
|
| 407 |
-
return new_path
|
| 408 |
-
if nxt not in seen:
|
| 409 |
-
seen.add(nxt)
|
| 410 |
-
queue.append((nxt, new_path, depth + 1))
|
| 411 |
-
return None
|
| 412 |
-
|
| 413 |
-
def _story_for(self, subject: str | None, anchor: str | None) -> str | None:
|
| 414 |
-
if not subject:
|
| 415 |
-
return None
|
| 416 |
-
rows = self.conn.execute(
|
| 417 |
-
"""
|
| 418 |
-
SELECT relation, object
|
| 419 |
-
FROM edges
|
| 420 |
-
WHERE subject = ?
|
| 421 |
-
AND is_active = 1
|
| 422 |
-
ORDER BY edge_id ASC
|
| 423 |
-
LIMIT 6
|
| 424 |
-
""",
|
| 425 |
-
(subject,),
|
| 426 |
-
).fetchall()
|
| 427 |
-
if not rows:
|
| 428 |
-
return None
|
| 429 |
-
sentences = [self._compose_relation_answer(subject, str(row["relation"]), str(row["object"])) for row in rows]
|
| 430 |
-
if anchor:
|
| 431 |
-
return " ".join(sentences) + f" After {self._display(anchor)}, the story keeps moving through what Aethon already knows."
|
| 432 |
-
return " ".join(sentences)
|
| 433 |
-
|
| 434 |
-
def _compose_relation_answer(self, subject: str, relation: str, answer: str) -> str:
|
| 435 |
-
relation_text = self._display_relation(relation)
|
| 436 |
-
templates = self.surface_lexicon.get("relation_templates", {})
|
| 437 |
-
if relation in templates:
|
| 438 |
-
template = str(templates[relation])
|
| 439 |
-
return template.format(subject=self._display(subject), object=self._display(answer))
|
| 440 |
-
return f"{self._display(subject)} {relation_text} {self._display(answer)}."
|
| 441 |
-
|
| 442 |
-
def _compose_reverse_relation_answer(self, subject: str, relation: str, object_value: str) -> str:
|
| 443 |
-
if relation == "chase":
|
| 444 |
-
return f"{self._display(subject)} chases {self._display(object_value)}."
|
| 445 |
-
relation_text = self._display_relation(relation)
|
| 446 |
-
return f"{self._display(subject)} {relation_text} {self._display(object_value)}."
|
| 447 |
-
|
| 448 |
-
def _plan_text(self, intent: str, target: str, answer: str) -> str:
|
| 449 |
-
target_display = self._display(target)
|
| 450 |
-
answer_display = self._display(answer)
|
| 451 |
-
if intent == "plan_first":
|
| 452 |
-
return f"For {target_display}, the grounded first step is {answer_display}."
|
| 453 |
-
if intent == "plan_next":
|
| 454 |
-
return f"After that point, the next grounded step is {answer_display}."
|
| 455 |
-
return f"Before that point, the grounded earlier step is {answer_display}."
|
| 456 |
-
|
| 457 |
-
def _unknown(self, question: str, normalized: str) -> AethonOpenStructureResponse:
|
| 458 |
-
subject = self._salient_subject(normalized)
|
| 459 |
-
subject_text = self._display(subject) if subject else "that"
|
| 460 |
-
if normalized.startswith("where "):
|
| 461 |
-
variants = [
|
| 462 |
-
f"I cannot ground a reliable location for {subject_text}, and I would rather not invent one.",
|
| 463 |
-
f"{subject_text.capitalize()} is not something I can place confidently from what this bundle can currently support.",
|
| 464 |
-
f"I do not have enough grounded structure to place {subject_text} anywhere without guessing.",
|
| 465 |
-
]
|
| 466 |
-
elif normalized.startswith(("who ", "what ")):
|
| 467 |
-
variants = [
|
| 468 |
-
f"I cannot ground a reliable answer about {subject_text} strongly enough yet.",
|
| 469 |
-
f"I would rather stay careful than pretend I know more about {subject_text} than I can support.",
|
| 470 |
-
f"{subject_text.capitalize()} goes beyond what I can answer faithfully from the current structure.",
|
| 471 |
-
]
|
| 472 |
-
else:
|
| 473 |
-
variants = [
|
| 474 |
-
f"I cannot support a reliable answer for {subject_text} right now.",
|
| 475 |
-
f"I would rather stay careful than guess about {subject_text}.",
|
| 476 |
-
f"I do not have enough grounded structure to answer that faithfully about {subject_text}.",
|
| 477 |
-
]
|
| 478 |
-
index = len(normalized) % len(variants) if normalized else 0
|
| 479 |
-
return AethonOpenStructureResponse(answer="<unknown>", text=variants[index], mode="unknown")
|
| 480 |
-
|
| 481 |
-
def _display(self, concept: str) -> str:
|
| 482 |
-
row = self.conn.execute(
|
| 483 |
-
"""
|
| 484 |
-
SELECT display_name
|
| 485 |
-
FROM concepts
|
| 486 |
-
WHERE concept_id = ?
|
| 487 |
-
""",
|
| 488 |
-
(concept,),
|
| 489 |
-
).fetchone()
|
| 490 |
-
if row is None:
|
| 491 |
-
return concept.replace("_", " ")
|
| 492 |
-
display = str(row["display_name"]).strip()
|
| 493 |
-
return display or concept.replace("_", " ")
|
| 494 |
-
|
| 495 |
-
@staticmethod
|
| 496 |
-
def _display_relation(relation: str) -> str:
|
| 497 |
-
return relation.replace("_", " ")
|
| 498 |
-
|
| 499 |
-
@staticmethod
|
| 500 |
-
def _join_tokens(tokens: list[str]) -> str | None:
|
| 501 |
-
cleaned = " ".join(token for token in tokens if token).strip()
|
| 502 |
-
return cleaned or None
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
return "an" if text[:1].lower() in {"a", "e", "i", "o", "u"} else "a"
|
| 507 |
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
tokens = normalized.split()
|
| 511 |
-
for stopword in ("what", "where", "who", "how", "is", "does", "did", "the", "a", "an", "about", "to", "after", "before"):
|
| 512 |
-
tokens = [token for token in tokens if token != stopword]
|
| 513 |
-
if not tokens:
|
| 514 |
-
return None
|
| 515 |
-
return " ".join(tokens[:3])
|
| 516 |
|
| 517 |
|
| 518 |
if __name__ == "__main__":
|
| 519 |
model = AethonOpenStructureModel.from_hub("OkeyMetaLtd/Aethon-N1-Base-Open-Structure")
|
| 520 |
try:
|
| 521 |
prompts = [
|
| 522 |
-
"
|
| 523 |
-
"
|
| 524 |
-
"
|
| 525 |
]
|
| 526 |
for prompt in prompts:
|
| 527 |
reply = model.ask(prompt)
|
| 528 |
print(f"Q: {prompt}")
|
| 529 |
print(f"A: {reply.text}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
print()
|
| 531 |
finally:
|
| 532 |
model.close()
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
import sys
|
|
|
|
|
|
|
|
|
|
| 4 |
from dataclasses import dataclass
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
+
from huggingface_hub import snapshot_download
|
| 8 |
|
| 9 |
|
| 10 |
@dataclass(frozen=True)
class AethonOpenStructureResponse:
    """Immutable reply returned by ``AethonOpenStructureModel.ask``.

    Each field is copied from the attribute of the same name on the bundled
    runtime's response object; ``proof`` and ``reasoning`` are stored as
    tuples so the frozen instance cannot be mutated through them.
    """

    # Answer value reported by the runtime.
    answer: str
    # Natural-language reply; the demo script prints this after "A:".
    text: str
    # Explanation reported by the runtime (format is defined by the runtime).
    explanation: str
    # Proof entries reported by the runtime.
    proof: tuple[str, ...]
    # Reasoning steps; the demo script prints one per line when non-empty.
    reasoning: tuple[str, ...]
    # Mode label reported by the runtime.
    mode: str
|
| 18 |
|
| 19 |
|
| 20 |
class AethonOpenStructureModel:
|
| 21 |
+
"""Portable bundle-native Aethon runtime wrapper.
|
| 22 |
|
| 23 |
+
This wrapper downloads the public Open Structure release, loads the bundled
|
| 24 |
+
native runtime, and exposes a simple model-facing API:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
- `from_hub(...)`
|
| 27 |
+
- `ask(...)`
|
| 28 |
+
- `learn(...)`
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def __init__(self, release_dir: str | Path) -> None:
|
| 32 |
+
self.release_dir = Path(release_dir)
|
| 33 |
+
runtime_root = self.release_dir / "runtime"
|
| 34 |
+
if str(runtime_root) not in sys.path:
|
| 35 |
+
sys.path.insert(0, str(runtime_root))
|
| 36 |
+
|
| 37 |
+
from aethon.rfi_bundle import NativeBundleManager # type: ignore
|
| 38 |
+
|
| 39 |
+
self._runtime = NativeBundleManager.load(self.release_dir / "bundle")
|
| 40 |
+
self.metadata = getattr(self._runtime, "metadata", None)
|
| 41 |
|
| 42 |
@classmethod
|
| 43 |
def from_hub(
|
| 44 |
cls,
|
| 45 |
repo_id: str,
|
| 46 |
*,
|
| 47 |
+
local_dir: str | Path = "aethon_open_structure_release",
|
| 48 |
) -> "AethonOpenStructureModel":
|
| 49 |
+
release_dir = snapshot_download(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
repo_id=repo_id,
|
| 51 |
+
local_dir=str(local_dir),
|
|
|
|
| 52 |
local_dir_use_symlinks=False,
|
| 53 |
)
|
| 54 |
+
return cls(release_dir)
|
| 55 |
|
| 56 |
def ask(self, question: str) -> AethonOpenStructureResponse:
|
| 57 |
+
response = self._runtime.ask(question)
|
| 58 |
+
return AethonOpenStructureResponse(
|
| 59 |
+
answer=response.answer,
|
| 60 |
+
text=response.text,
|
| 61 |
+
explanation=response.explanation,
|
| 62 |
+
proof=tuple(response.proof),
|
| 63 |
+
reasoning=tuple(response.reasoning),
|
| 64 |
+
mode=response.mode,
|
| 65 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
+
def learn(self, text: str) -> dict[str, object]:
    """Forward ``text`` to the wrapped runtime's ``learn`` and return its result unchanged."""
    outcome = self._runtime.learn(text)
    return outcome
|
|
|
|
| 69 |
|
| 70 |
+
def close(self) -> None:
    """Release the wrapped runtime by delegating to its ``close``."""
    runtime = self._runtime
    runtime.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
if __name__ == "__main__":
    # Demo: pull the published bundle, ask a few sample prompts, and print each
    # reply together with any reasoning steps the runtime reports.
    model = AethonOpenStructureModel.from_hub("OkeyMetaLtd/Aethon-N1-Base-Open-Structure")
    try:
        prompts = [
            "Amina used to live in Lagos, but now she lives in Accra. What changed about her location?",
            "Tell me the story of Zainab after she misses the last train and discovers the map was outdated.",
            "If the meeting conflicts with lunch and the report must finish before the client call, what should happen first and what should be rescheduled?",
        ]
        for question in prompts:
            answer = model.ask(question)
            print(f"Q: {question}")
            print(f"A: {answer.text}")
            trace = answer.reasoning
            if trace:
                print("reasoning:")
                for line in trace:
                    print(f" - {line}")
            print()
    finally:
        # Always release the runtime, even if a prompt raised.
        model.close()
|
run_aethon.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import sys
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
ROOT = Path(__file__).resolve().parent
|
| 8 |
+
sys.path.insert(0, str(ROOT / "runtime"))
|
| 9 |
+
|
| 10 |
+
from aethon.rfi_bundle import NativeBundleManager
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main() -> None:
    """Load the bundled runtime, apply every ``--learn`` fact, then answer every ``--ask`` query.

    Both flags may be repeated. Facts are learned before any query is asked,
    regardless of their order on the command line.
    """
    cli = argparse.ArgumentParser(description="Run Aethon Open Structure bundle.")
    for flag in ("--ask", "--learn"):
        cli.add_argument(flag, action="append", default=[])
    options = cli.parse_args()

    runtime = NativeBundleManager.load(ROOT / "bundle")
    try:
        for statement in options.learn:
            runtime.learn(statement)
        for question in options.ask:
            answer = runtime.ask(question)
            print(f"Q: {question}")
            print(f"A: {answer.text}")
            if not answer.reasoning:
                continue
            print("reasoning:")
            for line in answer.reasoning:
                print(f" - {line}")
    finally:
        # Close the runtime even when learning or answering fails.
        runtime.close()


if __name__ == "__main__":
    main()
|
runtime/aethon/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Portable Aethon Open Structure runtime package."""
|
runtime/aethon/rfi_abstraction.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from collections import Counter
|
| 5 |
+
|
| 6 |
+
from .rfi_graph import RelationalGraphStore
|
| 7 |
+
from .rfi_ontology import ConceptOntology
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@dataclass(frozen=True)
class AbstractionRule:
    """Class-level relation produced by :meth:`AbstractionEngine.derive_rules`."""

    subject_class: str  # parent class obtained by lifting edge subjects
    relation: str  # relation name shared by the supporting edges
    object_class: str  # parent class obtained by lifting edge objects
    support: int  # number of (edge, subject-parent, object-parent) observations counted


class AbstractionEngine:
    """Lifts repeated graph motifs into class-level rules."""

    # Relations that never contribute to rule counting (see ``derive_rules``).
    _IGNORED_RELATIONS = {
        "is_a",
        "has_instance",
        "contains",
        "home_of",
        "liked_by",
        "preferred_by",
        "chased_by",
        "attacked_by",
        "watched_by",
        "purchase_site_of",
        "kept_by",
        "carried_by",
    }

    def __init__(self, graph: RelationalGraphStore, ontology: ConceptOntology | None = None) -> None:
        """Bind the engine to ``graph`` and to ``ontology`` (a fresh ``ConceptOntology`` when omitted)."""
        self.graph = graph
        # Compare against None rather than relying on truthiness: an injected
        # ontology that evaluates as falsy (e.g. an empty container-like
        # object) must still be used instead of being silently replaced.
        self.ontology = ConceptOntology() if ontology is None else ontology

    def derive_rules(self, min_support: int = 2) -> list[AbstractionRule]:
        """Count lifted (subject class, relation, object class) motifs over active edges.

        Edges whose ``source_kind`` is ``"derived"`` or whose relation is in
        ``_IGNORED_RELATIONS`` are skipped. Every combination of a subject
        parent and an object parent returned by ``ontology.lift`` counts once
        per edge.

        Returns:
            Rules with ``support >= min_support``, ordered by their
            (subject_class, relation, object_class) key.
        """
        counts: Counter[tuple[str, str, str]] = Counter()
        for edge in self.graph.iter_active_edges():
            if edge.source_kind == "derived" or edge.relation in self._IGNORED_RELATIONS:
                continue
            subject_parents = self.ontology.lift(edge.subject)
            object_parents = self.ontology.lift(edge.object)
            for subject_parent in subject_parents:
                for object_parent in object_parents:
                    counts[(subject_parent, edge.relation, object_parent)] += 1

        return [
            AbstractionRule(
                subject_class=subject_class,
                relation=relation,
                object_class=object_class,
                support=support,
            )
            for (subject_class, relation, object_class), support in sorted(counts.items())
            if support >= min_support
        ]

    def materialize_rules(self, min_support: int = 2) -> list[AbstractionRule]:
        """Derive rules and store each one in the graph as a derived fact.

        Facts are added with ``commit=False`` and the graph is committed once
        at the end, only when at least one rule was produced.

        Returns:
            The rules that were materialized.
        """
        rules = self.derive_rules(min_support=min_support)
        for rule in rules:
            self.graph.add_derived_fact(
                subject=rule.subject_class,
                relation=rule.relation,
                object=rule.object_class,
                source_text=f"abstraction:{rule.subject_class}:{rule.relation}:{rule.object_class}:{rule.support}",
                commit=False,
            )
        if rules:
            self.graph.commit()
        return rules
|
runtime/aethon/rfi_bundle.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
import shutil
|
| 6 |
+
import sqlite3
|
| 7 |
+
from typing import TYPE_CHECKING
|
| 8 |
+
|
| 9 |
+
from .rfi_metrics import StructuralCapacityMeter
|
| 10 |
+
|
| 11 |
+
if TYPE_CHECKING:
|
| 12 |
+
from .rfi_runtime import AethonNativeBase
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class NativeBundleManager:
    """Persists and reloads Aethon N1 as a portable native base bundle."""

    METADATA_FILE = "metadata.json"
    GRAPH_FILE = "graph.sqlite3"
    BUNDLE_FORMAT = 2
    PUBLIC_CONTRACT = "aethon.n1.bundle.v1"
    ASSETS_DIR = "assets"
    SEMANTIC_FILE = "semantic_lexicon.jsonl"
    SURFACE_FILE = "surface_lexicon.json"
    REASONING_RULES_FILE = "reasoning_rules.json"

    @classmethod
    def save(cls, runtime: "AethonNativeBase", bundle_dir: str | Path) -> Path:
        """Write ``runtime`` into ``bundle_dir`` as a graph file plus ``metadata.json``.

        The directory is created when missing. The graph is written first so
        the capacity figures stored in the metadata are measured from the
        file that actually ships.

        Returns:
            The bundle directory as a ``Path``.
        """
        target = Path(bundle_dir)
        target.mkdir(parents=True, exist_ok=True)
        graph_path = target / cls.GRAPH_FILE
        metadata_path = target / cls.METADATA_FILE

        cls._write_graph(runtime, graph_path)
        capacity = StructuralCapacityMeter.from_sqlite(graph_path)
        metadata = {
            "bundle_format": cls.BUNDLE_FORMAT,
            "public_contract": cls.PUBLIC_CONTRACT,
            "release_class": "open-structure",
            "name": runtime.NAME,
            "family": runtime.FAMILY,
            "tokenizer": runtime.TOKENIZER,
            "size_unit": "Structural Capacity (SC)",
            "capacity": capacity.to_metadata(),
            "bundle_files": [cls.METADATA_FILE, cls.GRAPH_FILE],
            "graph_file": cls.GRAPH_FILE,
            "graph_dialect": "sqlite3",
            "sqlite_schema": {
                "required_tables": ["concepts", "edges", "contradictions"],
                "optional_tables": ["raw_units"],
                "edge_active_flag": "is_active",
                "edge_support_pointer": "supports_edge_id",
            },
            "semantic_lexicon": runtime.ontology.semantic_lexicon.to_payload(),
            "surface_lexicon": runtime.surface.lexicon.to_payload(),
            "query_forms": runtime.query_forms.to_payload(),
            "reasoning_rules": runtime.reasoner.rule_set.to_payload(),
        }
        metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
        return target

    @classmethod
    def load(cls, bundle_dir: str | Path) -> "AethonNativeBase":
        """Build a runtime from the bundle stored in ``bundle_dir``.

        Bundles whose metadata declares ``bundle_format >= BUNDLE_FORMAT``
        take their lexicon, query-form and rule payloads from the metadata.
        Older bundles, or bundles without metadata, point the runtime at the
        files under ``assets/`` instead.

        Raises:
            FileNotFoundError: if the graph file is missing.
        """
        # Imported lazily; at module level this name is only imported under TYPE_CHECKING.
        from .rfi_runtime import AethonNativeBase

        source = Path(bundle_dir)
        graph_path = source / cls.GRAPH_FILE
        metadata_path = source / cls.METADATA_FILE
        if not graph_path.exists():
            raise FileNotFoundError(f"Missing bundle graph: {graph_path}")
        metadata = json.loads(metadata_path.read_text(encoding="utf-8")) if metadata_path.exists() else {}
        if metadata.get("bundle_format", 1) >= cls.BUNDLE_FORMAT:
            return AethonNativeBase(
                db_path=str(graph_path),
                semantic_lexicon_payload=metadata.get("semantic_lexicon"),
                surface_lexicon_payload=metadata.get("surface_lexicon"),
                query_form_payload=metadata.get("query_forms"),
                reasoning_rules_payload=metadata.get("reasoning_rules"),
            )
        assets_dir = source / cls.ASSETS_DIR
        return AethonNativeBase(
            db_path=str(graph_path),
            semantic_lexicon_path=str(assets_dir / cls.SEMANTIC_FILE),
            surface_lexicon_path=str(assets_dir / cls.SURFACE_FILE),
            reasoning_rules_path=str(assets_dir / cls.REASONING_RULES_FILE),
        )

    @staticmethod
    def _write_graph(runtime: "AethonNativeBase", graph_path: Path) -> None:
        """Snapshot the runtime's graph database into ``graph_path``.

        The snapshot is taken through SQLite's online backup API rather than
        a raw file copy. A database in write-ahead-log mode keeps committed
        pages in a ``-wal`` sidecar until the next checkpoint, so copying
        only the main file can produce a bundle that is missing recent data.

        When the runtime already lives at ``graph_path`` nothing is copied.
        """
        runtime.graph.conn.commit()
        if runtime.graph.db_path != ":memory:":
            source = Path(runtime.graph.db_path)
            if source.resolve() == graph_path.resolve():
                # The live database is the bundle file itself.
                return

        destination = sqlite3.connect(str(graph_path))
        try:
            runtime.graph.conn.backup(destination)
        finally:
            destination.close()
|
runtime/aethon/rfi_document_filter.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@dataclass(frozen=True)
class DocumentSelection:
    """Outcome of :meth:`DocumentQualityGate.select` for one document."""

    accepted: bool  # True only when at least one learning unit was extracted
    reason: str  # "accepted", or the name of the check that rejected the document
    units: tuple[str, ...]  # extracted learning units; empty on rejection


class DocumentQualityGate:
    """Filters low-value corpus documents and extracts higher-value learning units."""

    # Lower-case substrings that mark a document or sentence as boilerplate.
    _BLOCKLIST = (
        "cookie policy",
        "privacy policy",
        "terms of service",
        "all rights reserved",
        "sign up",
        "subscribe",
        "javascript",
        "enable cookies",
    )

    @staticmethod
    def _rejected(reason: str) -> DocumentSelection:
        """Build a rejection result carrying ``reason`` and no units."""
        return DocumentSelection(False, reason, ())

    def select(self, title: str, text: str) -> DocumentSelection:
        """Run the document-level checks, then extract learning units.

        Checks run in order: empty input, blocklisted phrase, more than three
        ``http`` occurrences, fewer than 24 alphabetic characters, and finally
        whether any unit could be extracted.
        """
        title, text = title.strip(), text.strip()
        merged = "\n".join(filter(None, (title, text)))
        if not merged:
            return self._rejected("empty")

        folded = merged.lower()
        for phrase in self._BLOCKLIST:
            if phrase in folded:
                return self._rejected("boilerplate")
        if folded.count("http") > 3:
            return self._rejected("too_many_urls")
        if sum(map(str.isalpha, merged)) < 24:
            return self._rejected("too_little_language")

        found = self._extract_units(title, text)
        if found:
            return DocumentSelection(True, "accepted", tuple(found))
        return self._rejected("no_viable_units")

    def _extract_units(self, title: str, text: str) -> list[str]:
        """Collect the title, code-like lines and prose sentences, de-duplicated in order."""
        collected: list[str] = []
        if title and self._is_viable_sentence(title):
            collected.append(title.strip())

        # Line-oriented pass: keep lines that look like code or equations.
        code_checks = (self._looks_like_assignment, self._looks_like_return, self._looks_like_equation)
        for raw in text.splitlines():
            candidate = raw.strip().strip("`")
            if candidate and any(check(candidate) for check in code_checks):
                collected.append(candidate)

        # Sentence-oriented pass; stops once 64 units have been gathered.
        for fragment in self._split_prose_units(text.replace("\r", "\n")):
            trimmed = fragment.strip(" -:;,\t")
            if self._is_viable_sentence(trimmed):
                collected.append(trimmed)
            if len(collected) >= 64:
                break
        return list(dict.fromkeys(collected))

    def _is_viable_sentence(self, sentence: str) -> bool:
        """Accept 3 to 24 token sentences without boilerplate, URLs, repeated ``@`` or heavy shouting."""
        if not sentence:
            return False
        folded = sentence.lower()
        if any(phrase in folded for phrase in self._BLOCKLIST):
            return False
        words = sentence.split()
        if not 3 <= len(words) <= 24:
            return False
        if "http" in sentence or sentence.count("@") > 1:
            return False
        shouted = sum(1 for word in words if len(word) > 3 and word.isupper())
        return shouted <= max(3, len(words) // 3)

    def _looks_like_assignment(self, line: str) -> bool:
        """Match ``identifier = value`` lines that contain no sentence punctuation."""
        if any(mark in line for mark in ".!?"):
            return False
        if "==" in line or "=" not in line:
            return False
        target, _, value = line.partition("=")
        target, value = target.strip(), value.strip()
        return self._is_identifier(target) and bool(value) and not value.startswith("=")

    def _looks_like_return(self, line: str) -> bool:
        """Match ``[def|function] name return(s) value`` lines without sentence punctuation."""
        if any(mark in line for mark in ".!?"):
            return False
        words = line.split()
        if len(words) < 3:
            return False
        offset = 0
        if words[0] in {"def", "function"}:
            # A leading keyword shifts the pattern one token to the right.
            if len(words) < 4:
                return False
            offset = 1
        name, verb = words[offset], words[offset + 1]
        value = " ".join(words[offset + 2 :]).strip()
        return self._is_identifier(name) and verb in {"return", "returns"} and bool(value)

    def _looks_like_equation(self, line: str) -> bool:
        """Match ``lhs = rhs`` built only from alphanumerics, ``_+-*/()`` and spaces, with an operator."""
        if any(mark in line for mark in ".!?"):
            return False
        if "==" in line or "=" not in line:
            return False
        lhs, _, rhs = line.partition("=")
        lhs, rhs = lhs.strip(), rhs.strip()
        if not (lhs and rhs):
            return False
        allowed = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_+-*/() ")
        if not set(lhs + rhs) <= allowed:
            return False
        return any(op in line for op in "+-*/")

    @staticmethod
    def _split_prose_units(prose: str) -> list[str]:
        """Split on newlines and on ``.``/``!``/``?`` followed by whitespace or end of text."""
        pieces: list[str] = []
        start = 0
        last = len(prose) - 1
        for pos, char in enumerate(prose):
            if char == "\n":
                # The newline itself is dropped from the unit.
                pieces.append(prose[start:pos])
                start = pos + 1
            elif char in ".!?" and (pos == last or prose[pos + 1].isspace()):
                # The terminator stays attached to its sentence.
                pieces.append(prose[start : pos + 1])
                start = pos + 1
        pieces.append(prose[start:])
        return [piece for piece in (chunk.strip() for chunk in pieces) if piece]

    @staticmethod
    def _is_identifier(value: str) -> bool:
        """Return True for a non-empty name of alphanumerics/underscores that does not start with a digit."""
        if not value:
            return False
        head = value[0]
        if not (head == "_" or head.isalpha()):
            return False
        return all(char == "_" or char.isalnum() for char in value)
|
runtime/aethon/rfi_graph.py
ADDED
|
@@ -0,0 +1,769 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from collections import deque
|
| 5 |
+
import sqlite3
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import shutil
|
| 8 |
+
|
| 9 |
+
from .rfi_ingest import Triple
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass(frozen=True)
class EdgeRecord:
    """Immutable in-memory view of one edge; fields mirror columns of the ``edges`` table."""

    edge_id: int
    subject: str
    relation: str
    object: str
    source_kind: str  # provenance tag; the value "derived" is used for inferred edges
    source_text: str
    is_active: bool
    supports_edge_id: int | None = None  # optional pointer to another edge's id
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass(frozen=True)
class ContradictionRecord:
    """Immutable in-memory view of one row; fields mirror columns of the ``contradictions`` table."""

    contradiction_id: int
    subject: str
    relation: str
    previous_object: str  # object recorded before the conflicting statement
    new_object: str  # object introduced by the conflicting statement
    previous_edge_id: int
    new_edge_id: int
    source_text: str
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class RelationalGraphStore:
|
| 37 |
+
"""SQLite-backed sparse graph for one-shot structural learning."""
|
| 38 |
+
|
| 39 |
+
INVERSE_RELATIONS = {
|
| 40 |
+
"located_in": "contains",
|
| 41 |
+
"contains": "located_in",
|
| 42 |
+
"chase": "chased_by",
|
| 43 |
+
"chased_by": "chase",
|
| 44 |
+
"attack": "attacked_by",
|
| 45 |
+
"attacked_by": "attack",
|
| 46 |
+
"watch": "watched_by",
|
| 47 |
+
"watched_by": "watch",
|
| 48 |
+
"lives_in": "home_of",
|
| 49 |
+
"home_of": "lives_in",
|
| 50 |
+
"like": "liked_by",
|
| 51 |
+
"liked_by": "like",
|
| 52 |
+
"prefer": "preferred_by",
|
| 53 |
+
"preferred_by": "prefer",
|
| 54 |
+
"bought_in": "purchase_site_of",
|
| 55 |
+
"purchase_site_of": "bought_in",
|
| 56 |
+
"is_a": "has_instance",
|
| 57 |
+
"has_instance": "is_a",
|
| 58 |
+
"keeps": "kept_by",
|
| 59 |
+
"kept_by": "keeps",
|
| 60 |
+
"carrying": "carried_by",
|
| 61 |
+
"carried_by": "carrying",
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
def __init__(self, db_path: str | Path = ":memory:") -> None:
|
| 65 |
+
self.db_path = str(db_path)
|
| 66 |
+
self.conn = sqlite3.connect(self.db_path)
|
| 67 |
+
self.conn.row_factory = sqlite3.Row
|
| 68 |
+
self.mutation_version = 0
|
| 69 |
+
self._configure_connection()
|
| 70 |
+
self._init_schema()
|
| 71 |
+
|
| 72 |
+
def _configure_connection(self) -> None:
|
| 73 |
+
self.conn.execute("PRAGMA journal_mode=WAL")
|
| 74 |
+
self.conn.execute("PRAGMA synchronous=NORMAL")
|
| 75 |
+
self.conn.execute("PRAGMA temp_store=MEMORY")
|
| 76 |
+
self.conn.execute("PRAGMA foreign_keys=OFF")
|
| 77 |
+
self.conn.execute("PRAGMA cache_size=-200000")
|
| 78 |
+
|
| 79 |
+
def _init_schema(self) -> None:
    """Create missing tables and indexes, add columns that are absent, then commit.

    Every ``CREATE`` uses ``IF NOT EXISTS``, so this can be run against an
    existing database. After the script, the ``concepts`` and ``raw_units``
    tables are inspected with ``PRAGMA table_info`` and the ``display_name``
    and ``is_digested`` columns are added when absent (presumably for
    databases created by an earlier schema; confirm).
    """
    self.conn.executescript(
        """
        CREATE TABLE IF NOT EXISTS concepts (
        concept_id TEXT PRIMARY KEY,
        display_name TEXT NOT NULL DEFAULT ''
        );

        CREATE TABLE IF NOT EXISTS edges (
        edge_id INTEGER PRIMARY KEY AUTOINCREMENT,
        subject TEXT NOT NULL,
        relation TEXT NOT NULL,
        object TEXT NOT NULL,
        source_kind TEXT NOT NULL,
        source_text TEXT NOT NULL,
        is_active INTEGER NOT NULL DEFAULT 1,
        supports_edge_id INTEGER,
        created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
        );

        CREATE INDEX IF NOT EXISTS idx_edges_subject_relation_active
        ON edges(subject, relation, is_active);

        CREATE INDEX IF NOT EXISTS idx_edges_object_relation_active
        ON edges(object, relation, is_active);

        CREATE TABLE IF NOT EXISTS contradictions (
        contradiction_id INTEGER PRIMARY KEY AUTOINCREMENT,
        subject TEXT NOT NULL,
        relation TEXT NOT NULL,
        previous_object TEXT NOT NULL,
        new_object TEXT NOT NULL,
        previous_edge_id INTEGER NOT NULL,
        new_edge_id INTEGER NOT NULL,
        source_text TEXT NOT NULL,
        created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
        );

        CREATE TABLE IF NOT EXISTS raw_units (
        unit_id INTEGER PRIMARY KEY AUTOINCREMENT,
        lane TEXT NOT NULL,
        source TEXT NOT NULL,
        text TEXT NOT NULL,
        normalized_text TEXT NOT NULL,
        is_digested INTEGER NOT NULL DEFAULT 0,
        created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
        );

        CREATE INDEX IF NOT EXISTS idx_raw_units_lane
        ON raw_units(lane);
        """
    )
    # Column names currently present on ``concepts``.
    columns = {
        str(row["name"])
        for row in self.conn.execute("PRAGMA table_info(concepts)").fetchall()
    }
    if "display_name" not in columns:
        self.conn.execute("ALTER TABLE concepts ADD COLUMN display_name TEXT NOT NULL DEFAULT ''")
        # NOTE(review): this listing was recovered from a flattened diff, so
        # whether the backfill below sits inside this ``if`` could not be
        # verified; confirm against the repository.
        self.conn.execute("UPDATE concepts SET display_name = concept_id WHERE display_name = ''")
    # Column names currently present on ``raw_units``.
    raw_unit_columns = {
        str(row["name"])
        for row in self.conn.execute("PRAGMA table_info(raw_units)").fetchall()
    }
    if "is_digested" not in raw_unit_columns:
        self.conn.execute("ALTER TABLE raw_units ADD COLUMN is_digested INTEGER NOT NULL DEFAULT 0")
    self.conn.commit()
|
| 145 |
+
|
| 146 |
+
def ingest_triples(self, triples: list[Triple], *, commit: bool = True) -> list[int]:
    """Add each triple through ``add_fact`` and return the resulting edge ids in input order.

    Individual facts are added with ``commit=False``. A single commit is
    issued afterwards when ``commit`` is true and at least one edge was added.
    """
    edge_ids = [self.add_fact(item, commit=False) for item in triples]
    if edge_ids and commit:
        self.conn.commit()
    return edge_ids
|
| 153 |
+
|
| 154 |
+
def ingest_triples_fast(self, triples: list[Triple], *, commit: bool = True) -> int:
    """Bulk-insert triples, and their inverse edges, without per-fact conflict handling.

    Concepts are inserted with ``INSERT OR IGNORE``. A concept's display name
    is its stripped surface form, falling back to the concept id, and the
    first occurrence in ``triples`` wins. For every relation listed in
    ``INVERSE_RELATIONS`` a mirrored edge is inserted right after the forward
    edge.

    Returns:
        The number of edge rows inserted (forward plus inverse).
    """
    if not triples:
        return 0

    display_names: dict[str, str] = {}
    rows: list[tuple[str, str, str, str, str, int, int | None]] = []
    for item in triples:
        if item.subject not in display_names:
            display_names[item.subject] = item.subject_surface.strip() or item.subject
        if item.object not in display_names:
            display_names[item.object] = item.object_surface.strip() or item.object

        # Trailing columns: source_kind, source_text, is_active=1, supports_edge_id=NULL.
        provenance = (item.source_kind, item.source_text, 1, None)
        rows.append((item.subject, item.relation, item.object, *provenance))
        mirrored = self.INVERSE_RELATIONS.get(item.relation)
        if mirrored is not None:
            rows.append((item.object, mirrored, item.subject, *provenance))

    self.conn.executemany(
        """
        INSERT OR IGNORE INTO concepts(concept_id, display_name)
        VALUES (?, ?)
        """,
        tuple(display_names.items()),
    )
    self.conn.executemany(
        """
        INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        """,
        rows,
    )
    self._touch()
    if commit:
        self.conn.commit()
    return len(rows)
|
| 208 |
+
|
| 209 |
+
def ingest_raw_units(
    self,
    units: list[tuple[str, str, str]],
    *,
    commit: bool = True,
) -> int:
    """Store ``(lane, source, text)`` units as undigested rows in ``raw_units``.

    Units whose text is blank after stripping are skipped. Each stored row
    also carries a normalized copy of the text: lower-cased, with runs of
    whitespace collapsed to single spaces.

    Returns:
        The number of rows inserted.
    """
    if not units:
        return 0

    payload = []
    for lane, source, text in units:
        if not text.strip():
            continue
        normalized = " ".join(text.lower().split())
        payload.append((lane, source, text, normalized))
    if not payload:
        return 0

    self.conn.executemany(
        """
        INSERT INTO raw_units(lane, source, text, normalized_text, is_digested)
        VALUES (?, ?, ?, ?, 0)
        """,
        payload,
    )
    self._touch()
    if commit:
        self.conn.commit()
    return len(payload)
|
| 240 |
+
|
| 241 |
+
def fetch_undigested_raw_units(
    self,
    *,
    limit: int,
) -> list[dict[str, object]]:
    """Return up to ``limit`` undigested raw units, oldest (lowest ``unit_id``) first.

    Each unit is a dict with the keys ``unit_id``, ``lane``, ``source`` and ``text``.
    """
    cursor = self.conn.execute(
        """
        SELECT unit_id, lane, source, text
        FROM raw_units
        WHERE is_digested = 0
        ORDER BY unit_id ASC
        LIMIT ?
        """,
        (limit,),
    )
    pending: list[dict[str, object]] = []
    for record in cursor.fetchall():
        pending.append(
            {
                "unit_id": int(record["unit_id"]),
                "lane": str(record["lane"]),
                "source": str(record["source"]),
                "text": str(record["text"]),
            }
        )
    return pending
|
| 265 |
+
|
| 266 |
+
def mark_raw_units_digested(self, unit_ids: list[int], *, commit: bool = True) -> int:
    """Flag the given raw units as digested.

    The update is issued as a single-parameter statement executed once per
    id. Building one ``IN (?, ?, ...)`` clause binds one variable per id and
    fails with "too many SQL variables" once a batch exceeds SQLite's
    bound-parameter limit.

    Args:
        unit_ids: ``raw_units.unit_id`` values to flag.
        commit: commit the transaction after updating.

    Returns:
        ``len(unit_ids)``, the number of ids requested rather than the number
        of rows actually changed; 0 for an empty list, in which case nothing
        is executed.
    """
    if not unit_ids:
        return 0
    self.conn.executemany(
        "UPDATE raw_units SET is_digested = 1 WHERE unit_id = ?",
        ((unit_id,) for unit_id in unit_ids),
    )
    self._touch()
    if commit:
        self.conn.commit()
    return len(unit_ids)
|
| 278 |
+
|
| 279 |
+
def count_undigested_raw_units(self) -> int:
    """Return how many ``raw_units`` rows still have ``is_digested = 0``."""
    query = "SELECT COUNT(*) AS count FROM raw_units WHERE is_digested = 0"
    result = self.conn.execute(query).fetchone()
    if not result:
        return 0
    return int(result["count"])
|
| 284 |
+
|
| 285 |
+
def purge_digested_raw_units(self, *, commit: bool = True) -> int:
    """Delete every digested raw unit and return how many rows were removed.

    The count comes from the change in the connection's ``total_changes``
    counter across the DELETE. The mutation version is bumped only when at
    least one row was removed.
    """
    changes_at_start = self.conn.total_changes
    self.conn.execute("DELETE FROM raw_units WHERE is_digested = 1")
    deleted = self.conn.total_changes - changes_at_start
    if deleted:
        self._touch()
    if commit:
        self.conn.commit()
    return int(deleted)
|
| 294 |
+
|
| 295 |
+
def restore_from_db(self, source_db_path: str | Path) -> None:
    """Replace this store's contents with a copy of another database file.

    For an in-memory store (``db_path == ":memory:"``) the source is loaded
    into a fresh in-memory connection via SQLite's backup API. Otherwise the
    source file is copied over ``db_path`` and the connection is reopened.
    In both cases the new connection gets the ``sqlite3.Row`` row factory,
    ``_configure_connection()`` is applied and the mutation version is
    bumped.

    Args:
        source_db_path: Path of the database file to restore from.

    Raises:
        FileNotFoundError: If the source file does not exist. This is
            checked before the live connection is closed, so a bad path
            leaves the store usable and does not create an empty source
            file as a side effect of ``sqlite3.connect``.
    """
    source_path = Path(source_db_path)
    if not source_path.is_file():
        raise FileNotFoundError(f"source database not found: {source_path}")

    self.conn.close()
    if self.db_path == ":memory:":
        source = sqlite3.connect(str(source_path))
        source.row_factory = sqlite3.Row
        try:
            self.conn = sqlite3.connect(":memory:")
            self.conn.row_factory = sqlite3.Row
            self._configure_connection()
            source.backup(self.conn)
        finally:
            source.close()
    else:
        target_path = Path(self.db_path)
        target_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source_path, target_path)
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row
        self._configure_connection()
    # The contents changed wholesale in either branch, so always bump the
    # mutation version (previously skipped for in-memory stores).
    self._touch()
|
| 317 |
+
|
| 318 |
+
def add_fact(self, triple: Triple, *, commit: bool = True) -> int:
    """Store an asserted triple as a new active edge and return its edge id.

    Steps, in order: ensure both concepts exist, remember the currently
    active edge for ``(subject, relation)``, deactivate conflicting edges,
    insert the new edge, record a contradiction when the remembered edge
    pointed at a different object, add the inverse edge, bump the mutation
    version and optionally commit.
    """
    subject = triple.subject
    relation = triple.relation
    target = triple.object

    self._ensure_concept(subject, triple.subject_surface)
    self._ensure_concept(target, triple.object_surface)

    # Must be read before deactivation, otherwise the superseded edge
    # would no longer be found as active.
    superseded = self.get_active_edge(subject, relation)
    self._deactivate_conflicting_edges(subject, relation)

    inserted = self.conn.execute(
        """
        INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active)
        VALUES (?, ?, ?, ?, ?, 1)
        """,
        (subject, relation, target, triple.source_kind, triple.source_text),
    )
    edge_id = int(inserted.lastrowid)

    if superseded is not None and superseded.object != target:
        contradiction_values = (
            subject,
            relation,
            superseded.object,
            target,
            superseded.edge_id,
            edge_id,
            triple.source_text,
        )
        self.conn.execute(
            """
            INSERT INTO contradictions(
            subject, relation, previous_object, new_object, previous_edge_id, new_edge_id, source_text
            )
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            contradiction_values,
        )

    self._upsert_inverse_edge(
        subject=subject,
        relation=relation,
        object_value=target,
        source_kind=triple.source_kind,
        source_text=triple.source_text,
        supports_edge_id=edge_id,
    )
    self._touch()
    if commit:
        self.conn.commit()
    return edge_id
|
| 361 |
+
|
| 362 |
+
def add_derived_fact(
    self,
    *,
    subject: str,
    relation: str,
    object: str,
    source_text: str,
    supports_edge_id: int | None = None,
    commit: bool = True,
) -> int:
    """Store an inferred edge (``source_kind = 'derived'``) and return its id.

    If the active edge for ``(subject, relation)`` is already a derived edge
    pointing at the same object, that edge's id is returned and nothing is
    written. Otherwise a new active edge is inserted together with its
    inverse edge, the mutation version is bumped and the connection is
    optionally committed.
    """
    self._ensure_concept(subject)
    self._ensure_concept(object)

    current = self.get_active_edge(subject, relation)
    if current is not None:
        already_derived = current.object == object and current.source_kind == "derived"
        if already_derived:
            return current.edge_id

    inserted = self.conn.execute(
        """
        INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
        VALUES (?, ?, ?, 'derived', ?, 1, ?)
        """,
        (subject, relation, object, source_text, supports_edge_id),
    )
    new_edge_id = int(inserted.lastrowid)

    # The inverse edge is supported by the edge that was just inserted.
    self._upsert_inverse_edge(
        subject=subject,
        relation=relation,
        object_value=object,
        source_kind="derived",
        source_text=source_text,
        supports_edge_id=new_edge_id,
    )
    self._touch()
    if commit:
        self.conn.commit()
    return new_edge_id
|
| 397 |
+
|
| 398 |
+
def get_active_edge(self, subject: str, relation: str) -> EdgeRecord | None:
    """Return the newest active edge for ``(subject, relation)``, or ``None``.

    "Newest" means the highest ``edge_id`` among rows with ``is_active = 1``.
    """
    latest = self.conn.execute(
        """
        SELECT * FROM edges
        WHERE subject = ? AND relation = ? AND is_active = 1
        ORDER BY edge_id DESC
        LIMIT 1
        """,
        (subject, relation),
    ).fetchone()
    if not latest:
        return None
    return self._row_to_edge(latest)
|
| 409 |
+
|
| 410 |
+
def get_objects(self, subject: str, relation: str) -> list[EdgeRecord]:
    """Return all active edges for ``(subject, relation)``, newest first."""
    matches = self.conn.execute(
        """
        SELECT * FROM edges
        WHERE subject = ? AND relation = ? AND is_active = 1
        ORDER BY edge_id DESC
        """,
        (subject, relation),
    ).fetchall()
    edges = []
    for match in matches:
        edges.append(self._row_to_edge(match))
    return edges
|
| 420 |
+
|
| 421 |
+
def get_subjects(self, relation: str, object_value: str) -> list[EdgeRecord]:
    """Return all active edges with the given relation and object, newest first."""
    matches = self.conn.execute(
        """
        SELECT * FROM edges
        WHERE relation = ? AND object = ? AND is_active = 1
        ORDER BY edge_id DESC
        """,
        (relation, object_value),
    ).fetchall()
    edges = []
    for match in matches:
        edges.append(self._row_to_edge(match))
    return edges
|
| 431 |
+
|
| 432 |
+
def iter_active_edges(self) -> list[EdgeRecord]:
    """Return every active edge as a list, ordered by ascending ``edge_id``."""
    active_rows = self.conn.execute(
        """
        SELECT * FROM edges
        WHERE is_active = 1
        ORDER BY edge_id ASC
        """
    ).fetchall()
    edges = []
    for active_row in active_rows:
        edges.append(self._row_to_edge(active_row))
    return edges
|
| 441 |
+
|
| 442 |
+
def list_concepts(self) -> list[str]:
    """Return every ``concept_id`` in the ``concepts`` table, sorted ascending."""
    concept_rows = self.conn.execute(
        """
        SELECT concept_id FROM concepts
        ORDER BY concept_id ASC
        """
    ).fetchall()
    identifiers = []
    for concept_row in concept_rows:
        identifiers.append(str(concept_row["concept_id"]))
    return identifiers
|
| 450 |
+
|
| 451 |
+
def iter_outgoing_edges(self, subject: str) -> list[EdgeRecord]:
    """Return the active edges leaving ``subject``, ordered by ascending ``edge_id``."""
    outgoing_rows = self.conn.execute(
        """
        SELECT * FROM edges
        WHERE subject = ? AND is_active = 1
        ORDER BY edge_id ASC
        """,
        (subject,),
    ).fetchall()
    edges = []
    for outgoing_row in outgoing_rows:
        edges.append(self._row_to_edge(outgoing_row))
    return edges
|
| 461 |
+
|
| 462 |
+
def find_path(
    self,
    start: str,
    goal: str,
    max_hops: int = 4,
    *,
    include_derived: bool = True,
) -> list[EdgeRecord] | None:
    """Breadth-first search for a chain of active edges from ``start`` to ``goal``.

    Args:
        start: Concept to start from.
        goal: Concept to reach.
        max_hops: Maximum number of edges allowed in the returned path.
        include_derived: When false, edges whose ``source_kind`` is
            ``"derived"`` are not followed.

    Returns:
        The edges along the first path found, ``[]`` when ``start == goal``,
        or ``None`` when no path within ``max_hops`` exists.
    """
    if start == goal:
        return []

    seen = {start}
    frontier: deque[tuple[str, list[EdgeRecord]]] = deque()
    frontier.append((start, []))

    while frontier:
        current, trail = frontier.popleft()
        if len(trail) >= max_hops:
            # This trail is already at the hop budget; do not expand it.
            continue
        for edge in self.iter_outgoing_edges(current):
            if include_derived or edge.source_kind != "derived":
                reached = edge.object
                if reached == goal:
                    return trail + [edge]
                if reached in seen:
                    continue
                seen.add(reached)
                frontier.append((reached, trail + [edge]))
    return None
|
| 487 |
+
|
| 488 |
+
def get_contradictions(self, subject: str | None = None, relation: str | None = None) -> list[ContradictionRecord]:
    """Return recorded contradictions, newest first.

    ``subject`` and ``relation`` are optional equality filters; a filter is
    applied only when its argument is not ``None``.
    """
    # Only fixed column names are interpolated into the SQL text; the
    # filter values themselves are always bound as parameters.
    filters = [
        (column, value)
        for column, value in (("subject", subject), ("relation", relation))
        if value is not None
    ]
    params: list[str] = [value for _, value in filters]
    if filters:
        conditions = " AND ".join(f"{column} = ?" for column, _ in filters)
        where_sql = f"WHERE {conditions}"
    else:
        where_sql = ""
    found = self.conn.execute(
        f"""
        SELECT * FROM contradictions
        {where_sql}
        ORDER BY contradiction_id DESC
        """,
        params,
    ).fetchall()
    records = []
    for entry in found:
        records.append(self._row_to_contradiction(entry))
    return records
|
| 507 |
+
|
| 508 |
+
def close(self) -> None:
    """Close the underlying SQLite connection."""
    connection = self.conn
    connection.close()
|
| 510 |
+
|
| 511 |
+
def commit(self) -> None:
    """Commit the pending transaction on the underlying SQLite connection."""
    connection = self.conn
    connection.commit()
|
| 513 |
+
|
| 514 |
+
def merge_from_db(self, source_db_path: str | Path, *, fast: bool = False) -> dict[str, int]:
    """Merge concepts, edges, contradictions and raw units from another database.

    Edge ids are remapped: every source ``edge_id`` is mapped to the id of
    the matching or newly inserted edge in this store, and that mapping is
    used to rewrite ``supports_edge_id`` and the edge references of
    contradictions. Contradictions whose edges could not be mapped are
    skipped. Raw units are always appended.

    Args:
        source_db_path: Path of the database to merge from.
        fast: When true, skip the duplicate checks for edges and
            contradictions and insert every source row.

    Returns:
        Counts of merged rows under the keys ``concepts``, ``edges``,
        ``contradictions`` and ``raw_units``.
    """
    source = sqlite3.connect(str(source_db_path))
    source.row_factory = sqlite3.Row
    edge_id_map: dict[int, int] = {}
    merged_edges = 0
    merged_concepts = 0
    merged_contradictions = 0
    merged_raw_units = 0
    try:
        # Select every column: selecting only concept_id made the
        # display_name lookup below always miss, so names were never merged.
        for row in source.execute("SELECT * FROM concepts ORDER BY concept_id ASC"):
            before = self.conn.total_changes
            # Older source schemas may lack display_name; NULL counts as no name.
            display_name = str(row["display_name"] or "") if "display_name" in row.keys() else ""
            self._ensure_concept(str(row["concept_id"]), display_name)
            if self.conn.total_changes > before:
                merged_concepts += 1

        for row in source.execute("SELECT * FROM edges ORDER BY edge_id ASC"):
            original_edge_id = int(row["edge_id"])
            if not fast:
                existing_edge_id = self._find_matching_edge(
                    subject=str(row["subject"]),
                    relation=str(row["relation"]),
                    object_value=str(row["object"]),
                    source_kind=str(row["source_kind"]),
                    source_text=str(row["source_text"]),
                    is_active=bool(row["is_active"]),
                )
                if existing_edge_id is not None:
                    # Already present: reuse its id for later references.
                    edge_id_map[original_edge_id] = existing_edge_id
                    continue

            self._ensure_concept(str(row["subject"]))
            self._ensure_concept(str(row["object"]))
            supports_edge_id = row["supports_edge_id"]
            # Edges are read in ascending id order, so a supporting edge
            # with a lower id has been mapped by the time it is referenced.
            mapped_support = edge_id_map.get(int(supports_edge_id)) if supports_edge_id is not None else None
            cursor = self.conn.execute(
                """
                INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    str(row["subject"]),
                    str(row["relation"]),
                    str(row["object"]),
                    str(row["source_kind"]),
                    str(row["source_text"]),
                    int(row["is_active"]),
                    mapped_support,
                ),
            )
            edge_id = int(cursor.lastrowid)
            edge_id_map[original_edge_id] = edge_id
            merged_edges += 1

        for row in source.execute("SELECT * FROM contradictions ORDER BY contradiction_id ASC"):
            previous_edge_id = edge_id_map.get(int(row["previous_edge_id"]))
            new_edge_id = edge_id_map.get(int(row["new_edge_id"]))
            if previous_edge_id is None or new_edge_id is None:
                continue
            if not fast:
                if self._contradiction_exists(
                    subject=str(row["subject"]),
                    relation=str(row["relation"]),
                    previous_object=str(row["previous_object"]),
                    new_object=str(row["new_object"]),
                    previous_edge_id=previous_edge_id,
                    new_edge_id=new_edge_id,
                ):
                    continue
            self.conn.execute(
                """
                INSERT INTO contradictions(
                subject, relation, previous_object, new_object, previous_edge_id, new_edge_id, source_text
                )
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    str(row["subject"]),
                    str(row["relation"]),
                    str(row["previous_object"]),
                    str(row["new_object"]),
                    previous_edge_id,
                    new_edge_id,
                    str(row["source_text"]),
                ),
            )
            merged_contradictions += 1
        for row in source.execute("SELECT * FROM raw_units ORDER BY unit_id ASC"):
            self.conn.execute(
                """
                INSERT INTO raw_units(lane, source, text, normalized_text, is_digested)
                VALUES (?, ?, ?, ?, ?)
                """,
                (
                    str(row["lane"]),
                    str(row["source"]),
                    str(row["text"]),
                    str(row["normalized_text"]),
                    int(row["is_digested"]) if "is_digested" in row.keys() else 0,
                ),
            )
            merged_raw_units += 1
        self.conn.commit()
        if merged_concepts or merged_edges or merged_contradictions or merged_raw_units:
            self._touch()
    finally:
        source.close()
    return {
        "concepts": merged_concepts,
        "edges": merged_edges,
        "contradictions": merged_contradictions,
        "raw_units": merged_raw_units,
    }
|
| 627 |
+
|
| 628 |
+
def _ensure_concept(self, concept_id: str, display_name: str = "") -> None:
    """Make sure a concept row exists and optionally upgrade its display name.

    A missing concept is inserted with the stripped ``display_name`` (or the
    ``concept_id`` itself when no name is given). When a name is given for
    an existing concept, the stored name is replaced only if
    ``_prefer_display_name`` favours the new one.
    """
    preferred = display_name.strip()
    initial_name = preferred if preferred else concept_id
    self.conn.execute(
        "INSERT OR IGNORE INTO concepts(concept_id, display_name) VALUES (?, ?)",
        (concept_id, initial_name),
    )
    if not preferred:
        return

    stored = self.conn.execute(
        "SELECT display_name FROM concepts WHERE concept_id = ? LIMIT 1",
        (concept_id,),
    ).fetchone()
    stored_name = str(stored["display_name"]).strip() if stored else ""
    if not self._prefer_display_name(preferred, stored_name):
        return
    self.conn.execute(
        "UPDATE concepts SET display_name = ? WHERE concept_id = ?",
        (preferred, concept_id),
    )
|
| 645 |
+
|
| 646 |
+
def _touch(self) -> None:
    """Advance ``mutation_version`` by one to signal that the store changed."""
    self.mutation_version = self.mutation_version + 1
|
| 648 |
+
|
| 649 |
+
def _deactivate_conflicting_edges(self, subject: str, relation: str) -> None:
    """Mark every active edge for ``(subject, relation)`` as inactive.

    ``is_a`` edges are exempt: a subject may keep several active ``is_a``
    edges, so nothing is deactivated for that relation.
    """
    if relation == "is_a":
        return
    deactivate_sql = """
        UPDATE edges
        SET is_active = 0
        WHERE subject = ? AND relation = ? AND is_active = 1
        """
    self.conn.execute(deactivate_sql, (subject, relation))
|
| 660 |
+
|
| 661 |
+
def _upsert_inverse_edge(
    self,
    *,
    subject: str,
    relation: str,
    object_value: str,
    source_kind: str,
    source_text: str,
    supports_edge_id: int | None,
) -> None:
    """Insert the inverse of an edge when the relation has a registered inverse.

    The inverse relation is looked up in ``INVERSE_RELATIONS``; relations
    without an entry are ignored. The inverse edge runs from
    ``object_value`` to ``subject`` and is stored as active. Existing active
    edges for ``(object_value, inverse relation)`` are deactivated first,
    except when the inverse relation is ``has_instance`` or ``is_a``.
    """
    inverse = self.INVERSE_RELATIONS.get(relation)
    if inverse is None:
        return

    self._ensure_concept(object_value)
    self._ensure_concept(subject)

    multi_valued = inverse == "has_instance" or inverse == "is_a"
    if not multi_valued:
        self._deactivate_conflicting_edges(object_value, inverse)

    inverse_values = (object_value, inverse, subject, source_kind, source_text, supports_edge_id)
    self.conn.execute(
        """
        INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
        VALUES (?, ?, ?, ?, ?, 1, ?)
        """,
        inverse_values,
    )
|
| 685 |
+
|
| 686 |
+
def _find_matching_edge(
    self,
    *,
    subject: str,
    relation: str,
    object_value: str,
    source_kind: str,
    source_text: str,
    is_active: bool,
) -> int | None:
    """Return the id of the newest edge equal to the given fields, or ``None``.

    All six fields must match exactly; ``is_active`` is compared as 0/1.
    """
    lookup_values = (subject, relation, object_value, source_kind, source_text, int(is_active))
    match = self.conn.execute(
        """
        SELECT edge_id FROM edges
        WHERE subject = ? AND relation = ? AND object = ? AND source_kind = ? AND source_text = ? AND is_active = ?
        ORDER BY edge_id DESC
        LIMIT 1
        """,
        lookup_values,
    ).fetchone()
    if not match:
        return None
    return int(match["edge_id"])
|
| 706 |
+
|
| 707 |
+
def _contradiction_exists(
    self,
    *,
    subject: str,
    relation: str,
    previous_object: str,
    new_object: str,
    previous_edge_id: int,
    new_edge_id: int,
) -> bool:
    """Report whether an identical contradiction row is already stored.

    A row counts as identical when all six given fields match.
    """
    lookup_values = (subject, relation, previous_object, new_object, previous_edge_id, new_edge_id)
    match = self.conn.execute(
        """
        SELECT contradiction_id FROM contradictions
        WHERE subject = ? AND relation = ? AND previous_object = ? AND new_object = ?
        AND previous_edge_id = ? AND new_edge_id = ?
        LIMIT 1
        """,
        lookup_values,
    ).fetchone()
    found = match is not None
    return found
|
| 727 |
+
|
| 728 |
+
def get_display_name(self, concept_id: str) -> str:
    """Return the human-readable name for a concept.

    Uses the stored ``display_name`` when it is non-blank; otherwise falls
    back to ``concept_id`` with underscores replaced by spaces.
    """
    record = self.conn.execute(
        "SELECT display_name FROM concepts WHERE concept_id = ? LIMIT 1",
        (concept_id,),
    ).fetchone()
    if record:
        stored_name = str(record["display_name"]).strip()
        if stored_name:
            return stored_name
    return concept_id.replace("_", " ")
|
| 736 |
+
|
| 737 |
+
@staticmethod
def _prefer_display_name(candidate: str, current: str) -> bool:
    """Decide whether ``candidate`` should replace ``current`` as display name.

    An empty ``current`` always loses. Otherwise names are ranked by their
    number of upper-case characters, then by length; the candidate wins
    only with a strictly higher rank.
    """
    if not current:
        return True

    def rank(name: str) -> tuple[int, int]:
        uppercase_count = 0
        for char in name:
            if char.isupper():
                uppercase_count += 1
        return (uppercase_count, len(name))

    return rank(candidate) > rank(current)
|
| 744 |
+
|
| 745 |
+
@staticmethod
def _row_to_edge(row: sqlite3.Row) -> EdgeRecord:
    """Convert an ``edges`` row into an ``EdgeRecord`` with coerced field types.

    ``supports_edge_id`` stays ``None`` when the column is NULL.
    """
    support = row["supports_edge_id"]
    fields = {
        "edge_id": int(row["edge_id"]),
        "subject": str(row["subject"]),
        "relation": str(row["relation"]),
        "object": str(row["object"]),
        "source_kind": str(row["source_kind"]),
        "source_text": str(row["source_text"]),
        "is_active": bool(row["is_active"]),
        "supports_edge_id": None if support is None else int(support),
    }
    return EdgeRecord(**fields)
|
| 757 |
+
|
| 758 |
+
@staticmethod
def _row_to_contradiction(row: sqlite3.Row) -> ContradictionRecord:
    """Convert a ``contradictions`` row into a ``ContradictionRecord``.

    Id columns are coerced to ``int`` and all other columns to ``str``.
    """
    fields = {
        "contradiction_id": int(row["contradiction_id"]),
        "subject": str(row["subject"]),
        "relation": str(row["relation"]),
        "previous_object": str(row["previous_object"]),
        "new_object": str(row["new_object"]),
        "previous_edge_id": int(row["previous_edge_id"]),
        "new_edge_id": int(row["new_edge_id"]),
        "source_text": str(row["source_text"]),
    }
    return ContradictionRecord(**fields)
|
runtime/aethon/rfi_ingest.py
ADDED
|
@@ -0,0 +1,445 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
from .rfi_ontology import ConceptOntology
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass(frozen=True)
class Triple:
    """Immutable subject-relation-object fact produced by the extractor."""

    # Concept id of the subject (the extractor stores the ontology-resolved id).
    subject: str
    # Relation name (the extractor stores the ontology-normalized form).
    relation: str
    # Concept id of the object (the extractor stores the ontology-resolved id).
    object: str
    # Sentence the fact was extracted from.
    source_text: str
    # Provenance label; extracted facts default to "direct_assertion".
    source_kind: str = "direct_assertion"
    # Subject text as recovered from the source sentence, if any.
    subject_surface: str = ""
    # Object text as recovered from the source sentence, if any.
    object_surface: str = ""
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class DeterministicTripleExtractor:
|
| 20 |
+
"""Controlled no-weight ingestion for declarative fact sentences."""
|
| 21 |
+
|
| 22 |
+
_GENERIC_BLOCKLIST = {
|
| 23 |
+
"is",
|
| 24 |
+
"are",
|
| 25 |
+
"was",
|
| 26 |
+
"were",
|
| 27 |
+
"be",
|
| 28 |
+
"been",
|
| 29 |
+
"being",
|
| 30 |
+
"do",
|
| 31 |
+
"does",
|
| 32 |
+
"did",
|
| 33 |
+
"have",
|
| 34 |
+
"has",
|
| 35 |
+
"had",
|
| 36 |
+
"say",
|
| 37 |
+
"says",
|
| 38 |
+
"said",
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
def __init__(self, ontology: ConceptOntology | None = None) -> None:
    """Create an extractor backed by ``ontology`` (a fresh one when omitted).

    Also builds the built-in phrase table ``_patterns``: tuples of
    ``(phrase, relation, reverse)`` where ``reverse`` means the text to the
    right of the phrase is the subject. The table is order-sensitive
    because the first matching phrase wins.
    """
    self.ontology = ontology if ontology else ConceptOntology()

    # Phrases for which the subject is on the right-hand side.
    reversed_phrases = {" is used by "}
    phrase_table = (
        (" is located in ", "located_in"),
        (" is in ", "located_in"),
        (" lives in ", "lives_in"),
        (" stays in ", "lives_in"),
        (" resides in ", "lives_in"),
        (" works in ", "work_in"),
        (" studies ", "study"),
        (" now likes ", "like"),
        (" likes ", "like"),
        (" now prefers ", "prefer"),
        (" prefers ", "prefer"),
        (" chases ", "chase"),
        (" attacks ", "attack"),
        (" hunts ", "hunt"),
        (" uses ", "use"),
        (" calls ", "call"),
        (" imports ", "import"),
        (" depends on ", "depend_on"),
        (" is used by ", "use"),
        (" inherits from ", "is_a"),
        (" extends ", "is_a"),
        (" solves ", "solve"),
        (" bought ", "bought"),
        (" carries ", "carrying"),
        (" is carrying ", "carrying"),
        (" visited ", "visited"),
        (" reached ", "reached"),
        (" saw ", "saw"),
        (" returns ", "return"),
        (" equals ", "equals"),
        (" is a ", "is_a"),
        (" is an ", "is_a"),
    )
    self._patterns: tuple[tuple[str, str, bool], ...] = tuple(
        (phrase, relation, phrase in reversed_phrases) for phrase, relation in phrase_table
    )
|
| 77 |
+
|
| 78 |
+
def extract(self, text: str) -> list[Triple]:
    """Extract triples from every sentence of ``text``.

    Sentences whose extraction raises ``ValueError`` are skipped.
    """
    collected: list[Triple] = []
    for sentence in self._split_sentences(text):
        try:
            found = self._extract_sentence(sentence)
        except ValueError:
            continue
        collected.extend(found)
    return collected
|
| 86 |
+
|
| 87 |
+
def extract_ultra(self, text: str) -> list[Triple]:
    """Extract triples using the reduced rule set of ``_extract_sentence_ultra``.

    Sentences that are empty after cleanup are skipped, as are sentences
    whose extraction raises ``ValueError``.
    """
    collected: list[Triple] = []
    for sentence in self._split_sentences(text):
        normalized = self._cleanup(sentence)
        if normalized:
            try:
                found = self._extract_sentence_ultra(sentence, normalized)
            except ValueError:
                continue
            collected.extend(found)
    return collected
|
| 98 |
+
|
| 99 |
+
def _extract_sentence(self, sentence: str) -> list[Triple]:
    """Extract triples from one sentence using an ordered cascade of rules.

    The sentence is cleaned up first; an empty result yields no triples.
    Rules are tried in a fixed order and the first rule that produces
    triples returns immediately: special multi-part constructions, the
    phrase table from ``_iter_patterns``, generic locative and transitive
    clauses, assignments, return statements and passive "by" clauses.

    Triples are built from the normalized fragments, but the original
    ``sentence`` is passed on as the source text.
    """
    normalized_sentence = self._cleanup(sentence)
    if not normalized_sentence:
        return []

    triples: list[Triple] = []

    # "<subject> keeps <object> in <location>" yields two triples.
    keep_parts = self._split_multi(normalized_sentence, (" keeps ", " in "))
    if keep_parts is not None:
        subject, obj, loc = keep_parts
        obj = self._strip_leading_articles(obj)
        triples.append(self._make_triple(subject, "keeps", obj, sentence))
        # NOTE(review): the location is attached to the subject, not to the
        # kept object - confirm this is intended.
        triples.append(self._make_triple(subject, "located_in", loc, sentence))
        return triples

    # "<subject> was asked to watch <object>".
    watch_parts = self._split_once(normalized_sentence, " was asked to watch ")
    if watch_parts is not None:
        subject, obj = watch_parts
        triples.append(self._make_triple(subject, "watch", self._strip_leading_articles(obj), sentence))
        return triples

    # "<subject> does not like <object> anymore"; the trailing " anymore"
    # is cut off the object.
    negative_preference_parts = self._split_once(normalized_sentence, " does not like ")
    if negative_preference_parts is not None and normalized_sentence.endswith(" anymore"):
        subject, obj = negative_preference_parts
        obj = obj[: -len(" anymore")].strip()
        triples.append(
            self._make_triple(
                subject,
                "not_like_anymore",
                obj,
                sentence,
            )
        )
        return triples

    # "<subject> reached <location> carrying <object>" yields two triples.
    carry_parts = self._split_multi(normalized_sentence, (" reached ", " carrying "))
    if carry_parts is not None:
        subject, loc, obj = carry_parts
        triples.append(self._make_triple(subject, "reached", loc, sentence))
        triples.append(self._make_triple(subject, "carrying", self._strip_leading_articles(obj), sentence))
        return triples

    # "<subject> bought <object> in|at <location>"; " in " is tried first.
    buy_location_parts = self._split_multi(normalized_sentence, (" bought ", " in "))
    if buy_location_parts is None:
        buy_location_parts = self._split_multi(normalized_sentence, (" bought ", " at "))
    if buy_location_parts is not None:
        subject, obj, loc = buy_location_parts
        triples.append(self._make_triple(subject, "bought", obj, sentence))
        triples.append(self._make_triple(subject, "bought_in", loc, sentence))
        return triples

    # "<subject> later bought <object> before leaving <location>".
    later_buy_parts = self._split_multi(normalized_sentence, (" later bought ", " before leaving "))
    if later_buy_parts is not None:
        subject, obj, loc = later_buy_parts
        triples.append(self._make_triple(subject, "bought", obj, sentence))
        triples.append(self._make_triple(subject, "bought_in", loc, sentence))
        return triples

    # Phrase table: the first phrase that splits the sentence into two
    # non-empty sides wins; ``reverse`` swaps subject and object.
    for phrase, relation, reverse in self._iter_patterns():
        parts = self._split_once(normalized_sentence, phrase)
        if parts is None:
            continue
        left, right = parts
        subject = right if reverse else left
        obj = left if reverse else right
        if subject and obj:
            triples.append(self._make_triple(subject, relation, obj, sentence))
            return triples

    # Generic locative clause; the relation is derived from "<verb>_in".
    generic_locative = self._parse_generic_locative(normalized_sentence)
    if generic_locative is not None and self._is_simple_clause(normalized_sentence):
        subject, verb, obj = generic_locative
        relation = self.ontology.normalize_relation(f"{verb}_in")
        # NOTE(review): this checks the normalized "<verb>_in" relation
        # against the blocklist, while the transitive rule below checks
        # the bare verb - confirm which is intended.
        if relation not in self._GENERIC_BLOCKLIST:
            triples.append(self._make_triple(subject, relation, obj, sentence))
            return triples

    # Generic transitive clause; blocklisted verbs are rejected.
    generic_transitive = self._parse_generic_transitive(normalized_sentence)
    if generic_transitive is not None and self._is_simple_clause(normalized_sentence):
        subject, verb, object_value = generic_transitive
        if verb not in self._GENERIC_BLOCKLIST and subject and object_value:
            relation = self.ontology.normalize_relation(verb)
            triples.append(self._make_triple(subject, relation, object_value, sentence))
            return triples

    # Assignment-like sentence, recorded as an "equals" triple.
    assignment = self._parse_assignment(normalized_sentence)
    if assignment is not None:
        left, right = assignment
        triples.append(self._make_triple(left, "equals", right, sentence))
        return triples

    # Return statement, recorded as a "return" triple.
    return_match = self._parse_return_statement(normalized_sentence)
    if return_match is not None:
        name, value = return_match
        triples.append(self._make_triple(name, "return", value, sentence))
        return triples

    # Passive "by" clause: the parsed agent becomes the subject.
    passive_by_match = self._parse_passive_by(normalized_sentence)
    if passive_by_match is not None and self._is_simple_clause(normalized_sentence):
        obj, verb, subject = passive_by_match
        relation = self.ontology.normalize_relation(verb)
        if relation not in self._GENERIC_BLOCKLIST:
            triples.append(self._make_triple(subject, relation, obj, sentence))
            return triples

    # No rule matched: ``triples`` is still empty here.
    return triples
|
| 205 |
+
|
| 206 |
+
def _extract_sentence_ultra(self, sentence: str, normalized_sentence: str) -> list[Triple]:
    """Extract at most one triple from a sentence using the reduced rule set.

    Rules are tried in order and the first hit is returned: the phrase
    table, assignments, return statements, then (only for simple clauses)
    generic locative and generic transitive clauses. Returns an empty list
    when nothing matches.
    """
    for phrase, relation, reverse in self._iter_patterns():
        pieces = self._split_once(normalized_sentence, phrase)
        if pieces is None:
            continue
        left, right = pieces
        if reverse:
            subject, obj = right, left
        else:
            subject, obj = left, right
        if subject and obj:
            return [self._make_triple(subject, relation, obj, sentence)]

    assignment = self._parse_assignment(normalized_sentence)
    if assignment is not None:
        left, right = assignment
        return [self._make_triple(left, "equals", right, sentence)]

    return_match = self._parse_return_statement(normalized_sentence)
    if return_match is not None:
        name, value = return_match
        return [self._make_triple(name, "return", value, sentence)]

    # The generic rules are only trusted on simple clauses.
    if not self._is_simple_clause(normalized_sentence):
        return []

    locative = self._parse_generic_locative(normalized_sentence)
    if locative is not None:
        subject, verb, obj = locative
        relation = self.ontology.normalize_relation(f"{verb}_in")
        if relation not in self._GENERIC_BLOCKLIST:
            return [self._make_triple(subject, relation, obj, sentence)]

    transitive = self._parse_generic_transitive(normalized_sentence)
    if transitive is not None:
        subject, verb, object_value = transitive
        if verb not in self._GENERIC_BLOCKLIST and subject and object_value:
            relation = self.ontology.normalize_relation(verb)
            return [self._make_triple(subject, relation, object_value, sentence)]

    return []
|
| 250 |
+
|
| 251 |
+
def _iter_patterns(self) -> tuple[tuple[str, str, bool], ...]:
|
| 252 |
+
learned: list[tuple[str, str, bool]] = []
|
| 253 |
+
for phrase, meaning in self.ontology.semantic_lexicon.phrase_alias_map.items():
|
| 254 |
+
if "_" not in meaning:
|
| 255 |
+
continue
|
| 256 |
+
learned.append((f" {phrase} ", self.ontology.normalize_relation(meaning), False))
|
| 257 |
+
return tuple(dict.fromkeys((*self._patterns, *learned)))
|
| 258 |
+
|
| 259 |
+
def _make_triple(self, subject: str, relation: str, obj: str, source_text: str) -> Triple:
    """Build a ``Triple`` from raw subject/relation/object fragments.

    The subject and object are resolved to concept ids through the ontology,
    the relation is normalized, and the original spelling of both fragments
    is recovered from ``source_text`` so it can be stored as surface text.
    """
    ontology = self.ontology
    # Surfaces are recovered before any ontology call, as in the original order.
    subject_surface, object_surface = (
        self._recover_surface(fragment, source_text) for fragment in (subject, obj)
    )
    subject_id = ontology.resolve(subject).concept_id
    relation_id = ontology.normalize_relation(relation)
    object_id = ontology.resolve(obj).concept_id
    return Triple(
        subject=subject_id,
        relation=relation_id,
        object=object_id,
        source_text=source_text.strip(),
        subject_surface=subject_surface,
        object_surface=object_surface,
    )
|
| 270 |
+
|
| 271 |
+
@staticmethod
|
| 272 |
+
def _recover_surface(fragment: str, source_text: str) -> str:
|
| 273 |
+
cleaned = fragment.strip()
|
| 274 |
+
if not cleaned:
|
| 275 |
+
return cleaned
|
| 276 |
+
source_lower = source_text.lower()
|
| 277 |
+
fragment_lower = cleaned.lower()
|
| 278 |
+
index = source_lower.find(fragment_lower)
|
| 279 |
+
if index >= 0:
|
| 280 |
+
return source_text[index : index + len(cleaned)].strip()
|
| 281 |
+
return cleaned
|
| 282 |
+
|
| 283 |
+
@staticmethod
|
| 284 |
+
def _split_sentences(text: str) -> list[str]:
|
| 285 |
+
clean = text.replace("\r", "\n")
|
| 286 |
+
parts: list[str] = []
|
| 287 |
+
current: list[str] = []
|
| 288 |
+
for char in clean:
|
| 289 |
+
if char in "\n.!?":
|
| 290 |
+
chunk = "".join(current).strip(" -:;,\t")
|
| 291 |
+
if chunk:
|
| 292 |
+
parts.append(chunk)
|
| 293 |
+
current = []
|
| 294 |
+
continue
|
| 295 |
+
current.append(char)
|
| 296 |
+
chunk = "".join(current).strip(" -:;,\t")
|
| 297 |
+
if chunk:
|
| 298 |
+
parts.append(chunk)
|
| 299 |
+
return parts
|
| 300 |
+
|
| 301 |
+
@staticmethod
|
| 302 |
+
def _cleanup(sentence: str) -> str:
|
| 303 |
+
sentence = sentence.strip()
|
| 304 |
+
lowered = sentence.lower()
|
| 305 |
+
prefixes = (
|
| 306 |
+
"record:",
|
| 307 |
+
"latest record:",
|
| 308 |
+
"correction:",
|
| 309 |
+
"update:",
|
| 310 |
+
"first note:",
|
| 311 |
+
"distractor:",
|
| 312 |
+
"delay note:",
|
| 313 |
+
"target record:",
|
| 314 |
+
"briefing:",
|
| 315 |
+
)
|
| 316 |
+
for prefix in prefixes:
|
| 317 |
+
if lowered.startswith(prefix):
|
| 318 |
+
sentence = sentence[len(prefix) :].strip()
|
| 319 |
+
break
|
| 320 |
+
return " ".join(sentence.split()).lower()
|
| 321 |
+
|
| 322 |
+
@staticmethod
|
| 323 |
+
def _is_simple_clause(sentence: str) -> bool:
|
| 324 |
+
tokens = sentence.split()
|
| 325 |
+
if len(tokens) < 3 or len(tokens) > 12:
|
| 326 |
+
return False
|
| 327 |
+
if any(marker in sentence for marker in (",", ";", " that ", " which ", " because ", " while ", " although ")):
|
| 328 |
+
return False
|
| 329 |
+
return True
|
| 330 |
+
|
| 331 |
+
@staticmethod
|
| 332 |
+
def _split_once(text: str, phrase: str) -> tuple[str, str] | None:
|
| 333 |
+
if phrase not in text:
|
| 334 |
+
return None
|
| 335 |
+
left, right = text.split(phrase, 1)
|
| 336 |
+
left = left.strip()
|
| 337 |
+
right = right.strip()
|
| 338 |
+
if not left or not right:
|
| 339 |
+
return None
|
| 340 |
+
return left, right
|
| 341 |
+
|
| 342 |
+
@classmethod
def _split_multi(cls, text: str, phrases: tuple[str, ...]) -> tuple[str, ...] | None:
    """Split ``text`` successively on each phrase, in order.

    Every phrase must be found (via ``_split_once``) in what remains after
    the previous split. Returns the left-hand pieces followed by the final
    remainder, or ``None`` if any split fails or the remainder is empty.
    """
    collected: list[str] = []
    rest = text
    for phrase in phrases:
        pieces = cls._split_once(rest, phrase)
        if pieces is None:
            return None
        head, rest = pieces
        collected.append(head)
    rest = rest.strip()
    if not rest:
        return None
    return (*collected, rest)
|
| 357 |
+
|
| 358 |
+
@staticmethod
|
| 359 |
+
def _strip_leading_articles(text: str) -> str:
|
| 360 |
+
for article in ("the ", "a ", "an "):
|
| 361 |
+
if text.startswith(article):
|
| 362 |
+
return text[len(article) :].strip()
|
| 363 |
+
return text.strip()
|
| 364 |
+
|
| 365 |
+
def _parse_generic_locative(self, sentence: str) -> tuple[str, str, str] | None:
|
| 366 |
+
tokens = sentence.split()
|
| 367 |
+
prepositions = {"in", "at", "inside", "within"}
|
| 368 |
+
for index, token in enumerate(tokens):
|
| 369 |
+
if token not in prepositions or index < 2 or index == len(tokens) - 1:
|
| 370 |
+
continue
|
| 371 |
+
verb = tokens[index - 1]
|
| 372 |
+
if not verb.isalpha():
|
| 373 |
+
continue
|
| 374 |
+
subject = " ".join(tokens[: index - 1]).strip()
|
| 375 |
+
obj = " ".join(tokens[index + 1 :]).strip()
|
| 376 |
+
if subject and obj:
|
| 377 |
+
return subject, verb, obj
|
| 378 |
+
return None
|
| 379 |
+
|
| 380 |
+
def _parse_generic_transitive(self, sentence: str) -> tuple[str, str, str] | None:
|
| 381 |
+
tokens = sentence.split()
|
| 382 |
+
if len(tokens) < 3:
|
| 383 |
+
return None
|
| 384 |
+
for index, token in enumerate(tokens):
|
| 385 |
+
if not token.isalpha():
|
| 386 |
+
continue
|
| 387 |
+
subject = " ".join(tokens[:index]).strip()
|
| 388 |
+
obj_tokens = tokens[index + 1 :]
|
| 389 |
+
if not subject or not obj_tokens:
|
| 390 |
+
continue
|
| 391 |
+
object_value = " ".join(obj_tokens).strip()
|
| 392 |
+
object_value = self._strip_leading_articles(object_value)
|
| 393 |
+
if object_value:
|
| 394 |
+
return subject, token, object_value
|
| 395 |
+
return None
|
| 396 |
+
|
| 397 |
+
@staticmethod
|
| 398 |
+
def _is_identifier(value: str) -> bool:
|
| 399 |
+
if not value:
|
| 400 |
+
return False
|
| 401 |
+
if value[0] != "_" and not value[0].isalpha():
|
| 402 |
+
return False
|
| 403 |
+
return all(char == "_" or char.isalnum() for char in value)
|
| 404 |
+
|
| 405 |
+
def _parse_assignment(self, sentence: str) -> tuple[str, str] | None:
|
| 406 |
+
if "=" not in sentence or "==" in sentence:
|
| 407 |
+
return None
|
| 408 |
+
left, right = (part.strip() for part in sentence.split("=", 1))
|
| 409 |
+
if not self._is_identifier(left) or not right or right.startswith("="):
|
| 410 |
+
return None
|
| 411 |
+
return left, right
|
| 412 |
+
|
| 413 |
+
def _parse_return_statement(self, sentence: str) -> tuple[str, str] | None:
|
| 414 |
+
tokens = sentence.split()
|
| 415 |
+
if len(tokens) < 3:
|
| 416 |
+
return None
|
| 417 |
+
start = 0
|
| 418 |
+
if tokens[0] in {"def", "function"}:
|
| 419 |
+
if len(tokens) < 4:
|
| 420 |
+
return None
|
| 421 |
+
start = 1
|
| 422 |
+
name = tokens[start]
|
| 423 |
+
verb = tokens[start + 1]
|
| 424 |
+
value = " ".join(tokens[start + 2 :]).strip()
|
| 425 |
+
if not self._is_identifier(name) or verb not in {"return", "returns"} or not value:
|
| 426 |
+
return None
|
| 427 |
+
return name, value
|
| 428 |
+
|
| 429 |
+
def _parse_passive_by(self, sentence: str) -> tuple[str, str, str] | None:
|
| 430 |
+
tokens = sentence.split()
|
| 431 |
+
if len(tokens) < 5:
|
| 432 |
+
return None
|
| 433 |
+
if " is " not in f" {sentence} " or " by " not in f" {sentence} ":
|
| 434 |
+
return None
|
| 435 |
+
left_right = self._split_once(sentence, " is ")
|
| 436 |
+
if left_right is None:
|
| 437 |
+
return None
|
| 438 |
+
obj, tail = left_right
|
| 439 |
+
verb_subject = self._split_once(tail, " by ")
|
| 440 |
+
if verb_subject is None:
|
| 441 |
+
return None
|
| 442 |
+
verb, subject = verb_subject
|
| 443 |
+
if not verb.isalpha() or not subject:
|
| 444 |
+
return None
|
| 445 |
+
return obj, verb, subject
|
runtime/aethon/rfi_interpreter.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
from .rfi_lexicon import AethonNativeConceptCodec
|
| 6 |
+
from .rfi_math import ExactMathReasoner
|
| 7 |
+
from .rfi_query_forms import NativeQueryFormSet
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@dataclass(frozen=True)
class ParsedQuery:
    """Immutable result of interpreting a single query string."""

    # Intent label: "math_eval" for arithmetic, otherwise the matched form's intent.
    intent: str
    # Concept id resolved for the query subject, when the intent carries one.
    subject: str | None = None
    # Normalized relation name, set for relation-style intents.
    relation: str | None = None
    # Concept id resolved for the query object, when the intent carries one.
    object_value: str | None = None
    # Extracted arithmetic expression, set for the "math_eval" intent.
    expression: str | None = None
    # NOTE(review): not populated by the interpreter in this file - confirm usage.
    attribute: str | None = None
    # The original, unmodified query text.
    raw: str = ""
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class NativeQueryInterpreter:
    """Interprets queries through Aethon's native lexical codec and token structure."""

    def __init__(
        self,
        codec: AethonNativeConceptCodec,
        query_form_path: str | None = None,
        query_form_payload: list[dict[str, object]] | None = None,
    ) -> None:
        """Store the codec and build the math reasoner and query-form set."""
        self.codec = codec
        self.math = ExactMathReasoner()
        self.query_forms = NativeQueryFormSet(query_form_path, query_form_payload)

    def parse(self, query: str) -> ParsedQuery | None:
        """Interpret ``query``; arithmetic is tried first, then the query forms.

        Returns ``None`` when nothing matches or the query is empty after
        normalization and trailing-filler removal.
        """
        expression = self.math.extract_expression(query)
        if expression is not None:
            return ParsedQuery(intent="math_eval", expression=expression, raw=query)

        normalized_words = self.codec.ontology.normalize(query).split()
        words = self._clean_query_words(normalized_words)
        if not words:
            return None
        return self._parse_from_forms(words, raw=query)

    def _clean_query_words(self, words: list[str]) -> list[str]:
        """Return a copy of ``words`` without trailing filler words."""
        fillers = self.codec.ontology.semantic_lexicon.query_fillers
        end = len(words)
        while end > 0 and words[end - 1] in fillers:
            end -= 1
        return list(words[:end])

    def _parse_from_forms(self, words: list[str], *, raw: str) -> ParsedQuery | None:
        """Return a ``ParsedQuery`` for the first form that matches and is fully resolved.

        A form matches when ``words`` starts with its prefix and contains
        all of its required tokens. A matching form whose intent needs a
        subject/object/relation that could not be resolved is skipped.
        """
        subject_only = {
            "what_changed",
            "has_contradiction",
            "where_entity",
            "classify",
            "plan_first",
            "plan_next",
            "plan_previous",
        }
        subject_and_object = {"relation_path", "keep_location_lookup"}

        for form in self.query_forms.forms:
            lead = list(form.prefix)
            # A too-short word list can never equal the prefix slice.
            if words[: len(lead)] != lead:
                continue
            if form.requires and any(needed not in words for needed in form.requires):
                continue

            subject = self._resolve_form_subject(words, form)
            object_value = self._resolve_form_object(words, form)
            relation = self._resolve_form_relation(words, form)
            intent = form.intent

            if intent == "used_emoji":
                return ParsedQuery(intent=intent, raw=raw)
            if intent in subject_only:
                if subject:
                    return ParsedQuery(intent=intent, subject=subject, raw=raw)
            elif intent == "story_query":
                if subject:
                    return ParsedQuery(intent=intent, subject=subject, object_value=object_value, raw=raw)
            elif intent in subject_and_object:
                if subject and object_value:
                    return ParsedQuery(intent=intent, subject=subject, object_value=object_value, raw=raw)
            elif intent == "relation_object":
                if subject and relation:
                    return ParsedQuery(intent=intent, subject=subject, relation=relation, raw=raw)
            elif intent == "relation_subject":
                if object_value and relation:
                    return ParsedQuery(intent=intent, relation=relation, object_value=object_value, raw=raw)
        return None

    def _resolve_form_subject(self, words: list[str], form) -> str | None:
        """Pick the subject words according to ``form.subject_mode`` and resolve them.

        Returns the concept id, or ``None`` when the mode selects no words.
        """
        mode = form.subject_mode
        picked: list[str] = []
        if mode == "tail":
            picked = words[len(form.prefix) :]
        elif mode == "tail_without_last":
            picked = words[len(form.prefix) : -1]
        elif mode == "single_after_prefix":
            # A one-element slice is empty when no word follows the prefix.
            first = len(form.prefix)
            picked = words[first : first + 1]
        elif mode in ("between_indexes", "between_anchors"):
            # Both modes slice from subject_start up to the end anchor.
            anchor = form.subject_end_anchor
            if anchor and anchor in words:
                picked = words[form.subject_start : words.index(anchor)]
        if not picked:
            return None
        return self.codec.ontology.resolve(" ".join(picked)).concept_id

    def _resolve_form_object(self, words: list[str], form) -> str | None:
        """Pick the object words according to ``form.object_mode`` and resolve them.

        For the ``after_anchor`` and ``from_relation_words`` modes a single
        relation preposition directly at the start position is skipped.
        Returns the concept id, or ``None`` when no words are selected.
        """
        mode = form.object_mode
        start: int | None = None
        skip_preposition = False
        if mode == "after_anchor":
            anchor = form.object_start_anchor
            if anchor and anchor in words:
                start = words.index(anchor) + 1
                skip_preposition = True
        elif mode == "tail_after_prefix":
            start = len(form.prefix)
        elif mode == "from_relation_words":
            start = len(form.prefix) + 1
            skip_preposition = True

        if start is None:
            return None
        if skip_preposition and start < len(words) and words[start] in self._relation_prepositions():
            start += 1
        picked = words[start:]
        if not picked:
            return None
        return self.codec.ontology.resolve(" ".join(picked)).concept_id

    def _relation_prepositions(self) -> set[str]:
        """Return the lexicon's relation prepositions."""
        return self.codec.ontology.semantic_lexicon.relation_prepositions

    def _resolve_form_relation(self, words: list[str], form) -> str | None:
        """Return the form's fixed relation or derive one from ``words``."""
        mode = form.relation_mode
        if mode == "fixed":
            return form.relation or None
        if mode != "from_words":
            return None
        # With a single-word subject the relation starts one word later.
        offset = len(form.prefix)
        if form.subject_mode == "single_after_prefix":
            offset += 1
        return self._relation_from_words(words, start_index=offset)

    def _relation_from_words(self, words: list[str], *, start_index: int) -> str | None:
        """Build a normalized relation from the word at ``start_index``.

        ``is``/``are`` are rejected. A directly following relation
        preposition is appended with an underscore before normalization.
        """
        if start_index >= len(words):
            return None
        head = words[start_index]
        if head in {"is", "are"}:
            return None
        following = start_index + 1
        if following < len(words) and words[following] in self._relation_prepositions():
            head = f"{head}_{words[following]}"
        return self.codec.ontology.normalize_relation(head)
|
runtime/aethon/rfi_lexicon.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
import json
|
| 6 |
+
|
| 7 |
+
from .rfi_ontology import ConceptOntology
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@dataclass(frozen=True)
class LexicalUnit:
    """One immutable token produced by the concept codec."""

    # Token or relation phrase as taken from the normalized input text.
    surface: str
    # Canonical form: a concept id, a normalized relation, or the token itself.
    normalized: str
    # Token category assigned by the codec, e.g. "relation", "structure",
    # "query", "number" or "concept".
    role: str
    # Extra descriptive tags attached to the unit.
    features: tuple[str, ...] = ()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class AethonNativeConceptCodec:
    """Aethon's graph-native tokenizer/codec for post-weight intelligence."""

    NAME = "Aethon Native Concept Codec"
    SHORT_NAME = "ANCC"

    _RELATION_PHRASES = (
        "is located in",
        "does not like anymore",
        "was asked to watch",
        "later bought",
        "bought in",
        "bought at",
        "lives in",
        "stays in",
        "located in",
        "keeps",
        "prefers",
        "likes",
        "chases",
        "attacks",
        "hunts",
        "bought",
        "carries",
        "carrying",
        "visited",
        "reached",
        "saw",
        "returns",
        "equals",
        "is a",
        "is an",
        "is in",
    )

    _QUERY_WORDS = {"what", "where", "who", "which", "how", "is", "does", "did", "there", "about", "after"}
    _STRUCTURE_WORDS = {"the", "a", "an", "in", "at", "to", "of", "now", "later", "before", "and"}
    _SUFFIX_MAP = {
        "ing": "progressive",
        "ed": "past",
        "er": "agentive",
        "or": "agentive",
        "ous": "property",
        "ly": "adverbial",
        "tion": "abstract_noun",
        "s": "plural_or_third_person",
    }

    def __init__(self, ontology: ConceptOntology | None = None) -> None:
        """Use the given ontology, or create a default ``ConceptOntology``."""
        self.ontology = ontology or ConceptOntology()

    def encode(self, text: str) -> list[LexicalUnit]:
        """Tokenize ``text`` into lexical units.

        The earliest relation phrase (if any) becomes a single "relation"
        unit; the words before it are encoded token by token and the words
        after it are encoded recursively.
        """
        normalized = self.ontology.normalize(text)
        if not normalized:
            return []
        words = normalized.split()
        match = self._extract_relation_chunk(words)
        if match is None:
            return self._encode_segment(normalized, allow_empty=False)

        start, end, phrase = match
        head_text = " ".join(words[:start])
        tail_text = " ".join(words[end:])
        units: list[LexicalUnit] = list(self._encode_segment(head_text, allow_empty=True))
        units.append(
            LexicalUnit(
                surface=phrase,
                normalized=self.ontology.normalize_relation(phrase),
                role="relation",
                features=("typed_edge",),
            )
        )
        if tail_text:
            units.extend(self.encode(tail_text))
        return units

    def concept_signature(self, text: str) -> tuple[str, ...]:
        """Return de-duplicated feature tags describing the concept behind ``text``.

        Tags cover the concept id, its underscore-separated parts, the
        first matching suffix (with the derived root), and ontology parents.
        """
        concept_id = self.ontology.resolve(text).concept_id
        features: list[str] = [f"concept:{concept_id}"]

        pieces = concept_id.split("_")
        if len(pieces) > 1:
            features.extend(f"compound_part:{piece}" for piece in pieces)

        for suffix, tag in self._SUFFIX_MAP.items():
            # Endings such as "-us" or "-ss" are not treated as an "s" suffix.
            if suffix == "s" and concept_id.endswith(("os", "is", "us", "ss")):
                continue
            if concept_id.endswith(suffix) and len(concept_id) > len(suffix) + 1:
                features += [f"root:{concept_id[: -len(suffix)]}", f"suffix:{tag}"]
                break

        features.extend(f"parent:{parent}" for parent in self.ontology.lift(concept_id))
        return tuple(dict.fromkeys(features))

    def export_tokens(self, text: str) -> list[dict[str, object]]:
        """Encode ``text`` and return each unit as a plain dictionary."""
        exported: list[dict[str, object]] = []
        for unit in self.encode(text):
            exported.append(
                dict(
                    surface=unit.surface,
                    normalized=unit.normalized,
                    role=unit.role,
                    features=list(unit.features),
                )
            )
        return exported

    def _encode_segment(self, text: str, *, allow_empty: bool) -> list[LexicalUnit]:
        """Encode a relation-free span token by token.

        ``allow_empty`` is accepted for interface compatibility; an empty
        span yields an empty list regardless of its value.
        """
        normalized = self.ontology.normalize(text)
        if not normalized:
            return []
        units: list[LexicalUnit] = []
        for token in normalized.split():
            if token in self._STRUCTURE_WORDS:
                role, canonical, features = "structure", token, ("grammar",)
            elif token in self._QUERY_WORDS:
                role, canonical, features = "query", token, ("control",)
            elif token.isdigit():
                role, canonical, features = "number", token, ("scalar",)
            else:
                role = "concept"
                canonical = self.ontology.resolve(token).concept_id
                features = self.concept_signature(token)
            units.append(LexicalUnit(surface=token, normalized=canonical, role=role, features=features))
        return units

    def _extract_relation_chunk(self, words: list[str]) -> tuple[int, int, str] | None:
        """Locate the best relation phrase inside ``words``.

        Only the first occurrence of each phrase is considered. The winner
        starts earliest; among equal starts the longest phrase wins, and
        remaining ties keep the phrase seen first. Returns
        ``(start, end, phrase)`` with ``end`` exclusive, or ``None``.
        """
        best: tuple[int, int, str] | None = None
        best_rank: tuple[int, int] | None = None
        for phrase in self._relation_phrases():
            needle = phrase.split()
            width = len(needle)
            if not 0 < width <= len(words):
                continue
            first = next(
                (start for start in range(len(words) - width + 1) if words[start : start + width] == needle),
                None,
            )
            if first is None:
                continue
            # Lower rank is better: earlier start, then longer phrase.
            rank = (first, -width)
            if best_rank is None or rank < best_rank:
                best, best_rank = (first, first + width, phrase), rank
        return best

    def _relation_phrases(self) -> tuple[str, ...]:
        """Return built-in relation phrases plus learned aliases and their meanings.

        Only aliases whose meaning contains an underscore contribute.
        Duplicates are removed, keeping first-seen order.
        """
        alias_map = self.ontology.semantic_lexicon.phrase_alias_map
        learned = [
            entry
            for phrase, meaning in alias_map.items()
            if "_" in meaning
            for entry in (phrase, meaning)
        ]
        return tuple(dict.fromkeys([*self._RELATION_PHRASES, *learned]))
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command line; ``--text`` is the only (required) option."""
    cli = argparse.ArgumentParser(
        description="Inspect Aethon's native no-weight lexical codec.",
    )
    cli.add_argument("--text", required=True, type=str)
    return cli.parse_args()
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def main() -> None:
    """Encode the ``--text`` argument and print its tokens as indented JSON."""
    options = parse_args()
    tokens = AethonNativeConceptCodec().export_tokens(options.text)
    print(json.dumps(tokens, indent=2))
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
if __name__ == "__main__":
|
| 182 |
+
main()
|
runtime/aethon/rfi_math.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import ast
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from fractions import Fraction
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass(frozen=True)
class MathResult:
    """Immutable outcome of evaluating one arithmetic expression."""

    # The expression that was evaluated, in symbolic form.
    expression: str
    # The result rendered as text.
    value: str
    # Proof records; the evaluator emits a single "math:<expression>=<value>" entry.
    proof: tuple[str, ...]
    # Human-readable "Step N: ..." lines describing the evaluation.
    reasoning: tuple[str, ...]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class ExactMathReasoner:
|
| 17 |
+
"""Exact arithmetic evaluator for the native no-weight Aethon core."""
|
| 18 |
+
|
| 19 |
+
_NUMBER_WORDS = {
|
| 20 |
+
"zero": 0,
|
| 21 |
+
"one": 1,
|
| 22 |
+
"two": 2,
|
| 23 |
+
"three": 3,
|
| 24 |
+
"four": 4,
|
| 25 |
+
"five": 5,
|
| 26 |
+
"six": 6,
|
| 27 |
+
"seven": 7,
|
| 28 |
+
"eight": 8,
|
| 29 |
+
"nine": 9,
|
| 30 |
+
"ten": 10,
|
| 31 |
+
"eleven": 11,
|
| 32 |
+
"twelve": 12,
|
| 33 |
+
"thirteen": 13,
|
| 34 |
+
"fourteen": 14,
|
| 35 |
+
"fifteen": 15,
|
| 36 |
+
"sixteen": 16,
|
| 37 |
+
"seventeen": 17,
|
| 38 |
+
"eighteen": 18,
|
| 39 |
+
"nineteen": 19,
|
| 40 |
+
"twenty": 20,
|
| 41 |
+
"thirty": 30,
|
| 42 |
+
"forty": 40,
|
| 43 |
+
"fifty": 50,
|
| 44 |
+
"sixty": 60,
|
| 45 |
+
"seventy": 70,
|
| 46 |
+
"eighty": 80,
|
| 47 |
+
"ninety": 90,
|
| 48 |
+
"hundred": 100,
|
| 49 |
+
}
|
| 50 |
+
_NUMBER_CONNECTORS = {"and"}
|
| 51 |
+
_OPERATOR_PHRASES = (
|
| 52 |
+
("multiplied by", "*"),
|
| 53 |
+
("divided by", "/"),
|
| 54 |
+
("plus", "+"),
|
| 55 |
+
("minus", "-"),
|
| 56 |
+
("times", "*"),
|
| 57 |
+
("over", "/"),
|
| 58 |
+
("modulo", "%"),
|
| 59 |
+
("mod", "%"),
|
| 60 |
+
)
|
| 61 |
+
_TRAILING_CHATTER = (
|
| 62 |
+
"please",
|
| 63 |
+
"thanks",
|
| 64 |
+
"thank you",
|
| 65 |
+
"laughing",
|
| 66 |
+
"happy",
|
| 67 |
+
"thinking",
|
| 68 |
+
"by the way",
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
_ALLOWED_BINARY = {
|
| 72 |
+
ast.Add: ("add", lambda left, right: left + right),
|
| 73 |
+
ast.Sub: ("subtract", lambda left, right: left - right),
|
| 74 |
+
ast.Mult: ("multiply", lambda left, right: left * right),
|
| 75 |
+
ast.Div: ("divide", lambda left, right: left / right),
|
| 76 |
+
ast.FloorDiv: ("floor divide", lambda left, right: left // right),
|
| 77 |
+
ast.Mod: ("modulo", lambda left, right: left % right),
|
| 78 |
+
ast.Pow: ("power", lambda left, right: left**right),
|
| 79 |
+
}
|
| 80 |
+
_ALLOWED_UNARY = {
|
| 81 |
+
ast.UAdd: ("keep", lambda value: value),
|
| 82 |
+
ast.USub: ("negate", lambda value: -value),
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
def can_evaluate(self, text: str) -> bool:
|
| 86 |
+
expression = self.extract_expression(text)
|
| 87 |
+
return expression is not None
|
| 88 |
+
|
| 89 |
+
def extract_expression(self, text: str) -> str | None:
|
| 90 |
+
normalized = text.strip()
|
| 91 |
+
lowered = normalized.lower().strip(" ?")
|
| 92 |
+
prefixes = ("what is ", "solve ", "compute ", "evaluate ")
|
| 93 |
+
expression = lowered
|
| 94 |
+
for prefix in prefixes:
|
| 95 |
+
if lowered.startswith(prefix):
|
| 96 |
+
expression = lowered[len(prefix) :]
|
| 97 |
+
break
|
| 98 |
+
expression = expression.strip()
|
| 99 |
+
changed = True
|
| 100 |
+
while changed:
|
| 101 |
+
changed = False
|
| 102 |
+
for suffix in self._TRAILING_CHATTER:
|
| 103 |
+
if expression.endswith(f" {suffix}"):
|
| 104 |
+
expression = expression[: -len(suffix)].strip()
|
| 105 |
+
changed = True
|
| 106 |
+
if not expression:
|
| 107 |
+
return None
|
| 108 |
+
if self._is_symbolic_expression(expression):
|
| 109 |
+
return expression if any(symbol in expression for symbol in "+-*/%") else None
|
| 110 |
+
textual = self._textual_to_expression(expression)
|
| 111 |
+
if textual is None or not any(symbol in textual for symbol in "+-*/%"):
|
| 112 |
+
return None
|
| 113 |
+
return textual
|
| 114 |
+
|
| 115 |
+
def evaluate(self, text: str) -> MathResult | None:
    """Evaluate the arithmetic found in ``text``.

    Returns ``None`` when no expression can be extracted. Otherwise the
    expression is parsed with ``ast`` and evaluated, and the result is
    returned together with a one-line proof and numbered reasoning steps.
    """
    expression = self.extract_expression(text)
    if expression is None:
        return None
    trace: list[str] = []
    outcome = self._eval_node(ast.parse(expression, mode="eval").body, trace)
    rendered = self._render_value(outcome)
    # Fall back to a single summary step when evaluation logged none.
    reasoning = tuple(f"Step {number}: {line}" for number, line in enumerate(trace, start=1)) or (
        f"Step 1: evaluate {expression} = {rendered}.",
    )
    return MathResult(
        expression=expression,
        value=rendered,
        proof=(f"math:{expression}={rendered}",),
        reasoning=reasoning,
    )
|
| 128 |
+
|
| 129 |
+
def _eval_node(self, node: ast.AST, steps: list[str]) -> Fraction:
|
| 130 |
+
if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
|
| 131 |
+
return Fraction(str(node.value))
|
| 132 |
+
if isinstance(node, ast.Num):
|
| 133 |
+
return Fraction(str(node.n))
|
| 134 |
+
if isinstance(node, ast.BinOp):
|
| 135 |
+
operator = type(node.op)
|
| 136 |
+
if operator not in self._ALLOWED_BINARY:
|
| 137 |
+
raise ValueError(f"Unsupported math operator: {operator.__name__}")
|
| 138 |
+
label, operation = self._ALLOWED_BINARY[operator]
|
| 139 |
+
left = self._eval_node(node.left, steps)
|
| 140 |
+
right = self._eval_node(node.right, steps)
|
| 141 |
+
result = operation(left, right)
|
| 142 |
+
steps.append(
|
| 143 |
+
f"{label} {self._render_value(left)} and {self._render_value(right)} to get {self._render_value(result)}."
|
| 144 |
+
)
|
| 145 |
+
return result
|
| 146 |
+
if isinstance(node, ast.UnaryOp):
|
| 147 |
+
operator = type(node.op)
|
| 148 |
+
if operator not in self._ALLOWED_UNARY:
|
| 149 |
+
raise ValueError(f"Unsupported unary operator: {operator.__name__}")
|
| 150 |
+
label, operation = self._ALLOWED_UNARY[operator]
|
| 151 |
+
value = self._eval_node(node.operand, steps)
|
| 152 |
+
result = operation(value)
|
| 153 |
+
steps.append(f"{label} {self._render_value(value)} to get {self._render_value(result)}.")
|
| 154 |
+
return result
|
| 155 |
+
raise ValueError(f"Unsupported math syntax: {type(node).__name__}")
|
| 156 |
+
|
| 157 |
+
def _textual_to_expression(self, expression: str) -> str | None:
    """Translate a worded arithmetic phrase into a symbolic expression string.

    Operator phrases from ``_OPERATOR_PHRASES`` are replaced by their symbols,
    then the text is consumed token by token: operator symbols and numeric
    tokens are kept, and runs of number words / connectors are collapsed into
    a single integer via ``_parse_number_words``.

    Returns:
        The space-joined symbolic expression, or ``None`` when any token is
        not understood or nothing was converted.
    """
    # Only single-character tokens count as operators. A plain ``in`` test
    # against the string "+-*/%()" would be a substring match and wrongly
    # accept tokens such as "-*" or "()".
    operator_tokens = frozenset("+-*/%()")

    # Pad with spaces so phrases at either end still match " phrase ".
    normalized = f" {expression} "
    for phrase, symbol in self._OPERATOR_PHRASES:
        normalized = normalized.replace(f" {phrase} ", f" {symbol} ")
    tokens = normalized.split()
    converted: list[str] = []
    index = 0
    while index < len(tokens):
        token = tokens[index]
        if token in operator_tokens:
            converted.append(token)
            index += 1
            continue
        if token in self._NUMBER_WORDS or token in self._NUMBER_CONNECTORS:
            # Greedily gather the whole run of number words and connectors.
            number_tokens: list[str] = []
            while index < len(tokens) and (
                tokens[index] in self._NUMBER_WORDS or tokens[index] in self._NUMBER_CONNECTORS
            ):
                number_tokens.append(tokens[index])
                index += 1
            value = self._parse_number_words(number_tokens)
            if value is None:
                return None
            converted.append(str(value))
            continue
        if self._is_number_token(token):
            converted.append(token)
            index += 1
            continue
        # Unknown token: the phrase is not a pure arithmetic expression.
        return None
    return " ".join(converted) if converted else None
|
| 186 |
+
|
| 187 |
+
def _parse_number_words(self, tokens: list[str]) -> int | None:
|
| 188 |
+
cleaned = [token for token in tokens if token not in self._NUMBER_CONNECTORS]
|
| 189 |
+
if not cleaned:
|
| 190 |
+
return None
|
| 191 |
+
total = 0
|
| 192 |
+
current = 0
|
| 193 |
+
for token in cleaned:
|
| 194 |
+
value = self._NUMBER_WORDS.get(token)
|
| 195 |
+
if value is None:
|
| 196 |
+
return None
|
| 197 |
+
if token == "hundred":
|
| 198 |
+
current = max(current, 1) * 100
|
| 199 |
+
else:
|
| 200 |
+
current += value
|
| 201 |
+
total += current
|
| 202 |
+
return total
|
| 203 |
+
|
| 204 |
+
@staticmethod
|
| 205 |
+
def _render_value(value: Fraction) -> str:
|
| 206 |
+
if value.denominator == 1:
|
| 207 |
+
return str(value.numerator)
|
| 208 |
+
return str(float(value))
|
| 209 |
+
|
| 210 |
+
@staticmethod
|
| 211 |
+
def _is_symbolic_expression(expression: str) -> bool:
|
| 212 |
+
allowed = set("0123456789.+-*/%() ")
|
| 213 |
+
return all(char in allowed for char in expression)
|
| 214 |
+
|
| 215 |
+
@staticmethod
|
| 216 |
+
def _is_number_token(token: str) -> bool:
|
| 217 |
+
if not token:
|
| 218 |
+
return False
|
| 219 |
+
dot_seen = False
|
| 220 |
+
for char in token:
|
| 221 |
+
if char == ".":
|
| 222 |
+
if dot_seen:
|
| 223 |
+
return False
|
| 224 |
+
dot_seen = True
|
| 225 |
+
continue
|
| 226 |
+
if not char.isdigit():
|
| 227 |
+
return False
|
| 228 |
+
return any(char.isdigit() for char in token)
|
runtime/aethon/rfi_metrics.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
from dataclasses import asdict, dataclass
|
| 5 |
+
import json
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import sqlite3
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@dataclass(frozen=True)
class StructuralCapacityReport:
    """Immutable snapshot of the counts that make up a structural capacity figure."""

    structural_capacity: int
    concept_count: int
    explicit_edge_count: int
    abstraction_count: int
    revision_count: int
    raw_unit_count: int

    def to_metadata(self) -> dict[str, int]:
        """Return the report as a flat dict, with the total stored under ``"sc"``."""
        component_fields = (
            "concept_count",
            "explicit_edge_count",
            "abstraction_count",
            "revision_count",
            "raw_unit_count",
        )
        metadata = {"sc": self.structural_capacity}
        metadata.update((name, getattr(self, name)) for name in component_fields)
        return metadata
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class StructuralCapacityMeter:
    """Measures Aethon's live structural size instead of frozen parameter count."""

    @staticmethod
    def from_sqlite(db_path: str | Path) -> StructuralCapacityReport:
        """Count the rows of a graph store and return them as a report.

        The structural capacity is the plain sum of the five counts. The
        connection is always closed, even when a query fails.
        """
        # Keyed by report field name; insertion order fixes the query order.
        count_queries = {
            "concept_count": "SELECT COUNT(*) FROM concepts",
            "explicit_edge_count": "SELECT COUNT(*) FROM edges WHERE is_active = 1 AND source_kind != 'derived'",
            "abstraction_count": "SELECT COUNT(*) FROM edges WHERE is_active = 1 AND source_kind = 'derived'",
            "revision_count": "SELECT COUNT(*) FROM contradictions",
            "raw_unit_count": "SELECT COUNT(*) FROM raw_units",
        }
        connection = sqlite3.connect(str(db_path))
        try:
            counts = {
                field: StructuralCapacityMeter._count(connection, sql)
                for field, sql in count_queries.items()
            }
        finally:
            connection.close()
        return StructuralCapacityReport(structural_capacity=sum(counts.values()), **counts)

    @staticmethod
    def _count(conn: sqlite3.Connection, sql: str) -> int:
        """Run a single-value query and return it as an int (0 when no row comes back)."""
        first_row = conn.execute(sql).fetchone()
        if not first_row:
            return 0
        return int(first_row[0])
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command line; ``--graph`` (path to the graph store) is mandatory."""
    cli = argparse.ArgumentParser(
        description="Report Aethon Structural Capacity from a native graph store.",
    )
    cli.add_argument("--graph", required=True, type=str)
    return cli.parse_args()
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def main() -> None:
    """CLI entry point: measure the graph given by ``--graph`` and print the report as JSON."""
    graph_path = parse_args().graph
    capacity = StructuralCapacityMeter.from_sqlite(graph_path)
    print(json.dumps(asdict(capacity), indent=2))


if __name__ == "__main__":
    main()
|
runtime/aethon/rfi_ontology.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Iterable
|
| 6 |
+
|
| 7 |
+
from .rfi_semantics import NativeSemanticLexicon
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def _safe_wordnet():
|
| 11 |
+
try:
|
| 12 |
+
from nltk.corpus import wordnet as wn # type: ignore
|
| 13 |
+
|
| 14 |
+
wn.ensure_loaded()
|
| 15 |
+
return wn
|
| 16 |
+
except Exception:
|
| 17 |
+
return None
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass(frozen=True)
class ConceptRecord:
    """Immutable result of resolving a piece of text to a concept."""

    # Identifier form of the lemma.
    concept_id: str
    # Normalized text of the concept.
    lemma: str
    # Parent concept names, in priority order.
    parents: tuple[str, ...]
    # Alternative surface forms; empty unless supplied.
    aliases: tuple[str, ...] = ()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class ConceptOntology:
    """Ontology-backed concept resolver for the post-weight Aethon core."""

    # Hand-written parents used in addition to (or instead of) WordNet.
    _FALLBACK_PARENT_MAP = {
        "bobcat": ("feline", "predator"),
        "cat": ("feline", "animal"),
        "dog": ("canine", "animal"),
        "wolf": ("canine", "predator"),
        "fox": ("canine", "predator"),
        "lion": ("feline", "predator"),
        "zebra": ("prey", "animal"),
        "rabbit": ("prey", "animal"),
        "mouse": ("prey", "animal"),
        "hen": ("prey", "animal"),
        "lagos": ("city", "location"),
        "accra": ("city", "location"),
        "nigeria": ("country", "location"),
        "ghana": ("country", "location"),
        "professor": ("person",),
        "developer": ("person",),
        "teacher": ("person",),
        "student": ("person",),
        "predator": ("animal",),
        "prey": ("animal",),
        "feline": ("animal",),
        "canine": ("animal",),
    }

    # Relation surface form -> canonical relation. Identity entries pin a
    # relation so the suffix heuristic in ``normalize_relation`` leaves it alone.
    _ALIASES = {
        "likes": "like",
        "liked": "like",
        "does_not_like_anymore": "not_like_anymore",
        "prefers": "prefer",
        "preferred": "prefer",
        "chases": "chase",
        "chased": "chase",
        "attacks": "attack",
        "attacked": "attack",
        "hunts": "hunt",
        "works_in": "work_in",
        "studies": "study",
        "uses": "use",
        "calls": "call",
        "imports": "import",
        "depends_on": "depend_on",
        "solves": "solve",
        "bought": "bought",
        "bought_in": "bought_in",
        "lives_in": "lives_in",
        "visited": "visited",
        "reached": "reached",
        "saw": "saw",
        "carries": "carrying",
        "returns": "return",
        "equals": "equals",
        "located": "locate",
        "located_in": "located_in",
        "is_in": "located_in",
        "is_located_in": "located_in",
        "is": "be",
        "are": "be",
    }

    def __init__(
        self,
        semantic_lexicon_path: str | Path | None = None,
        semantic_lexicon_payload: list[dict[str, str]] | None = None,
    ) -> None:
        """Load WordNet when available and build the semantic lexicon from a path or payload."""
        self._wn = _safe_wordnet()
        self.semantic_lexicon = NativeSemanticLexicon(semantic_lexicon_path, semantic_lexicon_payload)

    def expand_semantics(self, text: str) -> str:
        """Replace every known emoji in ``text`` with its space-padded meaning."""
        expanded = text
        for emoji, meaning in self.semantic_lexicon.emoji_map.items():
            expanded = expanded.replace(emoji, f" {meaning} ")
        return expanded

    def extract_emojis(self, text: str) -> list[tuple[str, str]]:
        """Delegate to the lexicon's ``describe_emojis`` for ``text``."""
        return self.semantic_lexicon.describe_emojis(text)

    def normalize(self, text: str) -> str:
        """Lower-case ``text``, reduce punctuation to single spaces and apply lexicon aliases.

        Alphanumerics, ``_`` and ``-`` are kept; every other character
        becomes a space and runs of spaces collapse to one.
        """
        text = self.expand_semantics(text)
        lowered = text.strip().lower()
        chars: list[str] = []
        previous_space = False
        for char in lowered:
            allowed = char.isalnum() or char in {"_", "-", " "}
            next_char = char if allowed else " "
            if next_char == " ":
                if previous_space:
                    continue
                previous_space = True
                chars.append(" ")
                continue
            previous_space = False
            chars.append(next_char)
        normalized = "".join(chars).strip()
        return self._apply_aliases(normalized)

    def normalize_relation(self, relation: str) -> str:
        """Canonicalize a relation name.

        An exact ``_ALIASES`` hit wins; otherwise only the first
        underscore-separated part is run through the suffix heuristic.
        """
        key = self.normalize(relation).replace(" ", "_")
        aliased = self._ALIASES.get(key)
        if aliased is not None:
            return aliased
        if "_" in key:
            head, tail = key.split("_", 1)
            return f"{self._canonicalize_relation_head(head)}_{tail}"
        return self._canonicalize_relation_head(key)

    @staticmethod
    def _canonicalize_relation_head(head: str) -> str:
        """Strip a simple inflection: ``-ies`` -> ``-y``, or a trailing ``-s`` (not ``ss``/``us``/``is``)."""
        if head.endswith("ies") and len(head) > 3:
            return head[:-3] + "y"
        if head.endswith("s") and len(head) > 3 and not head.endswith(("ss", "us", "is")):
            return head[:-1]
        return head

    def resolve(self, text: str) -> ConceptRecord:
        """Resolve ``text`` to a ``ConceptRecord``.

        Parents are the WordNet parents followed by the fallback-map parents,
        de-duplicated in that order.

        Raises:
            ValueError: If nothing remains after normalization and article removal.
        """
        lemma = self.normalize(text)
        lemma = self._drop_leading_article(lemma)
        if not lemma:
            raise ValueError("Cannot resolve an empty concept.")

        parents = list(dict.fromkeys(self._parents_from_wordnet(lemma) + list(self._FALLBACK_PARENT_MAP.get(lemma, ()))))
        return ConceptRecord(
            concept_id=lemma.replace(" ", "_"),
            lemma=lemma,
            parents=tuple(parents),
            aliases=(lemma,),
        )

    def lift(self, text: str) -> tuple[str, ...]:
        """Return the parent concepts of ``text``."""
        return self.resolve(text).parents

    def nearest_shared_parent(self, left: str, right: str) -> str | None:
        """Return the first parent of ``right`` that is also a parent of ``left``, if any."""
        left_parents = set(self.lift(left))
        for candidate in self.lift(right):
            if candidate in left_parents:
                return candidate
        return None

    def _parents_from_wordnet(self, lemma: str) -> list[str]:
        """Collect hypernym names for ``lemma`` from WordNet (empty when WordNet is unavailable).

        At most three synsets and three hypernyms per synset are consulted.
        """
        if self._wn is None:
            return []
        # WordNet stores multiword lemmas with underscores ("ice_cream"), so a
        # space-separated lemma would never match anything.
        synsets = self._wn.synsets(lemma.replace(" ", "_"))
        parents: list[str] = []
        for synset in synsets[:3]:
            for hypernym in synset.hypernyms()[:3]:
                name = hypernym.lemmas()[0].name().replace("_", " ").lower()
                if name != lemma:
                    parents.append(self.normalize(name).replace(" ", "_"))
        return list(dict.fromkeys(parents))

    def expand_with_parents(self, concepts: Iterable[str]) -> set[str]:
        """Return each concept's identifier together with all of its parents."""
        expanded: set[str] = set()
        for concept in concepts:
            normalized = self.normalize(concept)
            normalized = self._drop_leading_article(normalized)
            if not normalized:
                continue
            expanded.add(normalized.replace(" ", "_"))
            expanded.update(self.lift(normalized))
        return expanded

    @staticmethod
    def _drop_leading_article(text: str) -> str:
        """Remove one leading ``the`` / ``a`` / ``an`` from ``text``."""
        words = text.strip().split()
        if words and words[0] in {"the", "a", "an"}:
            return " ".join(words[1:]).strip()
        return text.strip()

    @staticmethod
    def _replace_word_ci(text: str, target: str, replacement: str) -> str:
        """Replace whole words equal to ``target`` (case-insensitively) with ``replacement``."""
        target_lower = target.lower()
        return " ".join(
            replacement if word.lower() == target_lower else word
            for word in text.split()
        )

    def _apply_aliases(self, text: str) -> str:
        """Rewrite ``text`` using the lexicon's phrase and single-word aliases.

        At each position the longest matching phrase alias wins; otherwise
        the single word is looked up in ``alias_map``. Matching is done on
        lower-cased words with surrounding punctuation stripped.
        """
        words = text.split()
        if not words:
            return text

        lower_words = [word.lower().strip(" ?!.,;:") for word in words]

        # Split every phrase once up front instead of once per token. Phrases
        # with no words are dropped: a zero-length phrase would "match" without
        # advancing the cursor and loop forever.
        candidates = [
            (phrase.split(), meaning)
            for phrase, meaning in self.semantic_lexicon.phrase_alias_map.items()
        ]
        phrase_items = sorted(
            (item for item in candidates if item[0]),
            key=lambda item: len(item[0]),
            reverse=True,
        )
        alias_map = self.semantic_lexicon.alias_map

        replaced_words: list[str] = []
        index = 0
        while index < len(words):
            for phrase_words, meaning in phrase_items:
                end_index = index + len(phrase_words)
                if end_index > len(words):
                    continue
                if lower_words[index:end_index] == phrase_words:
                    replaced_words.extend(meaning.split())
                    index = end_index
                    break
            else:
                # No phrase matched here: fall back to the single-word alias.
                token = lower_words[index]
                replaced_words.extend(alias_map.get(token, token).split())
                index += 1
        return " ".join(replaced_words)
|
runtime/aethon/rfi_query.py
ADDED
|
@@ -0,0 +1,1079 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
import difflib
|
| 5 |
+
from collections import deque
|
| 6 |
+
|
| 7 |
+
from .rfi_abstraction import AbstractionEngine
|
| 8 |
+
from .rfi_graph import EdgeRecord, RelationalGraphStore
|
| 9 |
+
from .rfi_interpreter import NativeQueryInterpreter, ParsedQuery
|
| 10 |
+
from .rfi_lexicon import AethonNativeConceptCodec
|
| 11 |
+
from .rfi_math import ExactMathReasoner
|
| 12 |
+
from .rfi_ontology import ConceptOntology
|
| 13 |
+
from .rfi_query_forms import NativeQueryFormSet
|
| 14 |
+
from .rfi_reasoner import StructuralReasoner
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@dataclass(frozen=True)
class QueryResult:
    """Immutable answer produced by the query engine, with its supporting evidence."""

    # Answer text returned to the caller.
    answer: str
    # Proof entries backing the answer.
    proof: tuple[str, ...]
    # Confidence score attached to the answer.
    confidence: float
    # How the answer was obtained (this file uses e.g. "direct" and "derived").
    mode: str
    # Optional human-readable reasoning steps; empty by default.
    reasoning: tuple[str, ...] = ()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class ProofQueryEngine:
|
| 27 |
+
"""Proof-backed query engine over the explicit Aethon relation graph."""
|
| 28 |
+
|
| 29 |
+
_SALIENT_CLASSES = {
|
| 30 |
+
"predator",
|
| 31 |
+
"prey",
|
| 32 |
+
"person",
|
| 33 |
+
"city",
|
| 34 |
+
"country",
|
| 35 |
+
"teacher",
|
| 36 |
+
"student",
|
| 37 |
+
"developer",
|
| 38 |
+
"professor",
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
_PROTECTED_QUERY_TOKENS = {
|
| 42 |
+
"a",
|
| 43 |
+
"about",
|
| 44 |
+
"after",
|
| 45 |
+
"and",
|
| 46 |
+
"animal",
|
| 47 |
+
"are",
|
| 48 |
+
"asked",
|
| 49 |
+
"at",
|
| 50 |
+
"bought",
|
| 51 |
+
"buy",
|
| 52 |
+
"carrying",
|
| 53 |
+
"changed",
|
| 54 |
+
"chases",
|
| 55 |
+
"color",
|
| 56 |
+
"come",
|
| 57 |
+
"comes",
|
| 58 |
+
"contradiction",
|
| 59 |
+
"continue",
|
| 60 |
+
"did",
|
| 61 |
+
"does",
|
| 62 |
+
"emoji",
|
| 63 |
+
"first",
|
| 64 |
+
"happen",
|
| 65 |
+
"happens",
|
| 66 |
+
"how",
|
| 67 |
+
"i",
|
| 68 |
+
"in",
|
| 69 |
+
"is",
|
| 70 |
+
"keep",
|
| 71 |
+
"like",
|
| 72 |
+
"live",
|
| 73 |
+
"now",
|
| 74 |
+
"next",
|
| 75 |
+
"object",
|
| 76 |
+
"plan",
|
| 77 |
+
"prefer",
|
| 78 |
+
"related",
|
| 79 |
+
"return",
|
| 80 |
+
"schedule",
|
| 81 |
+
"should",
|
| 82 |
+
"story",
|
| 83 |
+
"there",
|
| 84 |
+
"tell",
|
| 85 |
+
"to",
|
| 86 |
+
"use",
|
| 87 |
+
"watch",
|
| 88 |
+
"what",
|
| 89 |
+
"where",
|
| 90 |
+
"which",
|
| 91 |
+
"who",
|
| 92 |
+
"your",
|
| 93 |
+
"you",
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
def __init__(
    self,
    graph: RelationalGraphStore,
    ontology: ConceptOntology | None = None,
    abstraction: AbstractionEngine | None = None,
    reasoner: StructuralReasoner | None = None,
    query_forms: NativeQueryFormSet | None = None,
) -> None:
    """Wire the engine to a graph, building default collaborators for any that are omitted.

    ``query_forms``, when given, is handed to the interpreter as a payload.
    The graph's current mutation version is recorded for both the reasoning
    state and the (initially empty) query-vocabulary cache.
    """
    self.graph = graph
    self.ontology = ontology or ConceptOntology()
    self.abstraction = abstraction or AbstractionEngine(graph, self.ontology)
    self.reasoner = reasoner or StructuralReasoner(graph)

    codec = AethonNativeConceptCodec(self.ontology)
    form_payload = None if query_forms is None else query_forms.to_payload()
    self.interpreter = NativeQueryInterpreter(codec, query_form_payload=form_payload)
    self.math = ExactMathReasoner()

    self._reasoned_version = self.graph.mutation_version
    self._query_vocabulary_cache: list[str] | None = None
    self._query_vocabulary_version = self.graph.mutation_version
|
| 116 |
+
|
| 117 |
+
def answer(self, query: str) -> QueryResult | None:
    """Answer a natural-language query, or return ``None`` when nothing applies.

    The query is emoji-expanded, soft-corrected, self-reference-normalized
    and soft-corrected again. Answer sources are then tried in order:
    metadata lookup on the lightly cleaned text, metadata lookup on the
    ontology-normalized text, the "keep location" lookup, and finally the
    parsed-intent path. Every hit is passed through
    ``_with_query_awareness`` together with the original query and the
    accumulated corrections.
    """
    original_query = query
    corrected, corrections = self._soft_correct_query(self.ontology.expand_semantics(query))
    corrected = self._normalize_self_reference(corrected)
    corrected, later_corrections = self._soft_correct_query(corrected)
    corrections.extend(later_corrections)

    def finish(result: QueryResult) -> QueryResult | None:
        return self._with_query_awareness(result, original_query, corrections)

    # Lower-case, blank out "?", "!" and ".", then collapse whitespace.
    surface_normalized = " ".join(
        corrected.lower().translate(str.maketrans("?!.", "   ")).split()
    )
    metadata = self._answer_metadata_value(surface_normalized)
    if metadata is not None:
        return finish(metadata)

    normalized = self.ontology.normalize(corrected)
    if not normalized:
        return None

    # Only the recorded version is refreshed here; nothing is recomputed.
    if self._reasoned_version != self.graph.mutation_version:
        self._reasoned_version = self.graph.mutation_version

    metadata = self._answer_metadata_value(normalized)
    if metadata is not None:
        return finish(metadata)

    keep_lookup = self._answer_keep_location_from_query(normalized)
    if keep_lookup is not None:
        return finish(keep_lookup)

    parsed = self.interpreter.parse(corrected)
    if parsed is None:
        return None
    interpreted = self._answer_interpreted(parsed)
    if interpreted is None:
        return None
    return finish(interpreted)
|
| 145 |
+
|
| 146 |
+
def _answer_used_emoji(self, raw_query: str) -> QueryResult | None:
    """Report which emojis appear in ``raw_query`` and how each is interpreted.

    Always returns a direct, full-confidence result; the answer is ``"none"``
    when the ontology finds no emoji.
    """
    found = self.ontology.extract_emojis(raw_query)
    if found:
        return QueryResult(
            answer=", ".join([f"{emoji} ({meaning})" for emoji, meaning in found]),
            proof=tuple(f"emoji:{emoji}->{meaning}" for emoji, meaning in found),
            confidence=1.0,
            mode="direct",
            reasoning=tuple(f"I find {emoji} and interpret it as {meaning}." for emoji, meaning in found),
        )
    return QueryResult(
        answer="none",
        proof=("emoji:none",),
        confidence=1.0,
        mode="direct",
        reasoning=("I do not find any emoji in your message.",),
    )
|
| 164 |
+
|
| 165 |
+
def _answer_metadata_value(self, query: str) -> QueryResult | None:
    """Answer questions about Aethon's own tokenizer or size unit.

    The query must mention "aethon" or "your" plus either "tokenizer" or
    "size unit" (tokenizer takes precedence). The answer is the display name
    of the subject's best ``equals`` edge, with underscores turned into
    spaces and lower-cased. Returns ``None`` when the query is about neither
    subject or no such edge exists.
    """
    if not ("aethon" in query or "your" in query):
        return None
    if "tokenizer" in query:
        subject = "aethon_tokenizer"
    elif "size unit" in query:
        subject = "aethon_size_unit"
    else:
        return None

    equals = self._best_edge(subject, "equals")
    if equals is None:
        return None
    display = self.graph.get_display_name(equals.object)
    return QueryResult(
        answer=display.replace("_", " ").lower(),
        proof=(self._edge_to_proof(equals),),
        confidence=1.0,
        mode="direct",
        reasoning=self._reasoning_from_proof((self._edge_to_proof(equals),)),
    )
|
| 183 |
+
|
| 184 |
+
def _answer_interpreted(self, parsed: ParsedQuery) -> QueryResult | None:
    """Dispatch an interpreted query to the handler for its intent.

    Branches are tried in source order, keyed on ``parsed.intent`` plus the
    fields that intent needs (``subject``, ``relation``, ``object_value``,
    ``expression``). Some branches return unconditionally; others
    ("classify", "relation_subject", "relation_path") fall through to the
    final ``return None`` when they find nothing.

    NOTE(review): nesting was reconstructed from a flattened listing of this
    method — confirm block boundaries against the original file.
    """
    if parsed.intent == "used_emoji":
        return self._answer_used_emoji(parsed.raw)
    if parsed.intent == "what_changed" and parsed.subject:
        return self._answer_what_changed_subject(parsed.subject)
    if parsed.intent == "has_contradiction" and parsed.subject:
        return self._answer_has_contradiction_subject(parsed.subject)
    if parsed.intent == "math_eval" and parsed.expression:
        # Arithmetic is delegated entirely to the math component.
        result = self.math.evaluate(parsed.expression)
        if result is None:
            return None
        return QueryResult(
            answer=result.value,
            proof=result.proof,
            confidence=1.0,
            mode="derived",
            reasoning=result.reasoning,
        )
    if parsed.intent == "plan_first" and parsed.subject:
        return self._answer_plan_first_subject(parsed.subject)
    if parsed.intent == "plan_next" and parsed.subject:
        return self._answer_plan_next_anchor(parsed.subject)
    if parsed.intent == "plan_previous" and parsed.subject:
        return self._answer_plan_previous_target(parsed.subject)
    if parsed.intent == "story_query" and parsed.subject:
        return self._answer_story_subject(parsed.subject, anchor=parsed.object_value or "")
    if parsed.intent == "where_entity" and parsed.subject:
        # Prefer a stored/inherited location; otherwise use the location of
        # whoever is carrying the entity.
        location = self._direct_or_abstract(parsed.subject, "located_in")
        if location is not None:
            return location
        carried = self._infer_carried_object_location(parsed.subject)
        if carried is not None:
            return carried
        return None
    if parsed.intent == "classify" and parsed.subject:
        # Precedence: best "equals" edge, then active "is_a" edge, then an
        # ontology parent (reported with lower confidence).
        equals = self._best_edge(parsed.subject, "equals")
        if equals is not None:
            return QueryResult(
                answer=equals.object,
                proof=(self._edge_to_proof(equals),),
                confidence=1.0,
                mode="derived" if equals.source_kind == "derived" else "direct",
                reasoning=self._reasoning_from_proof((self._edge_to_proof(equals),)),
            )
        direct = self.graph.get_active_edge(parsed.subject, "is_a")
        if direct is not None:
            return QueryResult(
                answer=direct.object,
                proof=(self._edge_to_proof(direct),),
                confidence=1.0,
                mode="direct",
                reasoning=self._reasoning_from_proof((self._edge_to_proof(direct),)),
            )
        parents = self.ontology.lift(parsed.subject)
        if parents:
            chosen_parent = self._select_ontology_parent(tuple(parents))
            return QueryResult(
                answer=chosen_parent,
                proof=(f"ontology:{parsed.subject}->is_a->{chosen_parent}",),
                confidence=0.7,
                mode="ontology",
                reasoning=self._reasoning_from_proof((f"ontology:{parsed.subject}->is_a->{chosen_parent}",)),
            )
    if parsed.intent == "relation_object" and parsed.subject and parsed.relation:
        # "where does X keep Y" is answered by composing the keep edge with
        # X's location before the generic relation lookup runs.
        if parsed.raw.lower().startswith("where does ") and " keep " in parsed.raw.lower():
            keep_object = self._object_after_phrase(parsed.raw, " keep ")
            if keep_object is not None:
                keep_location = self._answer_keep_location_lookup(parsed.subject, keep_object)
                if keep_location is not None:
                    return keep_location
        if parsed.relation == "return":
            # A "return" relation is looked up as "return_value" first.
            value = self._direct_or_abstract(parsed.subject, "return_value")
            if value is not None:
                return value
        return self._direct_or_abstract(parsed.subject, parsed.relation)
    if parsed.intent == "keep_location_lookup" and parsed.subject and parsed.object_value:
        return self._answer_keep_location_lookup(parsed.subject, parsed.object_value)
    if parsed.intent == "relation_subject" and parsed.object_value and parsed.relation:
        direct_subjects = self.graph.get_subjects(parsed.relation, parsed.object_value)
        if direct_subjects:
            # The first edge returned by the graph is used as-is (no ranking).
            edge = direct_subjects[0]
            return QueryResult(
                answer=edge.subject,
                proof=(self._edge_to_proof(edge),),
                confidence=1.0,
                mode="direct",
                reasoning=self._reasoning_from_proof((self._edge_to_proof(edge),)),
            )
        hierarchical_subject = self._subject_from_object_hierarchy(parsed.relation, parsed.object_value)
        if hierarchical_subject is not None:
            return hierarchical_subject
    if parsed.intent == "relation_path" and parsed.subject and parsed.object_value:
        resolved_path = self._resolve_relation_path(parsed.subject, parsed.object_value)
        if resolved_path is not None:
            answer, proof, confidence = resolved_path
            return QueryResult(answer=answer, proof=proof, confidence=confidence, mode="path", reasoning=self._reasoning_from_proof(proof))
    return None
|
| 281 |
+
|
| 282 |
+
def _answer_how_related_pair(self, left: str, right: str) -> QueryResult | None:
    """Explain how two concepts are related.

    A resolved relation path between ``left`` and ``right`` is preferred.
    Failing that, the first ancestor of ``left`` (in hierarchy order) that is
    also an ancestor of ``right`` is reported with confidence 0.7. Returns
    ``None`` when neither exists.
    """
    resolved_path = self._resolve_relation_path(left, right)
    if resolved_path is not None:
        answer, proof, confidence = resolved_path
        return QueryResult(
            answer=answer,
            proof=proof,
            confidence=confidence,
            mode="path",
            reasoning=self._reasoning_from_proof(proof),
        )

    left_parents = self._concept_hierarchy(left)
    # Build the membership set once instead of once per left-hand ancestor.
    right_parents = set(self._concept_hierarchy(right))
    shared = next((parent for parent in left_parents if parent in right_parents), None)
    if shared is None:
        return None

    proof = (f"ontology:{left}->is_a->{shared}", f"ontology:{right}->is_a->{shared}")
    return QueryResult(
        answer=shared,
        proof=proof,
        confidence=0.7,
        mode="path",
        reasoning=self._reasoning_from_proof(proof),
    )
|
| 299 |
+
|
| 300 |
+
def _answer_what_changed_subject(self, subject: str) -> QueryResult | None:
    """Describe the first contradiction the graph reports for ``subject``.

    The answer is ``relation:previous->new``; the proof lists the old and
    the new edge as ``revision:`` entries. Returns ``None`` when the graph
    has no contradiction for the subject.
    """
    records = self.graph.get_contradictions(subject=subject)
    if not records:
        return None

    latest = records[0]
    evidence = (
        f"revision:{latest.subject}-[{latest.relation}]->{latest.previous_object}",
        f"revision:{latest.subject}-[{latest.relation}]->{latest.new_object}",
    )
    return QueryResult(
        answer=f"{latest.relation}:{latest.previous_object}->{latest.new_object}",
        proof=evidence,
        confidence=1.0,
        mode="revision",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 318 |
+
|
| 319 |
+
def _answer_has_contradiction_subject(self, subject: str) -> QueryResult | None:
    """Answer "yes"/"no" to whether ``subject`` has a stored contradiction.

    A "no" answer carries a ``no_contradiction:`` proof; a "yes" answer
    cites the old and new edge of the first contradiction the graph returns.
    """
    records = self.graph.get_contradictions(subject=subject)
    if records:
        latest = records[0]
        evidence = (
            f"contradiction:{latest.subject}-[{latest.relation}]->{latest.previous_object}",
            f"contradiction:{latest.subject}-[{latest.relation}]->{latest.new_object}",
        )
        return QueryResult(
            answer="yes",
            proof=evidence,
            confidence=1.0,
            mode="contradiction",
            reasoning=self._reasoning_from_proof(evidence),
        )

    return QueryResult(
        answer="no",
        proof=(f"no_contradiction:{subject}",),
        confidence=1.0,
        mode="direct",
        reasoning=(f"I do not find any stored contradiction for {self.graph.get_display_name(subject)}.",),
    )
|
| 343 |
+
|
| 344 |
+
def _answer_keep_location_lookup(self, subject: str, object_value: str) -> QueryResult | None:
    """Answer where ``subject`` keeps ``object_value``.

    Composes the subject's active ``keep`` (or, failing that, ``keeps``)
    edge with the subject's best-ranked location edge. Returns ``None`` when
    there is no keep edge, no location, or the kept object differs from
    ``object_value``.
    """
    kept = self.graph.get_active_edge(subject, "keep")
    if kept is None:
        kept = self.graph.get_active_edge(subject, "keeps")
    places = self._location_candidates(subject)

    if kept is None:
        return None
    if not places:
        return None
    if kept.object != object_value:
        return None

    where = max(places, key=self._edge_rank)
    evidence = (
        self._edge_to_proof(kept),
        self._edge_to_proof(where),
        f"compose:{subject}-[keeps]->{object_value}; {subject}-[located_in]->{where.object}",
    )
    return QueryResult(
        answer=where.object,
        proof=evidence,
        confidence=1.0,
        mode="composed",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 367 |
+
|
| 368 |
+
def _answer_keep_location_from_query(self, query: str) -> QueryResult | None:
    """Parse a "where ... keep ..." query and run the keep-location lookup.

    Words between "where" and the first "keep" token name the subject (after
    dropping "does", "is" and articles); words after it name the kept object
    (after dropping articles). Both are resolved through the ontology.
    Returns ``None`` when the query does not have this shape.
    """
    words = query.split()
    if len(words) < 4:
        return None
    if words[0] != "where" or "keep" not in words:
        return None

    pivot = words.index("keep")
    articles = {"the", "a", "an"}
    subject_noise = {"does", "is"} | articles
    subject_words = [word for word in words[1:pivot] if word not in subject_noise]
    object_words = [word for word in words[pivot + 1 :] if word not in articles]
    if not (subject_words and object_words):
        return None

    subject = self.ontology.resolve(" ".join(subject_words)).concept_id
    object_value = self.ontology.resolve(" ".join(object_words)).concept_id
    return self._answer_keep_location_lookup(subject, object_value)
|
| 380 |
+
|
| 381 |
+
def _answer_plan_first_subject(self, target: str) -> QueryResult | None:
    """Find the first step of the plan that leads to ``target``.

    Follows best ``depend_on`` edges from ``target`` until a node has no
    dependency or a dependency would revisit a node. The node reached is the
    answer and the traversed edges are the proof. Returns ``None`` when
    ``target`` has no dependency to follow.
    """
    chain: list[str] = []
    visited = {target}
    node = target
    while (step := self._best_edge(node, "depend_on")) is not None and step.object not in visited:
        chain.append(self._edge_to_proof(step))
        node = step.object
        visited.add(node)

    if not chain:
        return None

    evidence = tuple(chain)
    return QueryResult(
        answer=node,
        proof=evidence,
        confidence=1.0,
        mode="plan",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 401 |
+
|
| 402 |
+
def _answer_plan_next_anchor(self, anchor: str) -> QueryResult | None:
    """Name the step that comes after ``anchor`` in a plan.

    Picks the best-ranked edge among subjects that ``depend_on`` the anchor
    and answers with that edge's subject. Returns ``None`` when nothing
    depends on the anchor.
    """
    followers = self.graph.get_subjects("depend_on", anchor)
    if not followers:
        return None

    chosen = max(followers, key=self._edge_rank)
    evidence = (self._edge_to_proof(chosen),)
    return QueryResult(
        answer=chosen.subject,
        proof=evidence,
        confidence=1.0,
        mode="plan",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 414 |
+
|
| 415 |
+
def _answer_plan_previous_target(self, target: str) -> QueryResult | None:
    """Name the step that ``target`` directly depends on.

    Answers with the object of the target's best ``depend_on`` edge, or
    returns ``None`` when the target has no such edge.
    """
    dependency = self._best_edge(target, "depend_on")
    if dependency is None:
        return None

    evidence = (self._edge_to_proof(dependency),)
    return QueryResult(
        answer=dependency.object,
        proof=evidence,
        confidence=1.0,
        mode="plan",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 426 |
+
|
| 427 |
+
def _answer_story_subject(self, subject: str, *, anchor: str = "") -> QueryResult | None:
    """Collect the narrative facts stored about ``subject``.

    Keeps non-derived outgoing edges whose relation is story-like, orders
    them by ``edge_id`` and, when ``anchor`` matches one of them, drops
    everything up to and including that edge. At most eight edges are cited
    as proof. Returns ``None`` when no edge remains.
    """
    story_relations = {
        "approved",
        "bought",
        "bought_in",
        "carrying",
        "depend_on",
        "found",
        "inspected",
        "left",
        "like",
        "lives_in",
        "located_in",
        "met",
        "planned",
        "visited",
        "prefer",
        "reached",
        "returned",
        "scheduled",
        "saw",
        "started",
        "stopped",
        "watch",
    }
    timeline = sorted(
        (
            fact
            for fact in self.graph.iter_outgoing_edges(subject)
            if fact.relation in story_relations and fact.source_kind != "derived"
        ),
        key=lambda fact: fact.edge_id,
    )
    if not timeline:
        return None

    marker = anchor.strip()
    if marker:
        position = self._story_anchor_index(timeline, marker)
        if position is not None:
            # Only what happened after the anchored event is reported.
            timeline = timeline[position + 1 :]
            if not timeline:
                return None

    evidence = tuple(self._edge_to_proof(fact) for fact in timeline[:8])
    return QueryResult(
        answer=subject,
        proof=evidence,
        confidence=0.92,
        mode="story",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 474 |
+
|
| 475 |
+
def _story_anchor_index(self, edges: list[EdgeRecord], anchor_phrase: str) -> int | None:
    """Locate the first edge that ``anchor_phrase`` refers to.

    An edge matches when every normalized anchor token appears among the
    tokens of one of its renderings: raw ``relation object``, the display
    name of the relation, the display name of the object, or both display
    names joined. Returns the index of the first match, or ``None``.
    """
    wanted = set(self.ontology.normalize(anchor_phrase).split())
    if not wanted:
        return None

    for position, fact in enumerate(edges):
        relation_label = self.graph.get_display_name(fact.relation)
        object_label = self.graph.get_display_name(fact.object)
        renderings = (
            f"{fact.relation} {fact.object}",
            relation_label,
            object_label,
            f"{relation_label} {object_label}",
        )
        for rendering in renderings:
            # A non-empty anchor can only be a subset of a non-empty token set.
            if wanted <= set(self.ontology.normalize(rendering).split()):
                return position
    return None
|
| 492 |
+
|
| 493 |
+
def _infer_carried_object_location(self, object_value: str) -> QueryResult | None:
    """Infer where an object is from the location of whoever carries it.

    Takes the best-ranked ``carrying`` edge pointing at ``object_value`` and
    the best-ranked location edge of that carrier. Returns ``None`` when
    nobody carries the object or the carrier has no known location.
    """
    holders = self.graph.get_subjects("carrying", object_value)
    if not holders:
        return None
    holder = max(holders, key=self._edge_rank)

    places = self._location_candidates(holder.subject)
    if not places:
        return None
    place = max(places, key=self._edge_rank)

    evidence = (
        self._edge_to_proof(holder),
        self._edge_to_proof(place),
        f"compose:{holder.subject}-[carrying]->{object_value}; {holder.subject}-[located_in]->{place.object}",
    )
    return QueryResult(
        answer=place.object,
        proof=evidence,
        confidence=0.92,
        mode="composed",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 514 |
+
|
| 515 |
+
def _answer_who_chases_object(self, obj: str) -> QueryResult | None:
    """Answer who chases ``obj``.

    Sources are tried in order of decreasing confidence: a stored ``chase``
    edge pointing at ``obj`` (the first one the graph returns), a subject
    found through the object's hierarchy, and finally the most specific
    abstract ``chase`` rule whose object class is an ancestor of ``obj``.
    Returns ``None`` when none applies.
    """
    direct_subjects = self.graph.get_subjects("chase", obj)
    if direct_subjects:
        edge = direct_subjects[0]
        proof = (f"direct:{edge.subject}-[chase]->{edge.object}",)
        return QueryResult(
            answer=edge.subject,
            proof=proof,
            confidence=1.0,
            mode="direct",
            reasoning=self._reasoning_from_proof(proof),
        )

    hierarchical_subject = self._subject_from_object_hierarchy("chase", obj)
    if hierarchical_subject is not None:
        return hierarchical_subject

    # The ancestor set is the same for every rule, so compute it once
    # instead of re-walking the hierarchy per candidate rule.
    object_classes = set(self._concept_hierarchy(obj))
    candidate_rules = [
        rule
        for rule in self.abstraction.derive_rules()
        if rule.relation == "chase" and rule.object_class in object_classes
    ]
    if not candidate_rules:
        return None

    # max() keeps the first of equally specific rules, matching a stable
    # descending sort followed by taking the head.
    rule = max(candidate_rules, key=self._rule_specificity)
    proof = (
        f"abstract:{rule.subject_class}-[chase]->{rule.object_class}",
        f"object:{obj}->is_a->{rule.object_class}",
    )
    return QueryResult(
        answer=rule.subject_class,
        proof=proof,
        confidence=0.65,
        mode="abstract",
        reasoning=self._reasoning_from_proof(proof),
    )
|
| 544 |
+
|
| 545 |
+
def _direct_or_abstract(self, subject: str, relation: str) -> QueryResult | None:
    """Resolve ``subject --relation--> ?`` from the most to least direct source.

    Order: the subject's own best edge, an edge inherited from an ancestor,
    an inference from exemplars, and finally the most specific abstract rule
    whose subject class is an ancestor of ``subject``. Returns ``None`` when
    every source comes up empty.
    """
    own_edge = self._best_edge(subject, relation)
    if own_edge is not None:
        evidence = (self._edge_to_proof(own_edge),)
        return QueryResult(
            answer=own_edge.object,
            proof=evidence,
            confidence=1.0,
            mode="derived" if own_edge.source_kind == "derived" else "direct",
            reasoning=self._reasoning_from_proof(evidence),
        )

    from_parent = self._inherit_relation_from_parents(subject, relation)
    if from_parent is not None:
        return from_parent

    from_exemplars = self._infer_relation_from_exemplars(subject, relation)
    if from_exemplars is not None:
        return from_exemplars

    ancestors = set(self._concept_hierarchy(subject))
    matching = [
        rule
        for rule in self.abstraction.derive_rules()
        if rule.relation == relation and rule.subject_class in ancestors
    ]
    if not matching:
        return None

    # First rule with the highest specificity, as a stable descending sort
    # followed by taking the head would give.
    rule = max(matching, key=self._rule_specificity)
    evidence = (
        f"abstract:{rule.subject_class}-[{relation}]->{rule.object_class}",
        f"subject:{subject}->is_a->{rule.subject_class}",
    )
    return QueryResult(
        answer=rule.object_class,
        proof=evidence,
        confidence=0.7,
        mode="abstract",
        reasoning=self._reasoning_from_proof(evidence),
    )
|
| 578 |
+
|
| 579 |
+
def _inherit_relation_from_parents(self, subject: str, relation: str) -> QueryResult | None:
    """Borrow ``relation`` from the first ancestor of ``subject`` that has it.

    Ancestors are visited in hierarchy order. Confidence is 0.76 when the
    inherited edge is not derived and 0.7 when it is. Returns ``None`` when
    no ancestor carries the relation.
    """
    for ancestor in self._concept_hierarchy(subject):
        borrowed = self._best_edge(ancestor, relation)
        if borrowed is not None:
            evidence = (
                f"subject:{subject}->is_a->{ancestor}",
                self._edge_to_proof(borrowed),
            )
            certainty = 0.7 if borrowed.source_kind == "derived" else 0.76
            return QueryResult(
                answer=borrowed.object,
                proof=evidence,
                confidence=certainty,
                mode="abstract",
                reasoning=self._reasoning_from_proof(evidence),
            )
    return None
|
| 596 |
+
|
| 597 |
+
def _subject_from_object_hierarchy(self, relation: str, object_value: str) -> QueryResult | None:
    """Find a subject that holds ``relation`` towards an ancestor of the object.

    The first ancestor of ``object_value`` with incoming ``relation`` edges
    wins, using its best-ranked edge (confidence 0.74, or 0.68 for a derived
    edge). When no ancestor qualifies, the result of the exemplar-based
    inference is returned, which may be ``None``.
    """
    for ancestor in self._concept_hierarchy(object_value):
        holders = self.graph.get_subjects(relation, ancestor)
        if holders:
            chosen = max(holders, key=self._edge_rank)
            evidence = (
                self._edge_to_proof(chosen),
                f"object:{object_value}->is_a->{ancestor}",
            )
            certainty = 0.68 if chosen.source_kind == "derived" else 0.74
            return QueryResult(
                answer=chosen.subject,
                proof=evidence,
                confidence=certainty,
                mode="abstract",
                reasoning=self._reasoning_from_proof(evidence),
            )
    return self._infer_subject_from_exemplars(relation, object_value)
|
| 618 |
+
|
| 619 |
+
def _resolve_relation_path(self, left: str, right: str) -> tuple[str, tuple[str, ...], float] | None:
    """Find the best relation path connecting ``left`` to ``right``.

    A grounded location path is returned immediately when one exists.
    Otherwise each endpoint is tried together with up to five of its
    ancestors (six anchors per side); for every anchor pair a path without
    derived edges is preferred over one with them. Candidates are ranked,
    lowest first, by: number of endpoints replaced by an ancestor, whether
    the path leaves the location relations, number of reverse-direction
    relations, number of derived edges, and path length.

    Returns ``(answer, proof, confidence)`` where ``answer`` joins the path's
    relation names with " -> " and confidence is 1.0 only when both original
    endpoints were used (0.78 otherwise), or ``None`` when no path is found.
    """
    grounded = self._resolve_grounded_location_path(left, right)
    if grounded is not None:
        return grounded

    left_options = [left, *self._concept_hierarchy(left)][:6]
    right_options = [right, *self._concept_hierarchy(right)][:6]
    location_relations = {"lives_in", "located_in"}
    reverse_relations = {"contains", "home_of", "has_instance"}

    best_score = None
    best_choice = None
    for start in left_options:
        for goal in right_options:
            hops = self.graph.find_path(start, goal, max_hops=6, include_derived=False)
            if hops is None:
                hops = self.graph.find_path(start, goal, max_hops=6, include_derived=True)
            if hops is None:
                continue
            score = (
                int(start != left) + int(goal != right),
                int(any(hop.relation not in location_relations for hop in hops)),
                sum(1 for hop in hops if hop.relation in reverse_relations),
                sum(1 for hop in hops if hop.source_kind == "derived"),
                len(hops),
            )
            # Strict comparison: on a tie the earlier anchor pair is kept.
            if best_score is None or score < best_score:
                best_score = score
                best_choice = (hops, start, goal)

    if best_choice is None:
        return None

    hops, start, goal = best_choice
    steps = [self._edge_to_proof(hop) for hop in hops]
    if start != left:
        steps.insert(0, f"subject:{left}->is_a->{start}")
    if goal != right:
        steps.append(f"object:{right}->is_a->{goal}")

    summary = " -> ".join(hop.relation for hop in hops)
    certainty = 1.0 if start == left and goal == right else 0.78
    return summary, tuple(steps), certainty
|
| 663 |
+
|
| 664 |
+
def _resolve_grounded_location_path(self, left: str, right: str) -> tuple[str, tuple[str, ...], float] | None:
    """Search for a chain of stored location edges from ``left`` to ``right``.

    Breadth-first over non-derived ``lives_in`` / ``located_in`` edges, at
    most six edges long. Returns ``(answer, proof, 1.0)`` for the first chain
    that reaches ``right``, where ``answer`` joins the relation names with
    " -> ", or ``None`` when no such chain exists.
    """
    location_relations = {"lives_in", "located_in"}
    frontier: deque[tuple[str, list[EdgeRecord]]] = deque()
    frontier.append((left, []))
    visited = {left}

    while frontier:
        here, trail = frontier.popleft()
        if len(trail) >= 6:
            continue
        for relation in ("lives_in", "located_in"):
            for hop in self.graph.get_objects(here, relation):
                if hop.source_kind == "derived":
                    continue
                extended = trail + [hop]
                reached = hop.object == right
                if reached and all(step.relation in location_relations for step in extended):
                    evidence = tuple(self._edge_to_proof(step) for step in extended)
                    summary = " -> ".join(step.relation for step in extended)
                    return summary, evidence, 1.0
                if hop.object not in visited:
                    visited.add(hop.object)
                    frontier.append((hop.object, extended))
    return None
|
| 685 |
+
|
| 686 |
+
def _infer_relation_from_exemplars(self, subject: str, relation: str) -> QueryResult | None:
    """Infer the object class of ``relation`` from sibling exemplars.

    For each ancestor class of ``subject`` (in hierarchy order), every member
    of that class votes with the ancestor classes of the objects it holds
    ``relation`` towards. The class with the most votes wins, ties broken by
    the length of its own hierarchy and then by first appearance. The first
    ancestor class producing any vote decides. Returns ``None`` when no
    class produces a vote.
    """
    for subject_class in self._concept_hierarchy(subject):
        exemplars = self.graph.get_subjects("is_a", subject_class)
        if not exemplars:
            continue

        # Votes keep the edge itself so its object can be cited directly,
        # rather than re-parsing the rendered proof string (the parser may
        # return None, which would make the subscript raise TypeError).
        class_votes: dict[str, list[EdgeRecord]] = {}
        for exemplar in exemplars:
            for edge in self.graph.get_objects(exemplar.subject, relation):
                for object_class in self._concept_hierarchy(edge.object):
                    class_votes.setdefault(object_class, []).append(edge)
        if not class_votes:
            continue

        object_class, voters = max(
            class_votes.items(),
            key=lambda item: (len(item[1]), len(self._concept_hierarchy(item[0]))),
        )
        # Cite at most two supporting edges, then the class membership of
        # the first voter's object.
        proof_tuple = (
            f"subject:{subject}->is_a->{subject_class}",
            *(self._edge_to_proof(edge) for edge in voters[:2]),
            f"object:{voters[0].object}->is_a->{object_class}",
        )
        return QueryResult(
            answer=object_class,
            proof=proof_tuple,
            confidence=0.73,
            mode="abstract",
            reasoning=self._reasoning_from_proof(proof_tuple),
        )
    return None
|
| 713 |
+
|
| 714 |
+
def _infer_subject_from_exemplars(self, relation: str, object_value: str) -> QueryResult | None:
    """Infer which subject class holds ``relation`` towards ``object_value``.

    For each ancestor class of ``object_value`` (in hierarchy order), every
    member of that class is checked for incoming ``relation`` edges, and each
    such edge's subject votes with its ancestor classes. The class with the
    most votes wins, ties broken by the length of its own hierarchy and then
    by first appearance. Returns ``None`` when no class produces a vote.
    """
    for object_class in self._concept_hierarchy(object_value):
        exemplars = self.graph.get_subjects("is_a", object_class)
        if not exemplars:
            continue

        # Votes keep the edge itself so its subject can be cited directly,
        # rather than re-parsing the rendered proof string (the parser may
        # return None, which would make the subscript raise TypeError).
        class_votes: dict[str, list[EdgeRecord]] = {}
        for exemplar in exemplars:
            for edge in self.graph.get_subjects(relation, exemplar.subject):
                for subject_class in self._concept_hierarchy(edge.subject):
                    class_votes.setdefault(subject_class, []).append(edge)
        if not class_votes:
            continue

        subject_class, voters = max(
            class_votes.items(),
            key=lambda item: (len(item[1]), len(self._concept_hierarchy(item[0]))),
        )
        # Cite at most two supporting edges, then the class membership of
        # the first voter's subject.
        proof_tuple = (
            f"object:{object_value}->is_a->{object_class}",
            *(self._edge_to_proof(edge) for edge in voters[:2]),
            f"subject:{voters[0].subject}->is_a->{subject_class}",
        )
        return QueryResult(
            answer=subject_class,
            proof=proof_tuple,
            confidence=0.73,
            mode="abstract",
            reasoning=self._reasoning_from_proof(proof_tuple),
        )
    return None
|
| 741 |
+
|
| 742 |
+
@staticmethod
|
| 743 |
+
def _edge_to_proof(edge: EdgeRecord) -> str:
|
| 744 |
+
if edge.source_kind == "derived" and edge.source_text:
|
| 745 |
+
return f"{edge.source_kind}:{edge.subject}-[{edge.relation}]->{edge.object}|{edge.source_text}"
|
| 746 |
+
return f"{edge.source_kind}:{edge.subject}-[{edge.relation}]->{edge.object}"
|
| 747 |
+
|
| 748 |
+
def _rule_specificity(self, rule) -> tuple[int, int, int]:
    """Sort key ranking abstract rules by specificity.

    Compares the number of ancestors of the rule's object class first, then
    of its subject class, then the rule's support.
    """
    depth_of = lambda concept: len(self._concept_hierarchy(concept))
    subject_depth = depth_of(rule.subject_class)
    return (depth_of(rule.object_class), subject_depth, rule.support)
|
| 752 |
+
|
| 753 |
+
def _best_edge(self, subject: str, relation: str) -> EdgeRecord | None:
    """Pick the preferred outgoing ``relation`` edge of ``subject``.

    ``located_in`` edges are chosen by the location-specific ranking; every
    other relation uses the highest ``_edge_rank``. Returns ``None`` when the
    subject has no such edge.
    """
    options = self.graph.get_objects(subject, relation)
    if not options:
        return None
    if relation != "located_in":
        return max(options, key=self._edge_rank)
    return self._best_location_edge(options)
|
| 760 |
+
|
| 761 |
+
def _edge_rank(self, edge: EdgeRecord) -> tuple[int, int, int]:
    """Sort key for choosing between edges; higher ranks win.

    Non-derived edges outrank derived ones, then objects with more ancestors,
    then higher ``edge_id``.
    """
    stated = int(edge.source_kind != "derived")
    ancestors = self._concept_hierarchy(edge.object)
    return (stated, len(ancestors), edge.edge_id)
|
| 765 |
+
|
| 766 |
+
def _best_location_edge(self, candidates: list[EdgeRecord]) -> EdgeRecord:
    """Choose the most specific of several ``located_in`` edges.

    An edge scores one point for every other candidate whose object is
    itself a ``located_in`` target of this edge's object (e.g. "kitchen"
    beats "house" when the kitchen is located in the house). Ties fall back
    to ``_edge_rank``.

    Raises:
        ValueError: if ``candidates`` is empty (propagated from ``max``).
    """

    def specificity(edge: EdgeRecord) -> int:
        # Look up the containers of this edge's object once, not once per
        # competing candidate. Assumes edge objects are hashable concept ids.
        containers = {
            parent.object for parent in self.graph.get_objects(edge.object, "located_in")
        }
        return sum(
            1
            for other in candidates
            if other.edge_id != edge.edge_id and other.object in containers
        )

    return max(candidates, key=lambda edge: (specificity(edge),) + self._edge_rank(edge))
|
| 777 |
+
|
| 778 |
+
def _location_candidates(self, subject: str) -> list[EdgeRecord]:
    """Return the subject's ``located_in`` edges followed by its ``lives_in`` edges."""
    located = self.graph.get_objects(subject, "located_in")
    resides = self.graph.get_objects(subject, "lives_in")
    return located + resides
|
| 780 |
+
|
| 781 |
+
def _select_ontology_parent(self, parents: tuple[str, ...]) -> str:
    """Choose the most informative parent class among several.

    Each parent is scored, in priority order, by: behavioural edges of its
    exemplars, abstract rules naming it as subject class, its own behavioural
    edges, and its exemplar count; earlier parents win ties. "Behavioural"
    means any relation other than the structural ones listed below. When the
    best parent has no evidence at all, the last parent found in
    ``_SALIENT_CLASSES`` is returned, else the first parent.

    Raises:
        ValueError: if ``parents`` is empty (propagated from ``max``).
    """
    if len(parents) == 1:
        return parents[0]

    structural = {"is_a", "has_instance", "contains", "located_in"}
    derived_rules = self.abstraction.derive_rules()
    # (exemplar_behavior, abstract_behavior, outgoing, exemplar_count, -index, parent)
    scored: list[tuple[int, int, int, int, int, str]] = []
    for index, parent in enumerate(parents):
        outgoing_count = sum(
            1
            for edge in self.graph.iter_outgoing_edges(parent)
            if edge.relation not in structural
        )
        exemplars = self.graph.get_subjects("is_a", parent)
        exemplar_behavior = sum(
            1
            for exemplar in exemplars
            for edge in self.graph.iter_outgoing_edges(exemplar.subject)
            if edge.relation not in structural
        )
        abstract_behavior = sum(
            1
            for rule in derived_rules
            if rule.subject_class == parent and rule.relation not in structural
        )
        # -index makes earlier parents win ties under max().
        scored.append(
            (exemplar_behavior, abstract_behavior, outgoing_count, len(exemplars), -index, parent)
        )

    best = max(scored)
    if best[0] == 0 and best[1] == 0 and best[2] == 0 and best[3] == 0:
        salient = [parent for parent in parents if parent in self._SALIENT_CLASSES]
        if salient:
            return salient[-1]
        return parents[0]
    return best[5]
|
| 814 |
+
|
| 815 |
+
def _object_after_phrase(self, raw_query: str, phrase: str) -> str | None:
    """Resolve the concept named by the text that follows ``phrase``.

    The query is normalized through the ontology, the first whole-word
    occurrence of the stripped phrase is located, and whatever follows it is
    resolved to a concept id. Returns ``None`` when the phrase is blank, does
    not occur as a whole word, or nothing follows it.
    """
    import re

    needle = phrase.strip()
    if not needle:
        # An empty separator cannot be searched for meaningfully.
        return None

    normalized = self.ontology.normalize(raw_query)
    # Match on word boundaries so that "keep" is not found inside "keeper",
    # which would split the query at the wrong place.
    found = re.search(rf"(?<!\w){re.escape(needle)}(?!\w)", normalized)
    if found is None:
        return None

    candidate = normalized[found.end() :].strip()
    if not candidate:
        return None
    return self.ontology.resolve(candidate).concept_id
|
| 824 |
+
|
| 825 |
+
def _concept_hierarchy(self, concept: str) -> list[str]:
    """List the ancestors of ``concept`` without duplicates, in discovery order.

    Graph ``is_a`` edges are followed depth-first; after a node's ``is_a``
    edges are exhausted, the parents the ontology lifts for that node are
    appended but not explored further.
    """
    lineage: list[str] = []
    known: set[str] = set()

    def remember(name: str) -> bool:
        """Record ``name`` if unseen; report whether it was new."""
        if name in known:
            return False
        known.add(name)
        lineage.append(name)
        return True

    def walk(node: str) -> None:
        for link in self.graph.get_objects(node, "is_a"):
            if remember(link.object):
                walk(link.object)
        for lifted in self.ontology.lift(node):
            remember(lifted)

    walk(concept)
    return lineage
|
| 842 |
+
|
| 843 |
+
def _reasoning_from_proof(self, proof: tuple[str, ...]) -> tuple[str, ...]:
    """Turn proof entries into a sequence of narrated reasoning sentences.

    The proof is expanded first, then each entry is rendered and prefixed
    with an opener; the six openers repeat cyclically for longer proofs.
    """
    lead_ins = (
        "I start from",
        "Then I use",
        "Next I rely on",
        "After that I connect",
        "From there I infer",
        "Finally I conclude from",
    )
    sentences: list[str] = []
    for position, entry in enumerate(self._expand_reasoning_steps(proof)):
        body = self._render_reasoning_step(entry)
        sentences.append(f"{lead_ins[position % len(lead_ins)]} {body}.")
    return tuple(sentences)
|
| 859 |
+
|
| 860 |
+
def _expand_reasoning_steps(self, proof: tuple[str, ...]) -> tuple[str, ...]:
|
| 861 |
+
expanded: list[str] = []
|
| 862 |
+
for step in proof:
|
| 863 |
+
expanded.append(step)
|
| 864 |
+
if not step.startswith("derived:") or "|" not in step:
|
| 865 |
+
continue
|
| 866 |
+
_, source_text = step.split("|", 1)
|
| 867 |
+
for detail in source_text.split("|"):
|
| 868 |
+
detail = detail.strip()
|
| 869 |
+
if not detail:
|
| 870 |
+
continue
|
| 871 |
+
expanded.append(detail)
|
| 872 |
+
return tuple(expanded)
|
| 873 |
+
|
| 874 |
+
def _render_reasoning_step(self, step: str) -> str:
|
| 875 |
+
if "-[" in step and "]->" in step:
|
| 876 |
+
parsed = self._parse_reasoning_edge(step)
|
| 877 |
+
if parsed is not None:
|
| 878 |
+
subject, relation, object_value = parsed
|
| 879 |
+
return (
|
| 880 |
+
f"{self.graph.get_display_name(subject)} "
|
| 881 |
+
f"{relation.replace('_', ' ')} "
|
| 882 |
+
f"{self.graph.get_display_name(object_value)}"
|
| 883 |
+
)
|
| 884 |
+
if step.startswith("ontology:"):
|
| 885 |
+
payload = step.split(":", 1)[1]
|
| 886 |
+
subject, _, parent = payload.partition("->is_a->")
|
| 887 |
+
return f"{self.graph.get_display_name(subject)} belongs to {self.graph.get_display_name(parent)}"
|
| 888 |
+
if step.startswith("subject:"):
|
| 889 |
+
payload = step.split(":", 1)[1]
|
| 890 |
+
subject, _, parent = payload.partition("->is_a->")
|
| 891 |
+
return f"{self.graph.get_display_name(subject)} belongs to {self.graph.get_display_name(parent)}"
|
| 892 |
+
if step.startswith("object:"):
|
| 893 |
+
payload = step.split(":", 1)[1]
|
| 894 |
+
obj, _, parent = payload.partition("->is_a->")
|
| 895 |
+
return f"{self.graph.get_display_name(obj)} belongs to {self.graph.get_display_name(parent)}"
|
| 896 |
+
if step.startswith("compose:"):
|
| 897 |
+
payload = step.split(":", 1)[1]
|
| 898 |
+
return f"the composed link {payload}".replace("_", " ")
|
| 899 |
+
if step.startswith("revision:"):
|
| 900 |
+
payload = step.split(":", 1)[1]
|
| 901 |
+
return f"the revision {payload}".replace("_", " ")
|
| 902 |
+
if step.startswith("contradiction:"):
|
| 903 |
+
payload = step.split(":", 1)[1]
|
| 904 |
+
return f"the contradiction {payload}".replace("_", " ")
|
| 905 |
+
if step.startswith("no_contradiction:"):
|
| 906 |
+
payload = step.split(":", 1)[1]
|
| 907 |
+
return f"there is no stored contradiction for {self.graph.get_display_name(payload)}"
|
| 908 |
+
if step.startswith("emoji:"):
|
| 909 |
+
payload = step.split(":", 1)[1]
|
| 910 |
+
return f"the symbol {payload}".replace("_", " ")
|
| 911 |
+
if step.startswith("math:"):
|
| 912 |
+
payload = step.split(":", 1)[1]
|
| 913 |
+
return f"the computation {payload}".replace("_", " ")
|
| 914 |
+
if step.startswith("reason:"):
|
| 915 |
+
payload = step.split(":", 1)[1]
|
| 916 |
+
return f"the rule {payload}".replace("_", " ")
|
| 917 |
+
return step.replace("_", " ")
|
| 918 |
+
|
| 919 |
+
@staticmethod
|
| 920 |
+
def _parse_reasoning_edge(step: str) -> tuple[str, str, str] | None:
|
| 921 |
+
payload = step.split(":", 1)[-1].split("|", 1)[0]
|
| 922 |
+
if "-[" not in payload or "]->" not in payload:
|
| 923 |
+
return None
|
| 924 |
+
subject, rest = payload.split("-[", 1)
|
| 925 |
+
relation, object_value = rest.split("]->", 1)
|
| 926 |
+
return subject, relation, object_value
|
| 927 |
+
|
| 928 |
+
@staticmethod
|
| 929 |
+
def _normalize_self_reference(query: str) -> str:
|
| 930 |
+
normalized = query.strip()
|
| 931 |
+
lowered = normalized.lower()
|
| 932 |
+
if lowered.startswith("who are you"):
|
| 933 |
+
return "What is Aethon?"
|
| 934 |
+
if lowered.startswith("what are you"):
|
| 935 |
+
return "What is Aethon?"
|
| 936 |
+
if lowered.startswith("what is your tokenizer"):
|
| 937 |
+
return "What is Aethon tokenizer?"
|
| 938 |
+
if lowered.startswith("what is your size unit"):
|
| 939 |
+
return "What is Aethon size unit?"
|
| 940 |
+
if lowered.startswith("what is aethon tokenizer"):
|
| 941 |
+
return "What is Aethon tokenizer?"
|
| 942 |
+
if lowered.startswith("what is aethon size unit"):
|
| 943 |
+
return "What is Aethon size unit?"
|
| 944 |
+
replaced: list[str] = []
|
| 945 |
+
for token in normalized.split():
|
| 946 |
+
lower = token.lower()
|
| 947 |
+
if lower == "your":
|
| 948 |
+
replaced.append("Aethon")
|
| 949 |
+
elif lower == "you":
|
| 950 |
+
replaced.append("Aethon")
|
| 951 |
+
else:
|
| 952 |
+
replaced.append(token)
|
| 953 |
+
return " ".join(replaced)
|
| 954 |
+
|
| 955 |
+
def _with_query_awareness(self, result: QueryResult, original_query: str, corrections: list[tuple[str, str]]) -> QueryResult:
|
| 956 |
+
if not corrections:
|
| 957 |
+
return result
|
| 958 |
+
seen: set[tuple[str, str]] = set()
|
| 959 |
+
notes: list[str] = []
|
| 960 |
+
for source, target in corrections:
|
| 961 |
+
pair = (source.lower(), target.lower())
|
| 962 |
+
if pair in seen or source.lower() == target.lower():
|
| 963 |
+
continue
|
| 964 |
+
seen.add(pair)
|
| 965 |
+
notes.append(
|
| 966 |
+
f'I read "{source}" as "{self.graph.get_display_name(target.lower().replace(" ", "_")) if "_" in target or target.islower() else target}" so the prompt still stays grounded.'
|
| 967 |
+
)
|
| 968 |
+
if not notes:
|
| 969 |
+
return result
|
| 970 |
+
return QueryResult(
|
| 971 |
+
answer=result.answer,
|
| 972 |
+
proof=result.proof,
|
| 973 |
+
confidence=result.confidence,
|
| 974 |
+
mode=result.mode,
|
| 975 |
+
reasoning=tuple(notes) + result.reasoning,
|
| 976 |
+
)
|
| 977 |
+
|
| 978 |
+
def _soft_correct_query(self, query: str) -> tuple[str, list[tuple[str, str]]]:
|
| 979 |
+
tokens = query.split()
|
| 980 |
+
if not tokens:
|
| 981 |
+
return query, []
|
| 982 |
+
vocabulary = self._query_vocabulary()
|
| 983 |
+
corrected: list[str] = []
|
| 984 |
+
corrections: list[tuple[str, str]] = []
|
| 985 |
+
for token in tokens:
|
| 986 |
+
prefix_end = 0
|
| 987 |
+
while prefix_end < len(token) and not token[prefix_end].isalnum():
|
| 988 |
+
prefix_end += 1
|
| 989 |
+
suffix_start = len(token)
|
| 990 |
+
while suffix_start > prefix_end and not token[suffix_start - 1].isalnum():
|
| 991 |
+
suffix_start -= 1
|
| 992 |
+
prefix = token[:prefix_end]
|
| 993 |
+
suffix = token[suffix_start:]
|
| 994 |
+
core = token[len(prefix) : len(token) - len(suffix) if suffix else len(token)]
|
| 995 |
+
lower_core = core.lower()
|
| 996 |
+
if lower_core in self._PROTECTED_QUERY_TOKENS:
|
| 997 |
+
corrected.append(token)
|
| 998 |
+
continue
|
| 999 |
+
if lower_core in self.ontology.semantic_lexicon.typo_map:
|
| 1000 |
+
replacement = self.ontology.semantic_lexicon.typo_map[lower_core]
|
| 1001 |
+
if core[:1].isupper():
|
| 1002 |
+
replacement = replacement.capitalize()
|
| 1003 |
+
corrections.append((core, replacement))
|
| 1004 |
+
corrected.append(f"{prefix}{replacement}{suffix}")
|
| 1005 |
+
continue
|
| 1006 |
+
if len(lower_core) <= 2 or not lower_core or lower_core in vocabulary:
|
| 1007 |
+
corrected.append(token)
|
| 1008 |
+
continue
|
| 1009 |
+
match = difflib.get_close_matches(lower_core, vocabulary, n=1, cutoff=0.72)
|
| 1010 |
+
if match:
|
| 1011 |
+
replacement = match[0]
|
| 1012 |
+
if core[:1].isupper():
|
| 1013 |
+
replacement = replacement.capitalize()
|
| 1014 |
+
corrections.append((core, replacement))
|
| 1015 |
+
corrected.append(f"{prefix}{replacement}{suffix}")
|
| 1016 |
+
else:
|
| 1017 |
+
corrected.append(token)
|
| 1018 |
+
return " ".join(corrected), corrections
|
| 1019 |
+
|
| 1020 |
+
def _query_vocabulary(self) -> list[str]:
|
| 1021 |
+
if (
|
| 1022 |
+
self._query_vocabulary_cache is not None
|
| 1023 |
+
and self._query_vocabulary_version == self.graph.mutation_version
|
| 1024 |
+
):
|
| 1025 |
+
return self._query_vocabulary_cache
|
| 1026 |
+
base_words = {
|
| 1027 |
+
"what",
|
| 1028 |
+
"who",
|
| 1029 |
+
"where",
|
| 1030 |
+
"how",
|
| 1031 |
+
"is",
|
| 1032 |
+
"are",
|
| 1033 |
+
"does",
|
| 1034 |
+
"did",
|
| 1035 |
+
"the",
|
| 1036 |
+
"aethon",
|
| 1037 |
+
"tokenizer",
|
| 1038 |
+
"size",
|
| 1039 |
+
"unit",
|
| 1040 |
+
"your",
|
| 1041 |
+
"you",
|
| 1042 |
+
"please",
|
| 1043 |
+
"thanks",
|
| 1044 |
+
"happy",
|
| 1045 |
+
"sad",
|
| 1046 |
+
"thinking",
|
| 1047 |
+
"love",
|
| 1048 |
+
"approve",
|
| 1049 |
+
"correct",
|
| 1050 |
+
"wrong",
|
| 1051 |
+
"related",
|
| 1052 |
+
"return",
|
| 1053 |
+
"returns",
|
| 1054 |
+
"depend",
|
| 1055 |
+
"depends",
|
| 1056 |
+
"on",
|
| 1057 |
+
"work",
|
| 1058 |
+
"works",
|
| 1059 |
+
"live",
|
| 1060 |
+
"like",
|
| 1061 |
+
"prefer",
|
| 1062 |
+
"carrying",
|
| 1063 |
+
"buy",
|
| 1064 |
+
"bought",
|
| 1065 |
+
"watch",
|
| 1066 |
+
"chase",
|
| 1067 |
+
"solve",
|
| 1068 |
+
"plus",
|
| 1069 |
+
"minus",
|
| 1070 |
+
"times",
|
| 1071 |
+
"divided",
|
| 1072 |
+
"by",
|
| 1073 |
+
}
|
| 1074 |
+
for concept in self.graph.list_concepts():
|
| 1075 |
+
base_words.update(part for part in concept.split("_") if part)
|
| 1076 |
+
base_words.add(concept.replace("_", " "))
|
| 1077 |
+
self._query_vocabulary_cache = sorted(base_words)
|
| 1078 |
+
self._query_vocabulary_version = self.graph.mutation_version
|
| 1079 |
+
return self._query_vocabulary_cache
|
runtime/aethon/rfi_query_forms.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass(frozen=True)
class QueryForm:
    """Immutable description of one query form.

    Instances are built from JSON rows by ``NativeQueryFormSet._row_to_form``.
    Only ``intent`` and ``prefix`` are required at construction; the other
    fields fall back to the defaults below.
    """

    # Required fields.
    intent: str
    prefix: tuple[str, ...]

    # Relation settings.
    relation: str = ""
    relation_mode: str = "fixed"

    # Subject extraction settings.
    subject_mode: str = "tail"
    subject_start: int = 0
    subject_end_anchor: str = ""

    # Object extraction settings.
    object_start_anchor: str = ""
    object_mode: str = "none"

    # Extra requirements attached to the form.
    requires: tuple[str, ...] = ()
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class NativeQueryFormSet:
    """Loads ``QueryForm`` records from an in-memory payload or a JSON file.

    An explicit ``payload`` wins over the file. When neither a payload nor an
    existing file is available the set is empty.
    """

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "query" / "aethon_query_forms_v1.json"

    def __init__(self, path: str | Path | None = None, payload: list[dict[str, object]] | None = None) -> None:
        """Remember the source path (default: ``DEFAULT_PATH``) and load the forms."""
        self.path = self.DEFAULT_PATH if path is None else Path(path)
        self.forms = self._load_forms(payload)

    def _load_forms(self, payload: list[dict[str, object]] | None) -> tuple[QueryForm, ...]:
        """Return the forms from ``payload`` if given, else from ``self.path``."""
        rows = payload
        if rows is None:
            if not self.path.exists():
                return ()
            rows = json.loads(self.path.read_text(encoding="utf-8"))
        return tuple(self._row_to_form(entry) for entry in rows)

    @staticmethod
    def _row_to_form(row: dict[str, object]) -> QueryForm:
        """Convert one JSON row to a ``QueryForm``; ``intent`` is mandatory."""

        def text(key: str, fallback: str) -> str:
            return str(row.get(key, fallback)).strip()

        def texts(key: str) -> tuple[str, ...]:
            return tuple(str(item).strip() for item in row.get(key, []))

        return QueryForm(
            intent=str(row["intent"]).strip(),
            prefix=texts("prefix"),
            relation=text("relation", ""),
            relation_mode=text("relation_mode", "fixed"),
            subject_mode=text("subject_mode", "tail"),
            subject_start=int(row.get("subject_start", 0)),
            subject_end_anchor=text("subject_end_anchor", ""),
            object_start_anchor=text("object_start_anchor", ""),
            object_mode=text("object_mode", "none"),
            requires=texts("requires"),
        )

    def to_payload(self) -> list[dict[str, object]]:
        """Serialize the loaded forms back to JSON-compatible rows."""
        rows: list[dict[str, object]] = []
        for form in self.forms:
            rows.append(
                {
                    "intent": form.intent,
                    "prefix": list(form.prefix),
                    "relation": form.relation,
                    "relation_mode": form.relation_mode,
                    "subject_mode": form.subject_mode,
                    "subject_start": form.subject_start,
                    "subject_end_anchor": form.subject_end_anchor,
                    "object_start_anchor": form.object_start_anchor,
                    "object_mode": form.object_mode,
                    "requires": list(form.requires),
                }
            )
        return rows
|
runtime/aethon/rfi_reasoner.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from collections import Counter
|
| 4 |
+
|
| 5 |
+
from .rfi_graph import EdgeRecord, RelationalGraphStore
|
| 6 |
+
from .rfi_reasoning_rules import NativeReasoningRuleSet, ReasoningRule
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class StructuralReasoner:
    """Executes native reasoning rules over the relation graph.

    Base rules come from ``NativeReasoningRuleSet``. Extra ``via_chain``
    rules are induced from the graph every time ``materialize`` runs.
    """

    def __init__(self, graph: RelationalGraphStore, rules_path: str | None = None, rules_payload: dict | None = None) -> None:
        """Bind the reasoner to ``graph`` and load the base rule set."""
        self.graph = graph
        self.rule_set = NativeReasoningRuleSet(rules_path, rules_payload)
        self.base_rules = self.rule_set.rules
        self.induced_rules: tuple[ReasoningRule, ...] = ()

    def materialize(self, *, max_rounds: int = 4, induce_min_support: int = 2) -> list[int]:
        """Apply all rules repeatedly and return the ids of the derived facts.

        Rules are induced once up front. Every base and induced rule is then
        applied for up to ``max_rounds`` rounds, stopping early when a round
        adds nothing. The graph is committed only when something was added.
        """
        created: list[int] = []
        self.induced_rules = self.induce_rules(min_support=induce_min_support)
        for _ in range(max_rounds):
            round_ids: list[int] = []
            for rule in self._all_rules():
                round_ids.extend(self._apply_rule(rule))
            if not round_ids:
                break
            created.extend(round_ids)
        if created:
            self.graph.commit()
        return created

    def induce_rules(self, *, min_support: int = 2) -> tuple[ReasoningRule, ...]:
        """Propose ``via_chain`` rules observed at least ``min_support`` times.

        A pattern is counted whenever edges ``a -left-> b`` and
        ``b -right-> c`` (with ``a != c``) coexist with a direct edge
        ``a -output-> c``. Patterns whose signature already matches a base
        rule are skipped.
        """
        existing = {
            (rule.kind, rule.left_relation, rule.right_relation, rule.output_relation)
            for rule in self.base_rules
        }
        # Snapshot the edges: they are traversed twice below, and a one-shot
        # iterator would be exhausted after the first pass.
        active_edges = list(self.graph.iter_active_edges())
        outgoing: dict[str, list[EdgeRecord]] = {}
        for edge in active_edges:
            outgoing.setdefault(edge.subject, []).append(edge)

        counts: Counter[tuple[str, str, str]] = Counter()
        for left in active_edges:
            for right in outgoing.get(left.object, ()):
                if left.subject == right.object:
                    continue
                for candidate in outgoing.get(left.subject, ()):
                    if candidate.object != right.object:
                        continue
                    counts[(left.relation, right.relation, candidate.relation)] += 1

        induced: list[ReasoningRule] = []
        for (left_relation, right_relation, output_relation), support in counts.items():
            signature = ("via_chain", left_relation, right_relation, output_relation)
            if support < min_support or signature in existing:
                continue
            induced.append(
                ReasoningRule(
                    name=f"induced_{left_relation}_{right_relation}_{output_relation}",
                    kind="via_chain",
                    left_relation=left_relation,
                    right_relation=right_relation,
                    output_relation=output_relation,
                    confidence=0.65,
                )
            )
        return tuple(induced)

    def _all_rules(self) -> tuple[ReasoningRule, ...]:
        """Return base rules followed by induced rules."""
        return self.base_rules + self.induced_rules

    def _apply_rule(self, rule: ReasoningRule) -> list[int]:
        """Dispatch ``rule`` by its ``kind``; unknown kinds derive nothing."""
        handlers = {
            "transitive": self._apply_transitive,
            "via_chain": self._apply_via_chain,
            "copy": self._apply_copy,
            "suffix_copy": self._apply_suffix_copy,
            "object_support": self._apply_object_support,
            "object_bridge": self._apply_object_bridge,
            "via_object": self._apply_via_object,
        }
        handler = handlers.get(rule.kind)
        if handler is None:
            return []
        return handler(rule)

    def _apply_chain(self, rule: ReasoningRule) -> list[int]:
        """Derive ``a -output-> c`` from ``a -left-> b`` and ``b -right-> c``."""
        created: list[int] = []
        for left in self.graph.iter_active_edges():
            if left.relation != rule.left_relation:
                continue
            for right in self.graph.get_objects(left.object, rule.right_relation):
                created_id = self._add_if_supported(
                    subject=left.subject,
                    relation=rule.output_relation,
                    object_value=right.object,
                    rule_name=rule.name,
                    support_edges=(left, right),
                )
                if created_id is not None:
                    created.append(created_id)
        return created

    def _apply_transitive(self, rule: ReasoningRule) -> list[int]:
        """Apply a ``transitive`` rule (same two-hop chaining as ``via_chain``)."""
        return self._apply_chain(rule)

    def _apply_via_chain(self, rule: ReasoningRule) -> list[int]:
        """Apply a ``via_chain`` rule (two-hop chaining)."""
        return self._apply_chain(rule)

    def _apply_copy(self, rule: ReasoningRule) -> list[int]:
        """Re-state every ``source_relation`` edge under ``output_relation``."""
        created: list[int] = []
        for edge in self.graph.iter_active_edges():
            if edge.relation != rule.source_relation:
                continue
            created_id = self._add_if_supported(
                subject=edge.subject,
                relation=rule.output_relation,
                object_value=edge.object,
                rule_name=rule.name,
                support_edges=(edge, edge),
            )
            if created_id is not None:
                created.append(created_id)
        return created

    def _apply_suffix_copy(self, rule: ReasoningRule) -> list[int]:
        """Re-state every non-excluded ``*_in`` edge under ``output_relation``."""
        created: list[int] = []
        excluded = set(rule.exclude_relations)
        for edge in self.graph.iter_active_edges():
            if not edge.relation.endswith("_in"):
                continue
            if edge.relation in excluded:
                continue
            created_id = self._add_if_supported(
                subject=edge.subject,
                relation=rule.output_relation,
                object_value=edge.object,
                rule_name=rule.name,
                support_edges=(edge, edge),
            )
            if created_id is not None:
                created.append(created_id)
        return created

    def _apply_object_support(self, rule: ReasoningRule) -> list[int]:
        """Link an edge's object to what its subject reaches via support relations.

        For each ``left_relation`` edge ``s -> o`` and each edge ``s -> x``
        whose relation is in ``support_relation_set``, derive ``o -output-> x``.
        """
        created: list[int] = []
        support_relations = set(rule.support_relation_set)
        for edge in self.graph.iter_active_edges():
            if edge.relation != rule.left_relation:
                continue
            for support_relation in support_relations:
                for support in self.graph.get_objects(edge.subject, support_relation):
                    created_id = self._add_if_supported(
                        subject=edge.object,
                        relation=rule.output_relation,
                        object_value=support.object,
                        rule_name=rule.name,
                        support_edges=(edge, support),
                    )
                    if created_id is not None:
                        created.append(created_id)
        return created

    def _apply_object_bridge(self, rule: ReasoningRule) -> list[int]:
        """Link an edge's object to its subject's single ``support_relation`` edge.

        For each ``left_relation`` edge ``s -> o``, if the graph returns an
        active ``support_relation`` edge ``s -> x``, derive ``o -output-> x``.
        """
        created: list[int] = []
        for left in self.graph.iter_active_edges():
            if left.relation != rule.left_relation:
                continue
            support = self.graph.get_active_edge(left.subject, rule.support_relation)
            if support is None:
                continue
            created_id = self._add_if_supported(
                subject=left.object,
                relation=rule.output_relation,
                object_value=support.object,
                rule_name=rule.name,
                support_edges=(left, support),
            )
            if created_id is not None:
                created.append(created_id)
        return created

    def _apply_via_object(self, rule: ReasoningRule) -> list[int]:
        """Chain through the object's single ``via_relation`` edge.

        For each ``left_relation`` edge ``s -> o``, if the graph returns an
        active ``via_relation`` edge ``o -> x``, derive ``s -output-> x``.
        """
        created: list[int] = []
        for left in self.graph.iter_active_edges():
            if left.relation != rule.left_relation:
                continue
            right = self.graph.get_active_edge(left.object, rule.via_relation)
            if right is None:
                continue
            created_id = self._add_if_supported(
                subject=left.subject,
                relation=rule.output_relation,
                object_value=right.object,
                rule_name=rule.name,
                support_edges=(left, right),
            )
            if created_id is not None:
                created.append(created_id)
        return created

    def _add_if_supported(
        self,
        *,
        subject: str,
        relation: str,
        object_value: str,
        rule_name: str,
        support_edges: tuple[EdgeRecord, EdgeRecord],
    ) -> int | None:
        """Add the derived fact unless the graph already returns it.

        Returns whatever ``graph.add_derived_fact`` returns for a new fact,
        or ``None`` when ``subject -relation-> object_value`` already exists.
        The fact is added with ``commit=False``; the caller commits.
        """
        for edge in self.graph.get_objects(subject, relation):
            if edge.object == object_value:
                return None
        source_text = self._proof_source_text(rule_name, support_edges)
        return self.graph.add_derived_fact(
            subject=subject,
            relation=relation,
            object=object_value,
            source_text=source_text,
            supports_edge_id=support_edges[0].edge_id,
            commit=False,
        )

    @staticmethod
    def _proof_source_text(rule_name: str, support_edges: tuple[EdgeRecord, EdgeRecord]) -> str:
        """Encode the rule name and both supporting edges as a ``|``-joined trace."""
        left, right = support_edges
        return (
            f"reason:{rule_name}|"
            f"{left.subject}-[{left.relation}]->{left.object}|"
            f"{right.subject}-[{right.relation}]->{right.object}"
        )
|
runtime/aethon/rfi_reasoning_rules.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass(frozen=True)
class ReasoningRule:
    """Immutable description of one reasoning rule.

    ``name``, ``kind``, ``output_relation`` and ``confidence`` are required.
    The remaining fields default to empty values.
    """

    # Required fields.
    name: str
    kind: str
    output_relation: str
    confidence: float

    # Optional single-relation parameters.
    left_relation: str = ""
    right_relation: str = ""
    source_relation: str = ""
    via_relation: str = ""
    support_relation: str = ""

    # Optional multi-relation parameters.
    exclude_relations: tuple[str, ...] = ()
    support_relation_set: tuple[str, ...] = ()
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class NativeReasoningRuleSet:
    """Loads native reasoning rules from data instead of freezing rule metadata in code."""

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "reasoning" / "aethon_reasoning_rules_v1.json"

    def __init__(self, path: str | Path | None = None, payload: dict | None = None) -> None:
        """Remember the source path (default: ``DEFAULT_PATH``) and load the rules."""
        self.path = self.DEFAULT_PATH if path is None else Path(path)
        self.payload = self._load_payload(payload)
        self.rules = self._load_rules()

    def _load_payload(self, payload: dict | None) -> dict:
        """Return ``payload`` if given, else the parsed file, else an empty dict."""
        if payload is not None:
            return payload
        if self.path.exists():
            return json.loads(self.path.read_text(encoding="utf-8"))
        return {}

    def _load_rules(self) -> tuple[ReasoningRule, ...]:
        """Build a ``ReasoningRule`` for every row under the payload's ``rules`` key."""
        built: list[ReasoningRule] = []
        for row in self.payload.get("rules", []):
            built.append(
                ReasoningRule(
                    name=str(row["name"]),
                    kind=str(row.get("kind", "")),
                    output_relation=str(row["output_relation"]),
                    confidence=float(row["confidence"]),
                    left_relation=str(row.get("left_relation", "")),
                    right_relation=str(row.get("right_relation", "")),
                    source_relation=str(row.get("source_relation", "")),
                    via_relation=str(row.get("via_relation", "")),
                    support_relation=str(row.get("support_relation", "")),
                    exclude_relations=tuple(str(item) for item in row.get("exclude_relations", [])),
                    support_relation_set=tuple(str(item) for item in row.get("support_relation_set", [])),
                )
            )
        return tuple(built)

    def to_payload(self) -> dict:
        """Return a shallow copy of the loaded payload."""
        return dict(self.payload)
|
runtime/aethon/rfi_runtime.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
import json
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import re
|
| 8 |
+
|
| 9 |
+
from .rfi_bundle import NativeBundleManager
|
| 10 |
+
from .rfi_abstraction import AbstractionEngine
|
| 11 |
+
from .rfi_document_filter import DocumentQualityGate
|
| 12 |
+
from .rfi_graph import RelationalGraphStore
|
| 13 |
+
from .rfi_ingest import DeterministicTripleExtractor
|
| 14 |
+
from .rfi_lexicon import AethonNativeConceptCodec
|
| 15 |
+
from .rfi_metrics import StructuralCapacityMeter
|
| 16 |
+
from .rfi_ontology import ConceptOntology
|
| 17 |
+
from .rfi_query_forms import NativeQueryFormSet
|
| 18 |
+
from .rfi_query import ProofQueryEngine, QueryResult
|
| 19 |
+
from .rfi_reasoner import StructuralReasoner
|
| 20 |
+
from .rfi_surface import GraphVerbalizer
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass(frozen=True)
class NativeResponse:
    """Immutable response record; all six fields are required."""

    # Textual parts of the response.
    answer: str
    text: str
    explanation: str

    # Supporting trace, stored as tuples of strings.
    proof: tuple[str, ...]
    reasoning: tuple[str, ...]

    # Mode label of the response.
    mode: str
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class AethonNativeBase:
|
| 34 |
+
"""The first real no-weight Aethon base runtime."""
|
| 35 |
+
|
| 36 |
+
NAME = "Aethon N1 Base"
|
| 37 |
+
FAMILY = "Aethon Native"
|
| 38 |
+
TOKENIZER = f"{AethonNativeConceptCodec.NAME} ({AethonNativeConceptCodec.SHORT_NAME})"
|
| 39 |
+
|
| 40 |
+
def __init__(
    self,
    *,
    db_path: str = ":memory:",
    semantic_lexicon_path: str | None = None,
    semantic_lexicon_payload: list[dict[str, str]] | None = None,
    surface_lexicon_path: str | None = None,
    surface_lexicon_payload: dict | None = None,
    query_form_path: str | None = None,
    query_form_payload: list[dict[str, object]] | None = None,
    reasoning_rules_path: str | None = None,
    reasoning_rules_payload: dict | None = None,
) -> None:
    """Wire up the runtime components from paths and/or in-memory payloads.

    Every configuration argument is stored on the instance unchanged and
    then passed to the component that consumes it. All arguments are
    keyword-only.
    """
    # Keep the raw configuration on the instance.
    self.semantic_lexicon_path = semantic_lexicon_path
    self.semantic_lexicon_payload = semantic_lexicon_payload
    self.surface_lexicon_path = surface_lexicon_path
    self.surface_lexicon_payload = surface_lexicon_payload
    self.query_form_path = query_form_path
    self.query_form_payload = query_form_payload
    self.reasoning_rules_path = reasoning_rules_path
    self.reasoning_rules_payload = reasoning_rules_payload

    # Language-side components.
    self.ontology = ConceptOntology(semantic_lexicon_path, semantic_lexicon_payload)
    self.codec = AethonNativeConceptCodec(self.ontology)
    self.query_forms = NativeQueryFormSet(query_form_path, query_form_payload)
    self.document_gate = DocumentQualityGate()

    # Graph store and the components that work on it.
    self.graph = RelationalGraphStore(db_path=db_path)
    self.extractor = DeterministicTripleExtractor(self.ontology)
    self.abstraction = AbstractionEngine(self.graph, self.ontology)
    self.reasoner = StructuralReasoner(self.graph, reasoning_rules_path, reasoning_rules_payload)
    self.query_engine = ProofQueryEngine(
        self.graph,
        self.ontology,
        self.abstraction,
        self.reasoner,
        self.query_forms,
    )
    self.surface = GraphVerbalizer(surface_lexicon_path, surface_lexicon_payload, self.graph)
|
| 71 |
+
|
| 72 |
+
def learn(self, text: str) -> dict[str, object]:
    """Learn from one piece of text and commit the graph immediately.

    Triples extracted from *text* are ingested without committing, then rules
    are materialized (``min_support=2``) and the reasoner is run before a
    single commit. The returned dict carries ``learned_edges``,
    ``derived_edges``, ``rule_count`` and ``lexicon`` (the codec's token
    export for *text*).
    """
    new_edges = self.graph.ingest_triples(self.extractor.extract(text), commit=False)
    induced_rules = self.abstraction.materialize_rules(min_support=2)
    inferred = self.reasoner.materialize()
    self.graph.commit()

    summary: dict[str, object] = {"learned_edges": new_edges}
    summary["derived_edges"] = inferred
    summary["rule_count"] = len(induced_rules)
    summary["lexicon"] = self.codec.export_tokens(text)
    return summary
|
| 84 |
+
|
| 85 |
+
def learn_fast(self, text: str) -> dict[str, object]:
    """Ingest the triples extracted from *text* without reasoning or committing.

    The result has the same keys as ``learn`` but ``derived_edges`` is always
    an empty list and ``rule_count`` is always ``0``. Nothing is committed
    here; the ingest call is made with ``commit=False``.
    """
    new_edges = self.graph.ingest_triples(self.extractor.extract(text), commit=False)
    return dict(
        learned_edges=new_edges,
        derived_edges=[],
        rule_count=0,
        lexicon=self.codec.export_tokens(text),
    )
|
| 94 |
+
|
| 95 |
+
def learn_ultra_fast(self, texts: list[str]) -> dict[str, int]:
    """Bulk-ingest triples from many texts through the fast graph path.

    Whitespace-only texts are skipped. All triples are gathered first and
    handed to ``ingest_triples_fast`` in one call with ``commit=False``.
    Returns ``learned_edges`` (whatever the graph reports) and
    ``triple_count`` (number of triples submitted).
    """
    gathered = [
        triple
        for text in texts
        if text.strip()
        for triple in self.extractor.extract_ultra(text)
    ]
    edge_total = self.graph.ingest_triples_fast(gathered, commit=False)
    return {"learned_edges": edge_total, "triple_count": len(gathered)}
|
| 102 |
+
|
| 103 |
+
def capture_ultra_fast(self, lane: str, source: str, texts: list[str]) -> dict[str, int]:
    """Store stripped, non-empty texts as raw units tagged with *lane* and *source*.

    The units are passed to the graph with ``commit=False``; the value the
    graph returns is reported under ``captured_units``.
    """
    units = []
    for raw in texts:
        stripped = raw.strip()
        if stripped:
            units.append((lane, source, stripped))
    return {"captured_units": self.graph.ingest_raw_units(units, commit=False)}
|
| 107 |
+
|
| 108 |
+
def digest_captured_units(
    self,
    *,
    batch_size: int = 5000,
    flush_every: int = 20000,
    reason_rounds: int = 6,
    purge_after_digest: bool = False,
    progress_callback=None,
) -> dict[str, int]:
    """Digest previously captured raw units in batches.

    Undigested units are fetched ``batch_size`` at a time until none remain.
    Each batch's non-blank texts go through ``learn_ultra_fast`` and every
    unit in the batch is then marked digested. Whenever at least
    ``flush_every`` units have accumulated since the last flush,
    ``flush_learning`` runs with ``reason_rounds=2``; the final flush of any
    remainder uses the caller's ``reason_rounds``. If nothing is left to
    flush, the graph is simply committed.

    ``progress_callback``, when given, receives a fresh dict of the running
    totals after every batch. With ``purge_after_digest`` the digested units
    are purged at the end and their count is reported as ``purged_units``.
    """
    graph = self.graph
    stats = {
        "processed_units": 0,
        "total_units": graph.count_undigested_raw_units(),
        "learned_edges": 0,
        "derived_edges": 0,
        "rule_count": 0,
    }
    unflushed = 0

    def absorb(flushed) -> None:
        # Fold one flush_learning() report into the running totals.
        stats["derived_edges"] += int(flushed["derived_edges"])
        stats["rule_count"] += int(flushed["rule_count"])

    while True:
        batch = graph.fetch_undigested_raw_units(limit=batch_size)
        if not batch:
            break
        texts = [str(item["text"]) for item in batch if str(item["text"]).strip()]
        if texts:
            stats["learned_edges"] += int(self.learn_ultra_fast(texts)["learned_edges"])
        # Blank units are marked digested too, so they are not fetched again.
        unit_ids = [int(item["unit_id"]) for item in batch]
        graph.mark_raw_units_digested(unit_ids, commit=False)
        stats["processed_units"] += len(unit_ids)
        unflushed += len(unit_ids)
        if unflushed >= flush_every:
            absorb(self.flush_learning(reason_rounds=2))
            unflushed = 0
        if progress_callback is not None:
            progress_callback(dict(stats))

    if unflushed:
        absorb(self.flush_learning(reason_rounds=reason_rounds))
    else:
        graph.commit()

    purged = graph.purge_digested_raw_units(commit=True) if purge_after_digest else 0
    return {**stats, "purged_units": purged}
|
| 167 |
+
|
| 168 |
+
def flush_learning(
    self,
    *,
    min_support: int = 2,
    reason_rounds: int = 4,
    induce_min_support: int | None = None,
) -> dict[str, int]:
    """Materialize rules, run the reasoner, and commit the graph.

    ``induce_min_support`` falls back to ``min_support`` when it is ``None``.
    Returns the number of materialized rules (``rule_count``) and the number
    of items the reasoner returned (``derived_edges``).
    """
    induced_rules = self.abstraction.materialize_rules(min_support=min_support)
    effective_support = min_support if induce_min_support is None else induce_min_support
    inferred = self.reasoner.materialize(
        max_rounds=reason_rounds,
        induce_min_support=effective_support,
    )
    self.graph.commit()
    return {"rule_count": len(induced_rules), "derived_edges": len(inferred)}
|
| 182 |
+
|
| 183 |
+
def learn_document(self, title: str, text: str) -> dict[str, object]:
    """Learn from a titled document after it passes the document gate.

    When the gate rejects the document, an all-empty result carrying the
    gate's ``reason`` is returned. Otherwise every selected unit goes through
    ``learn_fast`` and a single ``flush_learning`` call follows.

    ``derived_edges`` in the result is a list of zeros whose length equals
    the flushed derived-edge count; the flush reports only a count, so the
    entries are placeholders rather than edge ids.
    """
    selection = self.document_gate.select(title, text)
    if not selection.accepted:
        return {"learned_edges": [], "derived_edges": [], "rule_count": 0, "lexicon": [], "reason": selection.reason}

    edge_ids: list[int] = []
    tokens: list[dict[str, object]] = []
    for unit in selection.units:
        outcome = self.learn_fast(unit)
        edge_ids.extend(outcome["learned_edges"])
        tokens.extend(outcome["lexicon"])

    flushed = self.flush_learning()
    # [0] * n is already the empty list for n == 0.
    placeholders: list[int] = [0] * int(flushed["derived_edges"])
    total_rules = int(flushed["rule_count"])
    return {
        "learned_edges": edge_ids,
        "derived_edges": placeholders,
        "rule_count": total_rules,
        "lexicon": tokens,
        "reason": selection.reason,
        "units": list(selection.units),
    }
|
| 208 |
+
|
| 209 |
+
def learn_jsonl(self, path: str) -> dict[str, int]:
    """Learn from a JSON Lines file, one JSON object per line.

    For each row:

    * if ``title`` or ``text`` is non-empty after stripping, the pair is sent
      to ``learn_document``; the row counts as one fact when that call
      reports any learned edges;
    * each of the ``memory``, ``facts`` and ``fact`` fields may hold a string
      or a list of strings; every non-blank string is sent to ``learn`` and
      counted as one fact.

    Blank and whitespace-only lines are skipped and not counted as rows, so a
    file with empty lines between or after records no longer fails in
    ``json.loads``.

    Returns ``{"rows": <rows parsed>, "facts": <facts counted>}``.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    source = Path(path)
    rows = 0
    facts = 0
    with source.open("r", encoding="utf-8") as handle:
        for line in handle:
            if not line.strip():
                # Tolerate blank separator / trailing lines.
                continue
            row = json.loads(line)
            title = str(row.get("title", "")).strip()
            text = str(row.get("text", "")).strip()
            if title or text:
                learned = self.learn_document(title, text)
                if learned["learned_edges"]:
                    facts += 1
            for field in ("memory", "facts", "fact"):
                payload = row.get(field)
                if isinstance(payload, str):
                    candidates = [payload]
                elif isinstance(payload, list):
                    candidates = payload
                else:
                    continue
                for item in candidates:
                    if isinstance(item, str) and item.strip():
                        self.learn(item)
                        facts += 1
            rows += 1
    return {"rows": rows, "facts": facts}
|
| 234 |
+
|
| 235 |
+
def ask(self, query: str) -> NativeResponse:
    """Answer *query*, splitting compound questions into separately answered parts.

    When ``_split_query_parts`` yields more than one part, each part is
    answered recursively and the responses are merged into one response with
    ``mode="multi"``: answers joined with `` | ``, non-empty texts and
    explanations joined with spaces, proofs and reasoning concatenated.

    A single query advances the surface's response state and is passed to the
    query engine. If the engine returns ``None``, the response has answer
    ``"<unknown>"`` and mode ``"unknown"``.
    """
    sub_queries = self._split_query_parts(query)
    if len(sub_queries) > 1:
        replies = [self.ask(sub_query) for sub_query in sub_queries]
        merged_answer = " | ".join(reply.answer for reply in replies)
        merged_text = " ".join(reply.text for reply in replies if reply.text)
        merged_explanation = " ".join(reply.explanation for reply in replies if reply.explanation)
        merged_proof = tuple(step for reply in replies for step in reply.proof)
        merged_reasoning = tuple(step for reply in replies for step in reply.reasoning)
        return NativeResponse(
            answer=merged_answer,
            text=merged_text,
            explanation=merged_explanation,
            proof=merged_proof,
            reasoning=merged_reasoning,
            mode="multi",
        )

    surface = self.surface
    surface.advance_response_state()
    result = self.query_engine.answer(query)
    if result is not None:
        return self._render(query, result)
    return NativeResponse(
        answer="<unknown>",
        text=surface.verbalize_result(query, None),
        explanation=surface.explain_result(query, None),
        proof=(),
        reasoning=(),
        mode="unknown",
    )
|
| 259 |
+
|
| 260 |
+
def inspect(self, text: str) -> list[dict[str, object]]:
    """Return the codec's token export for *text*."""
    exported = self.codec.export_tokens(text)
    return exported
|
| 262 |
+
|
| 263 |
+
def capacity(self) -> dict[str, int]:
    """Return the structural capacity metadata for the current graph.

    A graph with a real ``db_path`` is measured straight from that path. A
    ``":memory:"`` graph is first copied with ``conn.backup`` into a SQLite
    file inside a temporary directory, and the copy is measured before the
    directory is removed.
    """
    graph = self.graph
    if graph.db_path == ":memory:":
        # Imported lazily: only the in-memory path needs these modules.
        import sqlite3
        import tempfile

        with tempfile.TemporaryDirectory() as scratch_dir:
            snapshot_path = Path(scratch_dir) / "graph.sqlite3"
            snapshot = sqlite3.connect(str(snapshot_path))
            try:
                graph.conn.backup(snapshot)
            finally:
                snapshot.close()
            meter = StructuralCapacityMeter.from_sqlite(snapshot_path)
            return meter.to_metadata()
    return StructuralCapacityMeter.from_sqlite(graph.db_path).to_metadata()
|
| 278 |
+
|
| 279 |
+
def close(self) -> None:
    """Close the underlying graph store."""
    store = self.graph
    store.close()
|
| 281 |
+
|
| 282 |
+
def _render(self, query: str, result: QueryResult) -> NativeResponse:
    """Wrap a query result in a response, adding verbalized text and explanation."""
    surface = self.surface
    spoken = surface.verbalize_result(query, result)
    rationale = surface.explain_result(query, result)
    return NativeResponse(
        answer=result.answer,
        text=spoken,
        explanation=rationale,
        proof=result.proof,
        reasoning=result.reasoning,
        mode=result.mode,
    )
|
| 291 |
+
|
| 292 |
+
@staticmethod
|
| 293 |
+
def _split_query_parts(query: str) -> list[str]:
|
| 294 |
+
parts: list[str] = []
|
| 295 |
+
for part in re.split(
|
| 296 |
+
r"(?:\?\s+|\?\s*$|(?:\s+and\s+also\s+)|(?:\s+also\s+)|(?:\s*;\s*)|(?:\s+then\s+)|(?:\r?\n+))",
|
| 297 |
+
query,
|
| 298 |
+
):
|
| 299 |
+
cleaned = part.strip()
|
| 300 |
+
cleaned = re.sub(r"^(?:also|and)\s+", "", cleaned, flags=re.IGNORECASE)
|
| 301 |
+
cleaned = re.sub(r"\s+", " ", cleaned).strip(" ?!.")
|
| 302 |
+
if cleaned:
|
| 303 |
+
parts.append(cleaned)
|
| 304 |
+
if len(parts) <= 1:
|
| 305 |
+
return [query.strip()]
|
| 306 |
+
return [part if part.endswith("?") else f"{part}?" for part in parts]
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the runtime entry point.

    ``--learn`` and ``--ask`` may be repeated and collect into lists;
    ``--capacity`` is a flag; the remaining options take one string value.
    """
    parser = argparse.ArgumentParser(description="Run the first real no-weight Aethon native base.")
    option_table = (
        ("--db-path", {"type": str, "default": ":memory:"}),
        ("--learn", {"action": "append", "default": []}),
        ("--learn-jsonl", {"type": str, "default": ""}),
        ("--ask", {"action": "append", "default": []}),
        ("--inspect", {"type": str, "default": ""}),
        ("--capacity", {"action": "store_true"}),
        ("--save-bundle", {"type": str, "default": ""}),
        ("--load-bundle", {"type": str, "default": ""}),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def main() -> None:
    """Command-line entry point.

    Loads a runtime from ``--load-bundle`` or creates a fresh one on
    ``--db-path``, then processes the options in a fixed order: ``--learn``
    facts, ``--learn-jsonl``, ``--inspect``, ``--capacity``, ``--ask``
    queries and finally ``--save-bundle``. The runtime is always closed.
    """
    args = parse_args()
    if args.load_bundle:
        runtime = NativeBundleManager.load(args.load_bundle)
    else:
        runtime = AethonNativeBase(db_path=args.db_path)
    try:
        for fact in args.learn:
            learned = runtime.learn(fact)
            print(f"learned_edges={len(learned['learned_edges'])} derived_edges={len(learned['derived_edges'])} rule_count={learned['rule_count']}")
        if args.learn_jsonl:
            stats = runtime.learn_jsonl(args.learn_jsonl)
            print(f"learned_rows={stats['rows']} learned_facts={stats['facts']}")
        if args.inspect:
            print(json.dumps(runtime.inspect(args.inspect), indent=2))
        if args.capacity:
            print(json.dumps(runtime.capacity(), indent=2))
        for query in args.ask:
            response = runtime.ask(query)
            summary_lines = (
                f"Q: {query}",
                f"A: {response.answer}",
                f"mode: {response.mode}",
                f"text: {response.text}",
                f"explain: {response.explanation}",
            )
            for summary_line in summary_lines:
                print(summary_line)
            # The trace sections are printed only when they have content.
            for heading, steps in (("reasoning:", response.reasoning), ("proof:", response.proof)):
                if not steps:
                    continue
                print(heading)
                for step in steps:
                    print(f"  - {step}")
        if args.save_bundle:
            bundle = NativeBundleManager.save(runtime, args.save_bundle)
            print(f"bundle={bundle}")
    finally:
        runtime.close()
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
runtime/aethon/rfi_semantics.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@dataclass(frozen=True)
class SemanticEntry:
    """One lexicon row: a ``symbol`` of some ``kind`` and the ``meaning`` it maps to."""

    kind: str
    symbol: str
    meaning: str


class NativeSemanticLexicon:
    """Loads semantic normalization units from native data files instead of burying them in code."""

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "semantics" / "aethon_semantic_lexicon_v1.jsonl"

    def __init__(self, path: str | Path | None = None, payload: list[dict[str, str]] | None = None) -> None:
        """Load the entries and index them by kind.

        Args:
            path: JSONL file to read; defaults to ``DEFAULT_PATH``. It is only
                read when *payload* is ``None``.
            payload: already-parsed rows (dicts with ``kind``, ``symbol`` and
                ``meaning``); takes precedence over *path*.

        Symbols are lower-cased in every index except ``emoji_map``. When a
        symbol occurs more than once within an index, the last entry wins.
        """
        self.path = Path(path) if path is not None else self.DEFAULT_PATH
        self.entries = self._load_entries(payload)
        self.emoji_map = {entry.symbol: entry.meaning for entry in self.entries if entry.kind == "emoji"}
        self.abbreviation_map = {entry.symbol.lower(): entry.meaning for entry in self.entries if entry.kind == "abbreviation"}
        # Abbreviations are deliberately part of the alias index as well.
        self.alias_map = {
            entry.symbol.lower(): entry.meaning
            for entry in self.entries
            if entry.kind in {"abbreviation", "semantic_alias", "multilingual_alias"}
        }
        self.phrase_alias_map = {
            entry.symbol.lower(): entry.meaning
            for entry in self.entries
            if entry.kind in {"phrase_alias", "multilingual_phrase"}
        }
        self.typo_map = {entry.symbol.lower(): entry.meaning for entry in self.entries if entry.kind == "typo"}
        self.relation_prepositions = {
            entry.symbol.lower()
            for entry in self.entries
            if entry.kind == "relation_preposition"
        }
        self.query_fillers = {
            entry.symbol.lower()
            for entry in self.entries
            if entry.kind == "query_filler"
        }

    def _load_entries(self, payload: list[dict[str, str]] | None) -> tuple[SemanticEntry, ...]:
        """Build the entry tuple from *payload*, or from ``self.path`` when it is ``None``.

        A missing file yields an empty tuple. Blank and whitespace-only lines
        in the file are skipped so that empty separator or trailing lines do
        not fail in ``json.loads``.

        Raises:
            json.JSONDecodeError: if a non-blank line is not valid JSON.
            KeyError: if a row lacks ``kind``, ``symbol`` or ``meaning``.
        """
        if payload is not None:
            return tuple(self._entry_from_row(row) for row in payload)
        if not self.path.exists():
            return ()
        with self.path.open("r", encoding="utf-8") as handle:
            return tuple(
                self._entry_from_row(json.loads(line))
                for line in handle
                if line.strip()
            )

    @staticmethod
    def _entry_from_row(row: dict[str, str]) -> SemanticEntry:
        """Convert one raw row into a ``SemanticEntry``, stringifying and stripping each field."""
        return SemanticEntry(
            kind=str(row["kind"]).strip(),
            symbol=str(row["symbol"]).strip(),
            meaning=str(row["meaning"]).strip(),
        )

    def to_payload(self) -> list[dict[str, str]]:
        """Return the entries as plain dicts, in load order (the inverse of loading a payload)."""
        return [
            {
                "kind": entry.kind,
                "symbol": entry.symbol,
                "meaning": entry.meaning,
            }
            for entry in self.entries
        ]

    def describe_emojis(self, text: str) -> list[tuple[str, str]]:
        """Return ``(emoji, meaning)`` for every known emoji occurring in *text*.

        Results follow the order of ``emoji_map``. The map's keys are unique,
        so no duplicate pairs can arise and no de-duplication is needed.
        """
        return [(emoji, meaning) for emoji, meaning in self.emoji_map.items() if emoji in text]
|
runtime/aethon/rfi_surface.py
ADDED
|
@@ -0,0 +1,546 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from .rfi_graph import RelationalGraphStore
|
| 4 |
+
from .rfi_query import QueryResult
|
| 5 |
+
from .rfi_surface_lexicon import NativeSurfaceLexicon
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class GraphVerbalizer:
|
| 9 |
+
"""Turns proof-backed query results into direct language without weight-based decoding."""
|
| 10 |
+
|
| 11 |
+
def __init__(
    self,
    surface_lexicon_path: str | None = None,
    surface_lexicon_payload: dict | None = None,
    graph: RelationalGraphStore | None = None,
) -> None:
    """Load the surface lexicon and start with an empty response state.

    *graph* is optional; it is stored as given and consulted by ``_humanize``
    only when it is not ``None``.
    """
    self.lexicon = NativeSurfaceLexicon(surface_lexicon_path, surface_lexicon_payload)
    self.graph, self.response_index = graph, 0
    history: dict[str, list[str]] = {}
    self.response_history = history
|
| 21 |
+
|
| 22 |
+
def advance_response_state(self) -> None:
    """Increment ``response_index`` by one."""
    self.response_index = self.response_index + 1
|
| 24 |
+
|
| 25 |
+
def verbalize_result(self, query: str, result: QueryResult | None) -> str:
    """Render a query result as user-facing text, dispatching on ``result.mode``.

    ``None`` produces a fixed "cannot ground an answer" sentence that quotes
    *query*. For most modes the proof-backed support sentences are preferred
    when available; otherwise one of several canned phrasings is picked via
    ``_choose_novel`` using a history key built from the query, mode and
    answer. Unrecognised modes fall through to the humanized answer followed
    by a period.

    Raises:
        ValueError: in ``revision`` mode when ``result.answer`` lacks the
            ``:`` or ``->`` separator (tuple unpacking of the split fails).
    """
    if result is None:
        return f"I cannot ground an answer to '{query}' from what I currently know."
    # Key under which phrasings for this exact query/mode/answer are tracked.
    history_key = f"text::{query}::{result.mode}::{result.answer}"
    if result.mode == "direct":
        # Proof lines prefixed "emoji:" get a dedicated sentence.
        if result.proof and result.proof[0].startswith("emoji:"):
            return f"The emoji you used is {self._humanize(result.answer)}."
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer, supports, self._humanize(result.answer))
        return self._sentence(self._humanize(result.answer))
    if result.mode == "derived":
        # Proof lines prefixed "math:" mark a computed result.
        if result.proof and result.proof[0].startswith("math:"):
            candidates = (
                f"I computed the exact answer as {self._humanize(result.answer)}.",
                f"The exact result is {self._humanize(result.answer)}. I reached it by direct computation.",
                f"I evaluated the arithmetic structure and got {self._humanize(result.answer)}.",
                f"My computation resolves to {self._humanize(result.answer)}.",
                f"Working through the arithmetic gives {self._humanize(result.answer)}.",
                f"I solve the structure exactly and end at {self._humanize(result.answer)}.",
            )
            return self._choose_novel(history_key, candidates)
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer, supports, self._humanize(result.answer))
        candidates = (
            f"I derive the answer as {self._humanize(result.answer)} from stored relations.",
            f"My structural reasoning leads me to {self._humanize(result.answer)}.",
            f"I do not retrieve {self._humanize(result.answer)} directly; I derive it from linked relations.",
        )
        return self._choose_novel(history_key, candidates)
    if result.mode == "composed":
        subject = self._subject_from_query(query)
        # Support sentences are only attempted when a subject was found.
        if subject is not None:
            # NOTE(review): _proof_line_to_sentence runs twice per step (filter + value).
            supports = [self._proof_line_to_sentence(step) for step in result.proof if self._proof_line_to_sentence(step)]
            if supports:
                return self._compose_support_paragraph(query + result.answer, supports, self._humanize(result.answer))
        candidates = (
            f"I connect the stored facts to answer with {self._humanize(result.answer)}.",
            f"My answer is {self._humanize(result.answer)} because several linked facts converge on it.",
            f"I compose multiple facts and end at {self._humanize(result.answer)}.",
        )
        return self._choose_novel(history_key, candidates)
    if result.mode == "path":
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer, supports, self._humanize(self._path_text(result.answer)))
        path = self._humanize(self._path_text(result.answer))
        return self._choose_novel(
            history_key,
            (
                f"I connect them through {path}.",
                f"The path between them runs through {path}.",
                f"My reasoning links them by way of {path}.",
            ),
        )
    if result.mode == "plan":
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer + "::plan", supports, self._humanize(result.answer))
        return self._choose_novel(
            history_key,
            (
                f"The next grounded step is {self._humanize(result.answer)}.",
                f"My planning chain points to {self._humanize(result.answer)} first.",
                f"The structure says {self._humanize(result.answer)} should happen next.",
            ),
        )
    if result.mode == "story":
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_story_paragraph(query + result.answer + "::story", supports)
        return self._choose_novel(
            history_key,
            (
                f"I can ground a story around {self._humanize(result.answer)} from the graph, but I need more concrete events to tell it richly.",
                f"The graph mentions {self._humanize(result.answer)}, though I would need more events to narrate a fuller story.",
            ),
        )
    if result.mode == "revision":
        # The answer is parsed as "<relation>:<previous>-><new>".
        relation, transition = result.answer.split(":", 1)
        previous_value, new_value = transition.split("->", 1)
        return (
            f"The stored {relation.replace('_', ' ')} changed from "
            f"{self._humanize(previous_value)} to {self._humanize(new_value)}."
        )
    if result.mode == "contradiction":
        return "Yes. The graph contains a revision or contradiction for that subject."
    if result.mode == "abstract":
        return self._compose_abstract_answer(result)
    if result.mode == "ontology":
        return f"{self._humanize(result.answer)}."
    # Any other mode: the humanized answer as a bare sentence.
    return f"{self._humanize(result.answer)}."
|
| 118 |
+
|
| 119 |
+
def verbalize_edge(self, subject: str, relation: str, object_value: str) -> str:
    """Turn one edge into a sentence.

    Edges whose subject is exactly ``"aethon"`` are phrased by
    ``_compose_self_sentence``; all others by ``_compose_relation_sentence``.
    """
    if subject != "aethon":
        return self._compose_relation_sentence(subject, relation, object_value)
    return self._compose_self_sentence(relation, object_value)
|
| 123 |
+
|
| 124 |
+
def verbalize_proof(self, result: QueryResult | None) -> str:
    """Format the proof steps as a bulleted, newline-separated trace.

    Returns a fixed "no proof" sentence when *result* is ``None`` or has no
    proof steps.
    """
    if result is None or not result.proof:
        return "No proof trace is available."
    bullets = [f"- {step}" for step in result.proof]
    return "\n".join(["Proof trace:", *bullets])
|
| 131 |
+
|
| 132 |
+
def verbalize_reasoning(self, result: QueryResult | None) -> str:
    """Format the reasoning steps as a bulleted list under an introductory line.

    The introduction rotates through three fixed phrasings, selected by
    ``response_index`` modulo three. Returns a fixed "no reasoning" sentence
    when *result* is ``None`` or has no reasoning steps.
    """
    if result is None or not result.reasoning:
        return "No reasoning steps are available."
    openers = (
        "I reason through the graph like this:",
        "My reasoning chain is:",
        "I connect the stored facts in this order:",
    )
    opener = openers[self.response_index % len(openers)]
    bullets = [f"- {step}" for step in result.reasoning]
    return "\n".join([opener, *bullets])
|
| 145 |
+
|
| 146 |
+
def explain_result(self, query: str, result: QueryResult | None) -> str:
    """Explain how an answer was reached, dispatching on ``result.mode``.

    ``None`` produces a fixed "not enough grounded structure" sentence that
    quotes *query*. Modes without a dedicated explanation, and several
    fallback paths inside the dedicated ones, reuse ``verbalize_result``.
    """
    if result is None:
        return f"I do not have enough grounded structure yet to answer '{query}'."
    edge = self._first_edge(result)
    # Key under which phrasings for this exact query/mode/answer are tracked.
    history_key = f"explain::{query}::{result.mode}::{result.answer}"
    if result.mode in {"direct", "ontology"}:
        if result.proof and result.proof[0].startswith("emoji:"):
            return f"I identified {self._humanize(result.answer)} directly from the symbols in your message."
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer + "::explain", supports, self._humanize(result.answer))
        # NOTE(review): both outcomes of this check return the same value.
        if edge is None:
            return self.verbalize_result(query, result)
        return self.verbalize_result(query, result)
    if result.mode == "derived":
        if not result.proof:
            return self.verbalize_result(query, result)
        # Only the first proof line decides how a derived answer is explained.
        proof = result.proof[0]
        if proof.startswith("math:"):
            return self._choose_novel(
                history_key,
                (
                    f"I reached {self._humanize(result.answer)} by evaluating the arithmetic structure directly instead of guessing.",
                    f"The result {self._humanize(result.answer)} comes from exact computation over the expression.",
                    f"I solved the expression structurally, which yields {self._humanize(result.answer)}.",
                ),
            )
        if proof.startswith("derived:"):
            supports = self._derived_support_lines(proof)
            if supports:
                return self._compose_support_paragraph(
                    query + result.answer + proof,
                    supports,
                    self._humanize(result.answer),
                )
            # Everything after the first "|" (or the whole line if there is none).
            detail = proof.split("|", 1)[-1]
            return (
                f"The answer is {self._humanize(result.answer)} because Aethon derived a new fact from "
                f"earlier stored relations: {self._humanize(detail)}."
            )
        return self.verbalize_result(query, result)
    if result.mode == "composed":
        # NOTE(review): _proof_line_to_sentence runs twice per step (filter + value).
        supports = [self._proof_line_to_sentence(step) for step in result.proof if self._proof_line_to_sentence(step)]
        if supports:
            return self._compose_support_paragraph(
                query + result.answer + "".join(result.proof),
                supports,
                self._humanize(result.answer),
            )
        return f"The answer is {self._humanize(result.answer)} because multiple linked facts point to it."
    if result.mode == "path":
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer + "::path", supports, self._humanize(self._path_text(result.answer)))
        if not result.proof:
            return self.verbalize_result(query, result)
        # Use each step's extracted relation, or the raw step when that is falsy.
        relation_chain = " then ".join(self._humanize(self._extract_relation(step) or step) for step in result.proof)
        return f"I connect the concepts by following {relation_chain}."
    if result.mode == "plan":
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_support_paragraph(query + result.answer + "::plan_explain", supports, self._humanize(result.answer))
        return f"The planning structure points to {self._humanize(result.answer)} as the next grounded step."
    if result.mode == "story":
        supports = self._proof_sentences(result)
        if supports:
            return self._compose_story_paragraph(query + result.answer + "::story_explain", supports)
        return f"I need more concrete events about {self._humanize(result.answer)} before I can extend the story further."
    if result.mode == "abstract":
        return self._compose_abstract_explanation(result)
    if result.mode == "revision":
        return self.verbalize_result(query, result)
    if result.mode == "contradiction":
        return self.verbalize_result(query, result)
    return self.verbalize_result(query, result)
|
| 221 |
+
|
| 222 |
+
@staticmethod
|
| 223 |
+
def _plain_humanize(text: str) -> str:
|
| 224 |
+
return text.replace("_", " ")
|
| 225 |
+
|
| 226 |
+
def _humanize(self, text: str) -> str:
|
| 227 |
+
if self.graph is not None and text and all(char.islower() or char.isdigit() or char == "_" for char in text):
|
| 228 |
+
return self.graph.get_display_name(text)
|
| 229 |
+
return self._plain_humanize(text)
|
| 230 |
+
|
| 231 |
+
def _sentence(self, text: str) -> str:
|
| 232 |
+
cleaned = text.strip()
|
| 233 |
+
if not cleaned:
|
| 234 |
+
return ""
|
| 235 |
+
cleaned = cleaned[0].upper() + cleaned[1:]
|
| 236 |
+
if cleaned[-1] not in ".!?":
|
| 237 |
+
cleaned += "."
|
| 238 |
+
return cleaned
|
| 239 |
+
|
| 240 |
+
@staticmethod
|
| 241 |
+
def _lower_sentence(text: str) -> str:
|
| 242 |
+
cleaned = text.strip()
|
| 243 |
+
if not cleaned:
|
| 244 |
+
return ""
|
| 245 |
+
if cleaned[-1] in ".!?":
|
| 246 |
+
cleaned = cleaned[:-1]
|
| 247 |
+
lowered = cleaned[0].lower() + cleaned[1:] if cleaned else ""
|
| 248 |
+
words = lowered.split()
|
| 249 |
+
return " ".join("I" if word == "i" else word for word in words)
|
| 250 |
+
|
| 251 |
+
def _first_edge(self, result: QueryResult | None) -> tuple[str, str, str] | None:
|
| 252 |
+
if result is None:
|
| 253 |
+
return None
|
| 254 |
+
for step in result.proof:
|
| 255 |
+
parsed = self._parse_edge(step)
|
| 256 |
+
if parsed is not None:
|
| 257 |
+
return parsed
|
| 258 |
+
return None
|
| 259 |
+
|
| 260 |
+
def _parse_edge(self, proof_line: str) -> tuple[str, str, str] | None:
|
| 261 |
+
if "-[" not in proof_line or "]->" not in proof_line:
|
| 262 |
+
return None
|
| 263 |
+
payload = proof_line.split("|", 1)[0]
|
| 264 |
+
_, edge = payload.split(":", 1)
|
| 265 |
+
subject, rest = edge.split("-[", 1)
|
| 266 |
+
relation, object_value = rest.split("]->", 1)
|
| 267 |
+
return subject, relation, object_value
|
| 268 |
+
|
| 269 |
+
def _proof_line_to_sentence(self, proof_line: str) -> str | None:
|
| 270 |
+
parsed = self._parse_edge(proof_line)
|
| 271 |
+
if parsed is not None:
|
| 272 |
+
subject, relation, object_value = parsed
|
| 273 |
+
return self.verbalize_edge(subject, relation, object_value)
|
| 274 |
+
if proof_line.startswith("compose:"):
|
| 275 |
+
return "Aethon combined linked facts to reach the final answer"
|
| 276 |
+
if proof_line.startswith("ontology:"):
|
| 277 |
+
payload = proof_line.split(":", 1)[1]
|
| 278 |
+
subject, _, object_value = payload.partition("->is_a->")
|
| 279 |
+
return f"{self._humanize(subject)} belongs to the class {self._humanize(object_value)}"
|
| 280 |
+
return None
|
| 281 |
+
|
| 282 |
+
def _derived_support_lines(self, proof_line: str) -> list[str]:
|
| 283 |
+
parts = proof_line.split("|")
|
| 284 |
+
supports: list[str] = []
|
| 285 |
+
seen: set[str] = set()
|
| 286 |
+
for part in parts[1:]:
|
| 287 |
+
parsed = self._parse_edge(f"derived:{part}") if "-[" in part and "]->" in part else None
|
| 288 |
+
if parsed is not None:
|
| 289 |
+
subject, relation, object_value = parsed
|
| 290 |
+
sentence = self.verbalize_edge(subject, relation, object_value)
|
| 291 |
+
if sentence not in seen:
|
| 292 |
+
supports.append(sentence)
|
| 293 |
+
seen.add(sentence)
|
| 294 |
+
return supports
|
| 295 |
+
|
| 296 |
+
def _proof_sentences(self, result: QueryResult) -> list[str]:
|
| 297 |
+
supports: list[str] = []
|
| 298 |
+
seen: set[str] = set()
|
| 299 |
+
for step in result.proof:
|
| 300 |
+
if step.startswith("derived:"):
|
| 301 |
+
for line in self._derived_support_lines(step):
|
| 302 |
+
if line not in seen:
|
| 303 |
+
supports.append(line)
|
| 304 |
+
seen.add(line)
|
| 305 |
+
continue
|
| 306 |
+
sentence = self._proof_line_to_sentence(step)
|
| 307 |
+
if sentence and sentence not in seen:
|
| 308 |
+
supports.append(sentence)
|
| 309 |
+
seen.add(sentence)
|
| 310 |
+
return supports
|
| 311 |
+
|
| 312 |
+
def _extract_relation(self, proof_line: str) -> str | None:
|
| 313 |
+
parsed = self._parse_edge(proof_line)
|
| 314 |
+
if parsed is None:
|
| 315 |
+
return None
|
| 316 |
+
return parsed[1]
|
| 317 |
+
|
| 318 |
+
def _path_text(self, relation_chain: str) -> str:
|
| 319 |
+
pieces = [self._humanize(piece.strip()) for piece in relation_chain.split("->")]
|
| 320 |
+
if not pieces:
|
| 321 |
+
return relation_chain
|
| 322 |
+
if len(pieces) == 1:
|
| 323 |
+
return pieces[0]
|
| 324 |
+
return ", then ".join(pieces)
|
| 325 |
+
|
| 326 |
+
def _subject_from_query(self, query: str) -> str | None:
|
| 327 |
+
lowered = query.strip().rstrip(" ?")
|
| 328 |
+
words = lowered.split()
|
| 329 |
+
if len(words) >= 3 and words[:2] == ["where", "is"]:
|
| 330 |
+
return " ".join(words[2:])
|
| 331 |
+
if len(words) >= 4 and words[:2] == ["where", "does"]:
|
| 332 |
+
return words[2]
|
| 333 |
+
if len(words) >= 4 and words[:2] == ["what", "does"]:
|
| 334 |
+
return words[2]
|
| 335 |
+
if len(words) >= 4 and words[:2] == ["what", "did"]:
|
| 336 |
+
return words[2]
|
| 337 |
+
if len(words) >= 4 and words[:2] == ["what", "is"] and words[-1] == "carrying":
|
| 338 |
+
return " ".join(words[2:-1])
|
| 339 |
+
if len(words) >= 5 and words[:2] == ["which", "animal"] and "watch" in words:
|
| 340 |
+
watch_index = words.index("watch")
|
| 341 |
+
return " ".join(words[3:watch_index]) if watch_index > 3 else None
|
| 342 |
+
return None
|
| 343 |
+
|
| 344 |
+
def _compose_support_paragraph(self, seed: str, supports: list[str], answer: str) -> str:
|
| 345 |
+
cleaned = [self._sentence(line) for line in supports if line.strip()]
|
| 346 |
+
if not cleaned:
|
| 347 |
+
summaries = tuple(variant.format(answer=answer) for variant in self.lexicon.support_summary_variants)
|
| 348 |
+
if summaries:
|
| 349 |
+
return self._choose_novel(f"support::{seed}::{answer}", summaries)
|
| 350 |
+
return f"So I answer {answer}."
|
| 351 |
+
summaries = tuple(variant.format(answer=answer) for variant in self.lexicon.support_summary_variants)
|
| 352 |
+
candidates: list[str] = []
|
| 353 |
+
connectors = self.lexicon.support_connectors or ("Then",)
|
| 354 |
+
first_sentence = cleaned[0]
|
| 355 |
+
lowered_first = self._lower_sentence(first_sentence)
|
| 356 |
+
openings = (
|
| 357 |
+
first_sentence,
|
| 358 |
+
f"I start from this fact: {lowered_first}.",
|
| 359 |
+
f"The graph first gives me this: {lowered_first}.",
|
| 360 |
+
f"One grounded fact is that {lowered_first}.",
|
| 361 |
+
f"I begin with {lowered_first}.",
|
| 362 |
+
f"My first support fact is that {lowered_first}.",
|
| 363 |
+
)
|
| 364 |
+
reflections = (
|
| 365 |
+
f"Across that proof, the same answer keeps surfacing: {answer}.",
|
| 366 |
+
f"The linked structure stays consistent all the way to {answer}.",
|
| 367 |
+
f"Nothing in that chain breaks the answer {answer}.",
|
| 368 |
+
f"Those connected facts keep reinforcing {answer}.",
|
| 369 |
+
)
|
| 370 |
+
for offset in range(min(max(len(connectors), 1), 6)):
|
| 371 |
+
for opening in openings:
|
| 372 |
+
pieces = [opening]
|
| 373 |
+
for index, line in enumerate(cleaned[1:], start=1):
|
| 374 |
+
connector = connectors[(offset + index - 1) % len(connectors)]
|
| 375 |
+
pieces.append(f"{connector}, {self._lower_sentence(line)}.")
|
| 376 |
+
reflection = reflections[(offset + len(opening)) % len(reflections)]
|
| 377 |
+
pieces.append(reflection)
|
| 378 |
+
summary = summaries[(offset + len(candidates)) % len(summaries)] if summaries else f"So I answer {answer}."
|
| 379 |
+
pieces.append(summary)
|
| 380 |
+
candidates.append(" ".join(pieces))
|
| 381 |
+
return self._choose_novel(f"support::{seed}::{answer}", tuple(candidates))
|
| 382 |
+
|
| 383 |
+
def _compose_story_paragraph(self, seed: str, supports: list[str]) -> str:
|
| 384 |
+
cleaned = [self._sentence(line) for line in supports if line.strip()]
|
| 385 |
+
if not cleaned:
|
| 386 |
+
return ""
|
| 387 |
+
intros = (
|
| 388 |
+
"Here is the grounded story I can tell.",
|
| 389 |
+
"The graph lets me tell the story this way.",
|
| 390 |
+
"I can narrate the stored story like this.",
|
| 391 |
+
)
|
| 392 |
+
closers = (
|
| 393 |
+
"That is the grounded thread I can recover from what Aethon remembers.",
|
| 394 |
+
"Those events stay connected in memory, so the story holds together.",
|
| 395 |
+
"That sequence is how the stored events unfold in Aethon's graph.",
|
| 396 |
+
)
|
| 397 |
+
intro = self._choose_novel(f"story-intro::{seed}", intros)
|
| 398 |
+
closer = self._choose_novel(f"story-close::{seed}", closers)
|
| 399 |
+
body = " ".join(cleaned)
|
| 400 |
+
return f"{intro} {body} {closer}"
|
| 401 |
+
|
| 402 |
+
def _compose_abstract_answer(self, result: QueryResult) -> str:
|
| 403 |
+
explanation = self._compose_abstract_explanation(result)
|
| 404 |
+
if explanation:
|
| 405 |
+
return explanation
|
| 406 |
+
return f"I generalize the answer as {self._humanize(result.answer)}."
|
| 407 |
+
|
| 408 |
+
def _compose_abstract_explanation(self, result: QueryResult) -> str:
|
| 409 |
+
if not result.proof:
|
| 410 |
+
return f"I generalize the answer as {self._humanize(result.answer)}."
|
| 411 |
+
pieces: list[str] = []
|
| 412 |
+
seen: set[str] = set()
|
| 413 |
+
for step in result.proof:
|
| 414 |
+
sentence = self._abstract_proof_to_sentence(step)
|
| 415 |
+
if sentence and sentence not in seen:
|
| 416 |
+
pieces.append(sentence)
|
| 417 |
+
seen.add(sentence)
|
| 418 |
+
if not pieces:
|
| 419 |
+
return f"I generalize the answer as {self._humanize(result.answer)}."
|
| 420 |
+
closers = (
|
| 421 |
+
f"So I answer {self._humanize(result.answer)}.",
|
| 422 |
+
f"That is why I generalize the answer as {self._humanize(result.answer)}.",
|
| 423 |
+
f"So my generalized answer is {self._humanize(result.answer)}.",
|
| 424 |
+
f"That chain lets me answer with {self._humanize(result.answer)}.",
|
| 425 |
+
)
|
| 426 |
+
candidates = []
|
| 427 |
+
for offset in range(min(max(len(closers), 1), 6)):
|
| 428 |
+
ordered = list(pieces[offset % len(pieces):]) + list(pieces[:offset % len(pieces)])
|
| 429 |
+
rendered = " ".join(self._sentence(piece) for piece in ordered)
|
| 430 |
+
candidates.append(f"{rendered} {closers[offset % len(closers)]}")
|
| 431 |
+
return self._choose_novel(f"abstract::{result.answer}::{''.join(result.proof)}", tuple(candidates))
|
| 432 |
+
|
| 433 |
+
def _abstract_proof_to_sentence(self, proof_line: str) -> str | None:
|
| 434 |
+
if proof_line.startswith("abstract:"):
|
| 435 |
+
parsed = self._parse_edge(proof_line)
|
| 436 |
+
if parsed is None:
|
| 437 |
+
return None
|
| 438 |
+
subject, relation, object_value = parsed
|
| 439 |
+
return f"I know that {self._humanize(subject)} {relation.replace('_', ' ')} {self._humanize(object_value)}"
|
| 440 |
+
if proof_line.startswith("subject:") or proof_line.startswith("object:"):
|
| 441 |
+
payload = proof_line.split(":", 1)[1]
|
| 442 |
+
concept, _, parent = payload.partition("->is_a->")
|
| 443 |
+
if concept and parent:
|
| 444 |
+
return f"{self._humanize(concept)} belongs to {self._humanize(parent)}"
|
| 445 |
+
if proof_line.startswith("ontology:"):
|
| 446 |
+
payload = proof_line.split(":", 1)[1]
|
| 447 |
+
concept, _, parent = payload.partition("->is_a->")
|
| 448 |
+
if concept and parent:
|
| 449 |
+
return f"{self._humanize(concept)} belongs to {self._humanize(parent)}"
|
| 450 |
+
return None
|
| 451 |
+
|
| 452 |
+
@staticmethod
|
| 453 |
+
def _choose(seed: str, options: tuple[str, ...]) -> str:
|
| 454 |
+
if not options:
|
| 455 |
+
return ""
|
| 456 |
+
return options[sum(ord(char) for char in seed) % len(options)]
|
| 457 |
+
|
| 458 |
+
def _choose_novel(self, key: str, candidates: tuple[str, ...]) -> str:
|
| 459 |
+
if not candidates:
|
| 460 |
+
return ""
|
| 461 |
+
history = self.response_history.setdefault(key, [])
|
| 462 |
+
for candidate in candidates:
|
| 463 |
+
if candidate not in history:
|
| 464 |
+
history.append(candidate)
|
| 465 |
+
if len(history) > 24:
|
| 466 |
+
del history[:-24]
|
| 467 |
+
return candidate
|
| 468 |
+
choice = candidates[self.response_index % len(candidates)]
|
| 469 |
+
history.append(choice)
|
| 470 |
+
if len(history) > 24:
|
| 471 |
+
del history[:-24]
|
| 472 |
+
return choice
|
| 473 |
+
|
| 474 |
+
def _compose_relation_sentence(self, subject: str, relation: str, object_value: str) -> str:
|
| 475 |
+
subject_text = self._humanize(subject)
|
| 476 |
+
object_text = self._humanize(object_value)
|
| 477 |
+
tokens = relation.split("_")
|
| 478 |
+
head = tokens[0] if tokens else relation
|
| 479 |
+
tail = " ".join(tokens[1:])
|
| 480 |
+
|
| 481 |
+
if relation in {"is_a", "be"}:
|
| 482 |
+
return f"{subject_text} is {self._article(object_text)} {object_text}"
|
| 483 |
+
if relation.endswith("_in"):
|
| 484 |
+
verb = head if head not in {"located", "lives", "work"} else {
|
| 485 |
+
"located": "is located",
|
| 486 |
+
"lives": "lives",
|
| 487 |
+
"work": "works",
|
| 488 |
+
}.get(head, head)
|
| 489 |
+
return f"{subject_text} {verb} in {object_text}"
|
| 490 |
+
if relation in {"return", "return_value"}:
|
| 491 |
+
return f"{subject_text} returns {object_text}"
|
| 492 |
+
if relation == "depend_on":
|
| 493 |
+
return f"{subject_text} depends on {object_text}"
|
| 494 |
+
if relation == "prefer":
|
| 495 |
+
return f"{subject_text} prefers {object_text}"
|
| 496 |
+
if relation == "not_like_anymore":
|
| 497 |
+
return f"{subject_text} no longer likes {object_text}"
|
| 498 |
+
if relation == "contains":
|
| 499 |
+
return f"{subject_text} contains {object_text}"
|
| 500 |
+
if relation == "home_of":
|
| 501 |
+
return f"{subject_text} is the home of {object_text}"
|
| 502 |
+
if relation == "purchase_site_of":
|
| 503 |
+
return f"{subject_text} is where {object_text} was bought"
|
| 504 |
+
if relation == "chased_by":
|
| 505 |
+
return f"{subject_text} is chased by {object_text}"
|
| 506 |
+
if relation == "attacked_by":
|
| 507 |
+
return f"{subject_text} is attacked by {object_text}"
|
| 508 |
+
if relation == "watched_by":
|
| 509 |
+
return f"{subject_text} is watched by {object_text}"
|
| 510 |
+
if relation == "kept_by":
|
| 511 |
+
return f"{subject_text} is kept by {object_text}"
|
| 512 |
+
if relation == "carried_by":
|
| 513 |
+
return f"{subject_text} is carried by {object_text}"
|
| 514 |
+
if tail:
|
| 515 |
+
return f"{subject_text} {head} {tail} {object_text}"
|
| 516 |
+
return f"{subject_text} {head}s {object_text}" if not head.endswith("s") else f"{subject_text} {head} {object_text}"
|
| 517 |
+
|
| 518 |
+
def _compose_self_sentence(self, relation: str, object_value: str) -> str:
|
| 519 |
+
object_text = self._humanize(object_value)
|
| 520 |
+
if relation in {"is_a", "be"}:
|
| 521 |
+
return f"I am {self._article(object_text)} {object_text}"
|
| 522 |
+
if relation == "use":
|
| 523 |
+
return f"I use {object_text}"
|
| 524 |
+
if relation == "report":
|
| 525 |
+
return f"I report {object_text}"
|
| 526 |
+
if relation == "depend_on":
|
| 527 |
+
return f"I depend on {object_text}"
|
| 528 |
+
if relation == "prefer":
|
| 529 |
+
return f"I prefer {object_text}"
|
| 530 |
+
if relation == "like":
|
| 531 |
+
return f"I like {object_text}"
|
| 532 |
+
if relation == "equals":
|
| 533 |
+
return f"My value is {object_text}"
|
| 534 |
+
if relation.endswith("_in"):
|
| 535 |
+
head = relation.split("_", 1)[0]
|
| 536 |
+
return f"I {head} in {object_text}"
|
| 537 |
+
tokens = relation.split("_")
|
| 538 |
+
if len(tokens) > 1:
|
| 539 |
+
return f"I {' '.join(tokens)} {object_text}"
|
| 540 |
+
return f"I {relation} {object_text}"
|
| 541 |
+
|
| 542 |
+
@staticmethod
|
| 543 |
+
def _article(text: str) -> str:
|
| 544 |
+
if not text:
|
| 545 |
+
return "a"
|
| 546 |
+
return "an" if text[0].lower() in {"a", "e", "i", "o", "u"} else "a"
|
runtime/aethon/rfi_surface_lexicon.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class NativeSurfaceLexicon:
    """Loads Aethon surface templates and phrasing variants from native data.

    The payload is either supplied directly or read as JSON from ``path``
    (defaulting to ``DEFAULT_PATH``); a missing file yields an empty
    payload. Each known payload key is exposed as an attribute: the two
    template tables as dicts, every phrasing-variant list as a tuple.
    """

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "surface" / "aethon_surface_lexicon_v1.json"

    # Payload keys exposed as tuple-valued attributes of the same name.
    _VARIANT_FIELDS = (
        "direct_fact_variants",
        "direct_self_variants",
        "direct_emoji_variants",
        "direct_unknown_variants",
        "derived_math_variants",
        "derived_fact_variants",
        "derived_fallback_variants",
        "composed_variants",
        "path_variants",
        "abstract_variants",
        "explain_unknown_variants",
        "explain_emoji_variants",
        "explain_math_variants",
        "support_connectors",
        "support_summary_variants",
        "path_explain_variants",
        "reasoning_intro_variants",
    )

    # Declared for type checkers; values are assigned in ``__init__``.
    relation_templates: dict[str, str]
    self_relation_templates: dict[str, str]
    direct_fact_variants: tuple[str, ...]
    direct_self_variants: tuple[str, ...]
    direct_emoji_variants: tuple[str, ...]
    direct_unknown_variants: tuple[str, ...]
    derived_math_variants: tuple[str, ...]
    derived_fact_variants: tuple[str, ...]
    derived_fallback_variants: tuple[str, ...]
    composed_variants: tuple[str, ...]
    path_variants: tuple[str, ...]
    abstract_variants: tuple[str, ...]
    explain_unknown_variants: tuple[str, ...]
    explain_emoji_variants: tuple[str, ...]
    explain_math_variants: tuple[str, ...]
    support_connectors: tuple[str, ...]
    support_summary_variants: tuple[str, ...]
    path_explain_variants: tuple[str, ...]
    reasoning_intro_variants: tuple[str, ...]

    def __init__(self, path: str | Path | None = None, payload: dict | None = None) -> None:
        """Load the payload and expose its sections as attributes.

        Args:
            path: JSON file to read when ``payload`` is not given; defaults
                to ``DEFAULT_PATH``.
            payload: Already-loaded data; when given, no file is read.
        """
        self.path = self.DEFAULT_PATH if path is None else Path(path)
        self.payload = self._load_payload(payload)
        data = self.payload
        self.relation_templates = data.get("relation_templates", {})
        self.self_relation_templates = data.get("self_relation_templates", {})
        for field_name in self._VARIANT_FIELDS:
            setattr(self, field_name, tuple(data.get(field_name, ())))

    def _load_payload(self, payload: dict | None) -> dict:
        """Return ``payload`` if given, else the JSON at ``self.path`` (``{}`` if the file is absent)."""
        if payload is not None:
            return payload
        if self.path.exists():
            return json.loads(self.path.read_text(encoding="utf-8"))
        return {}

    def to_payload(self) -> dict:
        """Return a shallow copy of the loaded payload."""
        return {**self.payload}
|