OkeyMeta committed on
Commit
5027724
·
verified ·
1 Parent(s): add8505

Ship bundle-native Open Structure runtime in public release

Browse files
README.md CHANGED
@@ -208,7 +208,7 @@ Aethon generalizes by reusing learned structure across:
208
  The intended public experience is model-like:
209
 
210
  - load the bundle
211
- - create a runtime object
212
  - call `ask(...)`
213
  - get natural text back
214
 
@@ -229,10 +229,12 @@ finally:
229
  Portable runtime included in the release:
230
 
231
  - `examples/aethon_open_structure_python.py`
 
 
232
 
233
- That runtime is intentionally portable and model-facing.
234
 
235
- It should be treated as a real runnable Open Structure entry point, not as a storage demo.
236
 
237
  ## Prompt Examples
238
 
@@ -393,6 +395,8 @@ Additional docs in this release:
393
  - `docs/aethon_n1_bundle_schema.json`
394
  - `docs/AETHON_OPEN_STRUCTURE_RUNTIME.md`
395
 
396
- Starter runtime example:
397
 
398
  - `examples/aethon_open_structure_python.py`
 
 
 
208
  The intended public experience is model-like:
209
 
210
  - load the bundle
211
+ - create a runtime object from the shipped release
212
  - call `ask(...)`
213
  - get natural text back
214
 
 
229
  Portable runtime included in the release:
230
 
231
  - `examples/aethon_open_structure_python.py`
232
+ - `run_aethon.py`
233
+ - `runtime/aethon/...`
234
 
235
+ This release now ships a portable bundle-native runtime pack.
236
 
237
+ It should be treated as a real runnable Open Structure entry point, not as a storage demo or thin adapter.
238
 
239
  ## Prompt Examples
240
 
 
395
  - `docs/aethon_n1_bundle_schema.json`
396
  - `docs/AETHON_OPEN_STRUCTURE_RUNTIME.md`
397
 
398
+ Portable runtime entry points:
399
 
400
  - `examples/aethon_open_structure_python.py`
401
+ - `run_aethon.py`
402
+ - `runtime/aethon/...`
docs/AETHON_OPEN_STRUCTURE_HF_MODEL_CARD.md CHANGED
@@ -208,7 +208,7 @@ Aethon generalizes by reusing learned structure across:
208
  The intended public experience is model-like:
209
 
210
  - load the bundle
211
- - create a runtime object
212
  - call `ask(...)`
213
  - get natural text back
214
 
@@ -229,10 +229,12 @@ finally:
229
  Portable runtime included in the release:
230
 
231
  - `examples/aethon_open_structure_python.py`
 
 
232
 
233
- That runtime is intentionally portable and model-facing.
234
 
235
- It should be treated as a real runnable Open Structure entry point, not as a storage demo.
236
 
237
  ## Prompt Examples
238
 
 
208
  The intended public experience is model-like:
209
 
210
  - load the bundle
211
+ - create a runtime object from the shipped release
212
  - call `ask(...)`
213
  - get natural text back
214
 
 
229
  Portable runtime included in the release:
230
 
231
  - `examples/aethon_open_structure_python.py`
232
+ - `run_aethon.py`
233
+ - `runtime/aethon/...`
234
 
235
+ This release now ships a portable bundle-native runtime pack.
236
 
237
+ It should be treated as a real runnable Open Structure entry point, not as a storage demo or thin adapter.
238
 
239
  ## Prompt Examples
240
 
docs/AETHON_OPEN_STRUCTURE_RUNTIME.md CHANGED
@@ -51,15 +51,19 @@ Then load the persistent bundle memory from `graph.sqlite3`.
51
  The recommended public shape is:
52
 
53
  1. pull the bundle
54
- 2. construct a runtime object
55
  3. call `ask(...)`
56
  4. receive natural text back
57
 
58
  Starter example in this repo:
59
 
60
  - `examples/aethon_open_structure_python.py`
 
 
61
 
62
- That runtime hides storage details behind a model-facing class so developers interact with Aethon as a model rather than as a data store.
 
 
63
 
64
  ## Minimum Read Path
65
 
 
51
  The recommended public shape is:
52
 
53
  1. pull the bundle
54
+ 2. construct a runtime object from the shipped release
55
  3. call `ask(...)`
56
  4. receive natural text back
57
 
58
  Starter example in this repo:
59
 
60
  - `examples/aethon_open_structure_python.py`
61
+ - `run_aethon.py`
62
+ - `runtime/aethon/...`
63
 
64
+ The release now ships a portable bundle-native runtime pack.
65
+
66
+ That runtime hides storage details behind a model-facing interface so developers interact with Aethon as a model rather than as a data store.
67
 
68
  ## Minimum Read Path
69
 
examples/aethon_open_structure_python.py CHANGED
@@ -1,532 +1,92 @@
1
  from __future__ import annotations
2
 
3
- import json
4
- import re
5
- import sqlite3
6
- from collections import deque
7
  from dataclasses import dataclass
8
  from pathlib import Path
9
 
10
- from huggingface_hub import hf_hub_download
11
 
12
 
13
  @dataclass(frozen=True)
14
  class AethonOpenStructureResponse:
15
  answer: str
16
  text: str
 
 
 
17
  mode: str
18
 
19
 
20
  class AethonOpenStructureModel:
21
- """Portable model-facing Open Structure runner."""
22
 
23
- _RELATION_WORDS = {
24
- "live": "lives_in",
25
- "lives": "lives_in",
26
- "stay": "lives_in",
27
- "sleep": "lives_in",
28
- "located": "located_in",
29
- "depend": "depend_on",
30
- "depends": "depend_on",
31
- "return": "return",
32
- "returns": "return",
33
- "watch": "watch",
34
- "watches": "watch",
35
- "buy": "bought",
36
- "bought": "bought",
37
- "prefer": "prefer",
38
- "prefers": "prefer",
39
- "like": "like",
40
- "likes": "like",
41
- "chase": "chase",
42
- "chases": "chase",
43
- "keep": "keeps",
44
- "keeps": "keeps",
45
- "carry": "carrying",
46
- "carries": "carrying",
47
- "carrying": "carrying",
48
- "study": "study",
49
- "studies": "study",
50
- "use": "use",
51
- "uses": "use",
52
- "call": "call",
53
- "calls": "call",
54
- "import": "import",
55
- "imports": "import",
56
- }
57
 
58
- def __init__(self, bundle_dir: str | Path) -> None:
59
- root = Path(bundle_dir)
60
- self.bundle_dir = root
61
- self.metadata = json.loads((root / "metadata.json").read_text(encoding="utf-8"))
62
- self.conn = sqlite3.connect(root / self.metadata["graph_file"])
63
- self.conn.row_factory = sqlite3.Row
64
- self.semantic_lexicon = self.metadata.get("semantic_lexicon", [])
65
- self.surface_lexicon = self.metadata.get("surface_lexicon", {})
66
- self.query_forms = list(self.metadata.get("query_forms", []))
67
- self.reasoning_rules = self.metadata.get("reasoning_rules", {})
 
 
 
 
 
68
 
69
  @classmethod
70
  def from_hub(
71
  cls,
72
  repo_id: str,
73
  *,
74
- local_dir: str | Path = "aethon_open_structure_bundle",
75
  ) -> "AethonOpenStructureModel":
76
- local_root = Path(local_dir)
77
- local_root.mkdir(parents=True, exist_ok=True)
78
- metadata_path = hf_hub_download(
79
- repo_id=repo_id,
80
- filename="bundle/metadata.json",
81
- local_dir=local_root,
82
- local_dir_use_symlinks=False,
83
- )
84
- hf_hub_download(
85
  repo_id=repo_id,
86
- filename="bundle/graph.sqlite3",
87
- local_dir=local_root,
88
  local_dir_use_symlinks=False,
89
  )
90
- return cls(Path(metadata_path).parent)
91
 
92
  def ask(self, question: str) -> AethonOpenStructureResponse:
93
- parts = self._split_prompt(question)
94
- if len(parts) > 1:
95
- replies = [self._ask_one(part) for part in parts]
96
- text = " ".join(reply.text for reply in replies if reply.text)
97
- answer = " | ".join(reply.answer for reply in replies)
98
- return AethonOpenStructureResponse(answer=answer, text=text, mode="multi")
99
- return self._ask_one(question)
100
-
101
- def close(self) -> None:
102
- self.conn.close()
103
-
104
- def _ask_one(self, question: str) -> AethonOpenStructureResponse:
105
- normalized = self._normalize(question)
106
- tokens = normalized.split()
107
- match = self._match_query_form(tokens)
108
- if match is not None:
109
- response = self._answer_query_form(question, normalized, tokens, match)
110
- if response is not None:
111
- return response
112
- return self._unknown(question, normalized)
113
-
114
- def _split_prompt(self, question: str) -> list[str]:
115
- chunks = re.split(r"\?\s+|\?\s*$|\.\s+(?=[A-Z])|\bthen\b|;", question)
116
- return [chunk.strip(" ?.") for chunk in chunks if chunk.strip(" ?.")]
117
-
118
- def _match_query_form(self, tokens: list[str]) -> dict | None:
119
- best: dict | None = None
120
- best_score = -1
121
- for form in self.query_forms:
122
- prefix = [str(token).lower() for token in form.get("prefix", [])]
123
- requires = [str(token).lower() for token in form.get("requires", [])]
124
- if len(tokens) < len(prefix):
125
- continue
126
- if tokens[: len(prefix)] != prefix:
127
- continue
128
- if any(req not in tokens for req in requires):
129
- continue
130
- score = len(prefix) * 10 + len(requires)
131
- if score > best_score:
132
- best = form
133
- best_score = score
134
- return best
135
-
136
- def _answer_query_form(
137
- self,
138
- question: str,
139
- normalized: str,
140
- tokens: list[str],
141
- form: dict,
142
- ) -> AethonOpenStructureResponse | None:
143
- intent = str(form.get("intent", ""))
144
- if intent == "where_entity":
145
- subject = self._extract_subject(tokens, form)
146
- if subject:
147
- answer = self._latest_object(subject, ("located_in", "lives_in"))
148
- if answer:
149
- return AethonOpenStructureResponse(
150
- answer=self._display(answer),
151
- text=f"{self._display(subject)} is currently in {self._display(answer)}.",
152
- mode="direct",
153
- )
154
- return self._unknown(question, normalized)
155
-
156
- if intent == "what_changed":
157
- subject = self._extract_subject(tokens, form)
158
- if subject:
159
- row = self.conn.execute(
160
- """
161
- SELECT relation, previous_object, new_object
162
- FROM contradictions
163
- WHERE subject = ?
164
- ORDER BY contradiction_id DESC
165
- LIMIT 1
166
- """,
167
- (subject,),
168
- ).fetchone()
169
- if row is not None:
170
- relation = self._display_relation(str(row["relation"]))
171
- previous_value = self._display(str(row["previous_object"]))
172
- new_value = self._display(str(row["new_object"]))
173
- text = f"{self._display(subject)} changed in {relation} from {previous_value} to {new_value}."
174
- return AethonOpenStructureResponse(
175
- answer=f"{previous_value} -> {new_value}",
176
- text=text,
177
- mode="revision",
178
- )
179
- return self._unknown(question, normalized)
180
-
181
- if intent == "has_contradiction":
182
- subject = self._extract_subject(tokens, form)
183
- if subject:
184
- row = self.conn.execute(
185
- """
186
- SELECT 1
187
- FROM contradictions
188
- WHERE subject = ?
189
- LIMIT 1
190
- """,
191
- (subject,),
192
- ).fetchone()
193
- if row is not None:
194
- return AethonOpenStructureResponse(
195
- answer="yes",
196
- text=f"Yes, I know conflicting or revised information about {self._display(subject)}.",
197
- mode="contradiction",
198
- )
199
- return AethonOpenStructureResponse(
200
- answer="no",
201
- text=f"I do not currently see a contradiction about {self._display(subject)}.",
202
- mode="contradiction",
203
- )
204
-
205
- if intent == "relation_path":
206
- subject = self._extract_subject(tokens, form)
207
- object_value = self._extract_object(tokens, form)
208
- if subject and object_value:
209
- path = self._find_relation_path(subject, object_value)
210
- if path:
211
- path_text = " -> ".join(self._display_relation(step) for step in path)
212
- text = f"{self._display(subject)} connects to {self._display(object_value)} through {path_text}."
213
- return AethonOpenStructureResponse(answer=path_text, text=text, mode="path")
214
- return self._unknown(question, normalized)
215
-
216
- if intent in {"relation_object", "keep_location_lookup"}:
217
- subject = self._extract_subject(tokens, form)
218
- relation = self._relation_from_form(tokens, form)
219
- if subject and relation:
220
- if relation == "keeps":
221
- kept = self._latest_object(subject, ("keeps",))
222
- if kept:
223
- answer = self._latest_object(kept, ("located_in", "lives_in"))
224
- if answer:
225
- text = f"{self._display(subject)} keeps it in {self._display(answer)}."
226
- return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="derived")
227
- else:
228
- answer = self._latest_object(subject, (relation,))
229
- if answer:
230
- text = self._compose_relation_answer(subject, relation, answer)
231
- return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="direct")
232
- return self._unknown(question, normalized)
233
-
234
- if intent == "relation_subject":
235
- relation = self._relation_from_form(tokens, form)
236
- object_value = self._extract_object(tokens, form)
237
- if relation and object_value:
238
- answer = self._latest_subject(object_value, relation)
239
- if answer:
240
- text = self._compose_reverse_relation_answer(answer, relation, object_value)
241
- return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="direct")
242
- return self._unknown(question, normalized)
243
-
244
- if intent == "classify":
245
- subject = self._extract_subject(tokens, form)
246
- if subject:
247
- answer = self._classify(subject)
248
- if answer:
249
- text = f"{self._display(subject)} is {self._article_for(self._display(answer))} {self._display(answer)}."
250
- return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="classification")
251
- return self._unknown(question, normalized)
252
-
253
- if intent in {"plan_first", "plan_next", "plan_previous"}:
254
- target = self._extract_subject(tokens, form)
255
- if target:
256
- relation = {
257
- "plan_first": "plan_first",
258
- "plan_next": "plan_next",
259
- "plan_previous": "plan_previous",
260
- }[intent]
261
- answer = self._latest_object(target, (relation,))
262
- if answer:
263
- text = self._plan_text(intent, target, answer)
264
- return AethonOpenStructureResponse(answer=self._display(answer), text=text, mode="plan")
265
- return self._unknown(question, normalized)
266
-
267
- if intent == "story_query":
268
- subject = self._extract_subject(tokens, form)
269
- anchor = self._extract_object(tokens, form)
270
- story = self._story_for(subject, anchor)
271
- if story:
272
- return AethonOpenStructureResponse(answer=story, text=story, mode="story")
273
- return self._unknown(question, normalized)
274
-
275
- return None
276
-
277
- def _normalize(self, text: str) -> str:
278
- lowered = text.lower()
279
- lowered = re.sub(r"[^\w\s]", " ", lowered)
280
- lowered = re.sub(r"\s+", " ", lowered).strip()
281
- for entry in self.semantic_lexicon:
282
- symbol = str(entry.get("symbol", "")).strip().lower()
283
- meaning = str(entry.get("meaning", "")).strip().lower()
284
- if symbol and meaning:
285
- lowered = re.sub(rf"\b{re.escape(symbol)}\b", meaning, lowered)
286
- return lowered
287
-
288
- def _extract_subject(self, tokens: list[str], form: dict) -> str | None:
289
- mode = str(form.get("subject_mode", ""))
290
- prefix_len = len(form.get("prefix", []))
291
- end_anchor = str(form.get("subject_end_anchor", "")).lower()
292
- if mode == "tail":
293
- return self._join_tokens(tokens[prefix_len:])
294
- if mode == "tail_without_last":
295
- return self._join_tokens(tokens[prefix_len:-1])
296
- if mode == "single_after_prefix":
297
- return tokens[prefix_len] if len(tokens) > prefix_len else None
298
- if mode == "between_indexes":
299
- subject_start = int(form.get("subject_start", prefix_len))
300
- if subject_start < prefix_len:
301
- subject_start = prefix_len
302
- end_index = len(tokens)
303
- if end_anchor and end_anchor in tokens[subject_start:]:
304
- end_index = tokens.index(end_anchor, subject_start)
305
- return self._join_tokens(tokens[subject_start:end_index])
306
- if mode == "between_anchors":
307
- subject_start = int(form.get("subject_start", prefix_len))
308
- end_index = len(tokens)
309
- if end_anchor and end_anchor in tokens[subject_start:]:
310
- end_index = tokens.index(end_anchor, subject_start)
311
- return self._join_tokens(tokens[subject_start:end_index])
312
- return self._join_tokens(tokens[prefix_len:])
313
-
314
- def _extract_object(self, tokens: list[str], form: dict) -> str | None:
315
- object_mode = str(form.get("object_mode", ""))
316
- prefix_len = len(form.get("prefix", []))
317
- anchor = str(form.get("object_start_anchor", "")).lower()
318
- if object_mode == "after_anchor" and anchor:
319
- if anchor in tokens:
320
- start = tokens.index(anchor) + 1
321
- return self._join_tokens(tokens[start:])
322
- if object_mode == "tail_after_prefix":
323
- return self._join_tokens(tokens[prefix_len:])
324
- if object_mode == "from_relation_words":
325
- relation = self._relation_from_form(tokens, form)
326
- if relation is None:
327
- return None
328
- for index, token in enumerate(tokens):
329
- if self._RELATION_WORDS.get(token) == relation:
330
- return self._join_tokens(tokens[index + 1 :])
331
- return None
332
-
333
- def _relation_from_form(self, tokens: list[str], form: dict) -> str | None:
334
- relation_mode = str(form.get("relation_mode", "fixed"))
335
- if relation_mode == "fixed":
336
- relation = str(form.get("relation", "")).strip()
337
- return relation or None
338
- if relation_mode == "from_words":
339
- for token in tokens:
340
- relation = self._RELATION_WORDS.get(token)
341
- if relation is not None:
342
- return relation
343
- return None
344
-
345
- def _latest_object(self, subject: str, relations: tuple[str, ...]) -> str | None:
346
- if not relations:
347
- return None
348
- placeholders = ", ".join("?" for _ in relations)
349
- row = self.conn.execute(
350
- f"""
351
- SELECT object
352
- FROM edges
353
- WHERE subject = ?
354
- AND relation IN ({placeholders})
355
- AND is_active = 1
356
- ORDER BY edge_id DESC
357
- LIMIT 1
358
- """,
359
- (subject, *relations),
360
- ).fetchone()
361
- return None if row is None else str(row["object"])
362
-
363
- def _latest_subject(self, object_value: str, relation: str) -> str | None:
364
- row = self.conn.execute(
365
- """
366
- SELECT subject
367
- FROM edges
368
- WHERE object = ?
369
- AND relation = ?
370
- AND is_active = 1
371
- ORDER BY edge_id DESC
372
- LIMIT 1
373
- """,
374
- (object_value, relation),
375
- ).fetchone()
376
- return None if row is None else str(row["subject"])
377
-
378
- def _classify(self, subject: str) -> str | None:
379
- direct = self._latest_object(subject, ("is_a", "equals"))
380
- if direct is None:
381
- return None
382
- promoted = self._latest_object(direct, ("is_a",))
383
- return promoted or direct
384
-
385
- def _find_relation_path(self, start: str, goal: str, max_depth: int = 4) -> list[str] | None:
386
- queue: deque[tuple[str, list[str], int]] = deque([(start, [], 0)])
387
- seen = {start}
388
- while queue:
389
- node, path, depth = queue.popleft()
390
- if depth >= max_depth:
391
- continue
392
- rows = self.conn.execute(
393
- """
394
- SELECT relation, object
395
- FROM edges
396
- WHERE subject = ?
397
- AND is_active = 1
398
- ORDER BY edge_id DESC
399
- """,
400
- (node,),
401
- ).fetchall()
402
- for row in rows:
403
- relation = str(row["relation"])
404
- nxt = str(row["object"])
405
- new_path = path + [relation]
406
- if nxt == goal:
407
- return new_path
408
- if nxt not in seen:
409
- seen.add(nxt)
410
- queue.append((nxt, new_path, depth + 1))
411
- return None
412
-
413
- def _story_for(self, subject: str | None, anchor: str | None) -> str | None:
414
- if not subject:
415
- return None
416
- rows = self.conn.execute(
417
- """
418
- SELECT relation, object
419
- FROM edges
420
- WHERE subject = ?
421
- AND is_active = 1
422
- ORDER BY edge_id ASC
423
- LIMIT 6
424
- """,
425
- (subject,),
426
- ).fetchall()
427
- if not rows:
428
- return None
429
- sentences = [self._compose_relation_answer(subject, str(row["relation"]), str(row["object"])) for row in rows]
430
- if anchor:
431
- return " ".join(sentences) + f" After {self._display(anchor)}, the story keeps moving through what Aethon already knows."
432
- return " ".join(sentences)
433
-
434
- def _compose_relation_answer(self, subject: str, relation: str, answer: str) -> str:
435
- relation_text = self._display_relation(relation)
436
- templates = self.surface_lexicon.get("relation_templates", {})
437
- if relation in templates:
438
- template = str(templates[relation])
439
- return template.format(subject=self._display(subject), object=self._display(answer))
440
- return f"{self._display(subject)} {relation_text} {self._display(answer)}."
441
-
442
- def _compose_reverse_relation_answer(self, subject: str, relation: str, object_value: str) -> str:
443
- if relation == "chase":
444
- return f"{self._display(subject)} chases {self._display(object_value)}."
445
- relation_text = self._display_relation(relation)
446
- return f"{self._display(subject)} {relation_text} {self._display(object_value)}."
447
-
448
- def _plan_text(self, intent: str, target: str, answer: str) -> str:
449
- target_display = self._display(target)
450
- answer_display = self._display(answer)
451
- if intent == "plan_first":
452
- return f"For {target_display}, the grounded first step is {answer_display}."
453
- if intent == "plan_next":
454
- return f"After that point, the next grounded step is {answer_display}."
455
- return f"Before that point, the grounded earlier step is {answer_display}."
456
-
457
- def _unknown(self, question: str, normalized: str) -> AethonOpenStructureResponse:
458
- subject = self._salient_subject(normalized)
459
- subject_text = self._display(subject) if subject else "that"
460
- if normalized.startswith("where "):
461
- variants = [
462
- f"I cannot ground a reliable location for {subject_text}, and I would rather not invent one.",
463
- f"{subject_text.capitalize()} is not something I can place confidently from what this bundle can currently support.",
464
- f"I do not have enough grounded structure to place {subject_text} anywhere without guessing.",
465
- ]
466
- elif normalized.startswith(("who ", "what ")):
467
- variants = [
468
- f"I cannot ground a reliable answer about {subject_text} strongly enough yet.",
469
- f"I would rather stay careful than pretend I know more about {subject_text} than I can support.",
470
- f"{subject_text.capitalize()} goes beyond what I can answer faithfully from the current structure.",
471
- ]
472
- else:
473
- variants = [
474
- f"I cannot support a reliable answer for {subject_text} right now.",
475
- f"I would rather stay careful than guess about {subject_text}.",
476
- f"I do not have enough grounded structure to answer that faithfully about {subject_text}.",
477
- ]
478
- index = len(normalized) % len(variants) if normalized else 0
479
- return AethonOpenStructureResponse(answer="<unknown>", text=variants[index], mode="unknown")
480
-
481
- def _display(self, concept: str) -> str:
482
- row = self.conn.execute(
483
- """
484
- SELECT display_name
485
- FROM concepts
486
- WHERE concept_id = ?
487
- """,
488
- (concept,),
489
- ).fetchone()
490
- if row is None:
491
- return concept.replace("_", " ")
492
- display = str(row["display_name"]).strip()
493
- return display or concept.replace("_", " ")
494
-
495
- @staticmethod
496
- def _display_relation(relation: str) -> str:
497
- return relation.replace("_", " ")
498
-
499
- @staticmethod
500
- def _join_tokens(tokens: list[str]) -> str | None:
501
- cleaned = " ".join(token for token in tokens if token).strip()
502
- return cleaned or None
503
 
504
- @staticmethod
505
- def _article_for(text: str) -> str:
506
- return "an" if text[:1].lower() in {"a", "e", "i", "o", "u"} else "a"
507
 
508
- @staticmethod
509
- def _salient_subject(normalized: str) -> str | None:
510
- tokens = normalized.split()
511
- for stopword in ("what", "where", "who", "how", "is", "does", "did", "the", "a", "an", "about", "to", "after", "before"):
512
- tokens = [token for token in tokens if token != stopword]
513
- if not tokens:
514
- return None
515
- return " ".join(tokens[:3])
516
 
517
 
518
  if __name__ == "__main__":
519
  model = AethonOpenStructureModel.from_hub("OkeyMetaLtd/Aethon-N1-Base-Open-Structure")
520
  try:
521
  prompts = [
522
- "Where does Amina live now, and what changed from before?",
523
- "How is Amina related to Nigeria?",
524
- "Tunde has a client call at 2 PM and lunch at 2 PM. What should happen first, and what should be rescheduled?",
525
  ]
526
  for prompt in prompts:
527
  reply = model.ask(prompt)
528
  print(f"Q: {prompt}")
529
  print(f"A: {reply.text}")
 
 
 
 
530
  print()
531
  finally:
532
  model.close()
 
1
  from __future__ import annotations
2
 
3
+ import sys
 
 
 
4
  from dataclasses import dataclass
5
  from pathlib import Path
6
 
7
+ from huggingface_hub import snapshot_download
8
 
9
 
10
  @dataclass(frozen=True)
11
  class AethonOpenStructureResponse:
12
  answer: str
13
  text: str
14
+ explanation: str
15
+ proof: tuple[str, ...]
16
+ reasoning: tuple[str, ...]
17
  mode: str
18
 
19
 
20
  class AethonOpenStructureModel:
21
+ """Portable bundle-native Aethon runtime wrapper.
22
 
23
+ This wrapper downloads the public Open Structure release, loads the bundled
24
+ native runtime, and exposes a simple model-facing API:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ - `from_hub(...)`
27
+ - `ask(...)`
28
+ - `learn(...)`
29
+ """
30
+
31
+ def __init__(self, release_dir: str | Path) -> None:
32
+ self.release_dir = Path(release_dir)
33
+ runtime_root = self.release_dir / "runtime"
34
+ if str(runtime_root) not in sys.path:
35
+ sys.path.insert(0, str(runtime_root))
36
+
37
+ from aethon.rfi_bundle import NativeBundleManager # type: ignore
38
+
39
+ self._runtime = NativeBundleManager.load(self.release_dir / "bundle")
40
+ self.metadata = getattr(self._runtime, "metadata", None)
41
 
42
  @classmethod
43
  def from_hub(
44
  cls,
45
  repo_id: str,
46
  *,
47
+ local_dir: str | Path = "aethon_open_structure_release",
48
  ) -> "AethonOpenStructureModel":
49
+ release_dir = snapshot_download(
 
 
 
 
 
 
 
 
50
  repo_id=repo_id,
51
+ local_dir=str(local_dir),
 
52
  local_dir_use_symlinks=False,
53
  )
54
+ return cls(release_dir)
55
 
56
  def ask(self, question: str) -> AethonOpenStructureResponse:
57
+ response = self._runtime.ask(question)
58
+ return AethonOpenStructureResponse(
59
+ answer=response.answer,
60
+ text=response.text,
61
+ explanation=response.explanation,
62
+ proof=tuple(response.proof),
63
+ reasoning=tuple(response.reasoning),
64
+ mode=response.mode,
65
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ def learn(self, text: str) -> dict[str, object]:
68
+ return self._runtime.learn(text)
 
69
 
70
+ def close(self) -> None:
71
+ self._runtime.close()
 
 
 
 
 
 
72
 
73
 
74
  if __name__ == "__main__":
75
  model = AethonOpenStructureModel.from_hub("OkeyMetaLtd/Aethon-N1-Base-Open-Structure")
76
  try:
77
  prompts = [
78
+ "Amina used to live in Lagos, but now she lives in Accra. What changed about her location?",
79
+ "Tell me the story of Zainab after she misses the last train and discovers the map was outdated.",
80
+ "If the meeting conflicts with lunch and the report must finish before the client call, what should happen first and what should be rescheduled?",
81
  ]
82
  for prompt in prompts:
83
  reply = model.ask(prompt)
84
  print(f"Q: {prompt}")
85
  print(f"A: {reply.text}")
86
+ if reply.reasoning:
87
+ print("reasoning:")
88
+ for step in reply.reasoning:
89
+ print(f" - {step}")
90
  print()
91
  finally:
92
  model.close()
run_aethon.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ ROOT = Path(__file__).resolve().parent
8
+ sys.path.insert(0, str(ROOT / "runtime"))
9
+
10
+ from aethon.rfi_bundle import NativeBundleManager
11
+
12
+
13
+ def main() -> None:
14
+ parser = argparse.ArgumentParser(description="Run Aethon Open Structure bundle.")
15
+ parser.add_argument("--ask", action="append", default=[])
16
+ parser.add_argument("--learn", action="append", default=[])
17
+ args = parser.parse_args()
18
+
19
+ runtime = NativeBundleManager.load(ROOT / "bundle")
20
+ try:
21
+ for fact in args.learn:
22
+ runtime.learn(fact)
23
+ for query in args.ask:
24
+ response = runtime.ask(query)
25
+ print(f"Q: {query}")
26
+ print(f"A: {response.text}")
27
+ if response.reasoning:
28
+ print("reasoning:")
29
+ for step in response.reasoning:
30
+ print(f" - {step}")
31
+ finally:
32
+ runtime.close()
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
runtime/aethon/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Portable Aethon Open Structure runtime package."""
runtime/aethon/rfi_abstraction.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from collections import Counter
5
+
6
+ from .rfi_graph import RelationalGraphStore
7
+ from .rfi_ontology import ConceptOntology
8
+
9
+
10
+ @dataclass(frozen=True)
11
+ class AbstractionRule:
12
+ subject_class: str
13
+ relation: str
14
+ object_class: str
15
+ support: int
16
+
17
+
18
+ class AbstractionEngine:
19
+ """Lifts repeated graph motifs into class-level rules."""
20
+
21
+ _IGNORED_RELATIONS = {
22
+ "is_a",
23
+ "has_instance",
24
+ "contains",
25
+ "home_of",
26
+ "liked_by",
27
+ "preferred_by",
28
+ "chased_by",
29
+ "attacked_by",
30
+ "watched_by",
31
+ "purchase_site_of",
32
+ "kept_by",
33
+ "carried_by",
34
+ }
35
+
36
+ def __init__(self, graph: RelationalGraphStore, ontology: ConceptOntology | None = None) -> None:
37
+ self.graph = graph
38
+ self.ontology = ontology or ConceptOntology()
39
+
40
+ def derive_rules(self, min_support: int = 2) -> list[AbstractionRule]:
41
+ counts: Counter[tuple[str, str, str]] = Counter()
42
+ for edge in self.graph.iter_active_edges():
43
+ if edge.source_kind == "derived":
44
+ continue
45
+ if edge.relation in self._IGNORED_RELATIONS:
46
+ continue
47
+ subject_parents = self.ontology.lift(edge.subject)
48
+ object_parents = self.ontology.lift(edge.object)
49
+ for subject_parent in subject_parents:
50
+ for object_parent in object_parents:
51
+ counts[(subject_parent, edge.relation, object_parent)] += 1
52
+
53
+ rules: list[AbstractionRule] = []
54
+ for (subject_class, relation, object_class), support in sorted(counts.items()):
55
+ if support >= min_support:
56
+ rules.append(
57
+ AbstractionRule(
58
+ subject_class=subject_class,
59
+ relation=relation,
60
+ object_class=object_class,
61
+ support=support,
62
+ )
63
+ )
64
+ return rules
65
+
66
+ def materialize_rules(self, min_support: int = 2) -> list[AbstractionRule]:
67
+ rules = self.derive_rules(min_support=min_support)
68
+ for rule in rules:
69
+ self.graph.add_derived_fact(
70
+ subject=rule.subject_class,
71
+ relation=rule.relation,
72
+ object=rule.object_class,
73
+ source_text=f"abstraction:{rule.subject_class}:{rule.relation}:{rule.object_class}:{rule.support}",
74
+ commit=False,
75
+ )
76
+ if rules:
77
+ self.graph.commit()
78
+ return rules
runtime/aethon/rfi_bundle.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ import shutil
6
+ import sqlite3
7
+ from typing import TYPE_CHECKING
8
+
9
+ from .rfi_metrics import StructuralCapacityMeter
10
+
11
+ if TYPE_CHECKING:
12
+ from .rfi_runtime import AethonNativeBase
13
+
14
+
15
class NativeBundleManager:
    """Persists and reloads Aethon N1 as a portable native base bundle.

    A bundle is a directory holding ``graph.sqlite3`` (the relational graph)
    and ``metadata.json`` (contract information plus the lexicon, query-form
    and reasoning-rule payloads).
    """

    METADATA_FILE = "metadata.json"
    GRAPH_FILE = "graph.sqlite3"
    BUNDLE_FORMAT = 2
    PUBLIC_CONTRACT = "aethon.n1.bundle.v1"
    ASSETS_DIR = "assets"
    SEMANTIC_FILE = "semantic_lexicon.jsonl"
    SURFACE_FILE = "surface_lexicon.json"
    REASONING_RULES_FILE = "reasoning_rules.json"

    @classmethod
    def save(cls, runtime: "AethonNativeBase", bundle_dir: str | Path) -> Path:
        """Write ``runtime`` into ``bundle_dir`` and return the bundle directory.

        The directory is created if needed. The graph is written first so that
        the capacity figures stored in the metadata are measured on the
        bundled graph file itself.
        """
        target = Path(bundle_dir)
        target.mkdir(parents=True, exist_ok=True)
        graph_path = target / cls.GRAPH_FILE
        metadata_path = target / cls.METADATA_FILE

        cls._write_graph(runtime, graph_path)
        capacity = StructuralCapacityMeter.from_sqlite(graph_path)
        metadata = {
            "bundle_format": cls.BUNDLE_FORMAT,
            "public_contract": cls.PUBLIC_CONTRACT,
            "release_class": "open-structure",
            "name": runtime.NAME,
            "family": runtime.FAMILY,
            "tokenizer": runtime.TOKENIZER,
            "size_unit": "Structural Capacity (SC)",
            "capacity": capacity.to_metadata(),
            "bundle_files": [cls.METADATA_FILE, cls.GRAPH_FILE],
            "graph_file": cls.GRAPH_FILE,
            "graph_dialect": "sqlite3",
            "sqlite_schema": {
                "required_tables": ["concepts", "edges", "contradictions"],
                "optional_tables": ["raw_units"],
                "edge_active_flag": "is_active",
                "edge_support_pointer": "supports_edge_id",
            },
            "semantic_lexicon": runtime.ontology.semantic_lexicon.to_payload(),
            "surface_lexicon": runtime.surface.lexicon.to_payload(),
            "query_forms": runtime.query_forms.to_payload(),
            "reasoning_rules": runtime.reasoner.rule_set.to_payload(),
        }
        metadata_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
        return target

    @classmethod
    def load(cls, bundle_dir: str | Path) -> "AethonNativeBase":
        """Rebuild a runtime from the bundle stored in ``bundle_dir``.

        Bundles whose ``bundle_format`` is at least ``BUNDLE_FORMAT`` carry
        their lexicons and rules inside ``metadata.json``. Older bundles (or
        ones without metadata) are loaded from files under ``assets/``.

        Raises:
            FileNotFoundError: if the bundle has no graph file.
        """
        # Imported here rather than at module level; the module-level import is
        # guarded by TYPE_CHECKING.
        from .rfi_runtime import AethonNativeBase

        source = Path(bundle_dir)
        graph_path = source / cls.GRAPH_FILE
        metadata_path = source / cls.METADATA_FILE
        if not graph_path.exists():
            raise FileNotFoundError(f"Missing bundle graph: {graph_path}")
        metadata = json.loads(metadata_path.read_text(encoding="utf-8")) if metadata_path.exists() else {}
        if metadata.get("bundle_format", 1) >= cls.BUNDLE_FORMAT:
            return AethonNativeBase(
                db_path=str(graph_path),
                semantic_lexicon_payload=metadata.get("semantic_lexicon"),
                surface_lexicon_payload=metadata.get("surface_lexicon"),
                query_form_payload=metadata.get("query_forms"),
                reasoning_rules_payload=metadata.get("reasoning_rules"),
            )
        assets_dir = source / cls.ASSETS_DIR
        return AethonNativeBase(
            db_path=str(graph_path),
            semantic_lexicon_path=str(assets_dir / cls.SEMANTIC_FILE),
            surface_lexicon_path=str(assets_dir / cls.SURFACE_FILE),
            reasoning_rules_path=str(assets_dir / cls.REASONING_RULES_FILE),
        )

    @staticmethod
    def _write_graph(runtime: "AethonNativeBase", graph_path: Path) -> None:
        """Write a complete snapshot of the runtime's graph to ``graph_path``.

        The snapshot goes through the SQLite backup API instead of a raw file
        copy. A database running in WAL journal mode keeps recently committed
        pages in a ``-wal`` sidecar file, so copying only the main database
        file can produce a stale or empty bundle graph.
        """
        connection = runtime.graph.conn
        connection.commit()
        if runtime.graph.db_path != ":memory:":
            source = Path(runtime.graph.db_path)
            if source.resolve() == graph_path.resolve():
                # The runtime already works on the bundle file. Fold any WAL
                # content back into it so the single graph file is complete.
                connection.execute("PRAGMA wal_checkpoint(TRUNCATE)").fetchall()
                return

        destination = sqlite3.connect(str(graph_path))
        try:
            connection.backup(destination)
        finally:
            destination.close()
runtime/aethon/rfi_document_filter.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass(frozen=True)
class DocumentSelection:
    """Immutable verdict of the quality gate for a single document."""

    # True when the document passed every check.
    accepted: bool
    # Short machine-readable outcome label such as "accepted" or "boilerplate".
    reason: str
    # Learning units extracted from the document; empty when rejected.
    units: tuple[str, ...]
11
+
12
+
13
class DocumentQualityGate:
    """Reject low-value documents and pull learnable units out of the rest."""

    # Lower-case phrases that mark a document or sentence as boilerplate.
    _BLOCKLIST = (
        "cookie policy",
        "privacy policy",
        "terms of service",
        "all rights reserved",
        "sign up",
        "subscribe",
        "javascript",
        "enable cookies",
    )

    def select(self, title: str, text: str) -> DocumentSelection:
        """Judge one document and return the verdict with its extracted units.

        Checks run in order: empty input, boilerplate markers, more than three
        "http" occurrences, fewer than 24 alphabetic characters, and finally
        whether any viable unit could be extracted.
        """
        title, text = title.strip(), text.strip()
        merged = "\n".join(filter(None, (title, text)))
        if not merged:
            return DocumentSelection(False, "empty", ())

        lowered = merged.lower()
        for marker in self._BLOCKLIST:
            if marker in lowered:
                return DocumentSelection(False, "boilerplate", ())
        if lowered.count("http") > 3:
            return DocumentSelection(False, "too_many_urls", ())

        letter_count = len([char for char in merged if char.isalpha()])
        if letter_count < 24:
            return DocumentSelection(False, "too_little_language", ())

        units = self._extract_units(title, text)
        if units:
            return DocumentSelection(True, "accepted", tuple(units))
        return DocumentSelection(False, "no_viable_units", ())

    def _extract_units(self, title: str, text: str) -> list[str]:
        """Collect the title, code-like lines and viable prose sentences, deduplicated."""
        collected: list[str] = []
        if title and self._is_viable_sentence(title):
            collected.append(title.strip())

        # Pass 1: whole lines that look like assignments, returns or equations.
        code_checks = (self._looks_like_assignment, self._looks_like_return, self._looks_like_equation)
        for raw_line in text.splitlines():
            candidate = raw_line.strip().strip("`")
            if candidate and any(check(candidate) for check in code_checks):
                collected.append(candidate)

        # Pass 2: prose sentences, stopping once 64 units have been gathered.
        for sentence in self._split_prose_units(text.replace("\r", "\n")):
            trimmed = sentence.strip(" -:;,\t")
            if self._is_viable_sentence(trimmed):
                collected.append(trimmed)
            if len(collected) >= 64:
                break

        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(collected))

    def _is_viable_sentence(self, sentence: str) -> bool:
        """Return True for a 3-24 token sentence free of boilerplate, links and shouting."""
        if not sentence:
            return False
        lowered = sentence.lower()
        if any(marker in lowered for marker in self._BLOCKLIST):
            return False
        tokens = sentence.split()
        if not 3 <= len(tokens) <= 24:
            return False
        if "http" in sentence or sentence.count("@") > 1:
            return False
        # Too many long all-caps tokens.
        shouted = sum(1 for token in tokens if token.isupper() and len(token) > 3)
        return shouted <= max(3, len(tokens) // 3)

    def _looks_like_assignment(self, line: str) -> bool:
        """Return True for ``identifier = value`` lines without sentence punctuation."""
        if any(mark in line for mark in ".!?"):
            return False
        if "=" not in line or "==" in line:
            return False
        left, _, right = line.partition("=")
        left, right = left.strip(), right.strip()
        return self._is_identifier(left) and bool(right) and not right.startswith("=")

    def _looks_like_return(self, line: str) -> bool:
        """Return True for ``[def|function] name return(s) value`` style lines."""
        if any(mark in line for mark in ".!?"):
            return False
        tokens = line.split()
        if len(tokens) < 3:
            return False
        offset = 0
        if tokens[0] in {"def", "function"}:
            # A leading keyword needs one more token to leave room for a value.
            if len(tokens) < 4:
                return False
            offset = 1
        name, verb, *rest = tokens[offset:]
        value = " ".join(rest).strip()
        return self._is_identifier(name) and verb in {"return", "returns"} and bool(value)

    def _looks_like_equation(self, line: str) -> bool:
        """Return True for a single-``=`` line of plain symbols containing an arithmetic operator."""
        if any(mark in line for mark in ".!?"):
            return False
        if "=" not in line or "==" in line:
            return False
        left, _, right = line.partition("=")
        left, right = left.strip(), right.strip()
        if not left or not right:
            return False
        allowed = set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_+-*/() ")
        if not set(left + right) <= allowed:
            return False
        return any(op in line for op in "+-*/")

    @staticmethod
    def _split_prose_units(prose: str) -> list[str]:
        """Split prose at newlines and at ``.``/``!``/``?`` followed by whitespace or end of text."""
        pieces: list[str] = []
        buffer: list[str] = []
        last_index = len(prose) - 1
        for position, char in enumerate(prose):
            if char == "\n":
                if buffer:
                    pieces.append("".join(buffer).strip())
                    buffer = []
                continue
            buffer.append(char)
            if char in ".!?":
                # A terminator only ends the unit at end of text or before
                # whitespace, so "v1.2" stays in one piece.
                if position == last_index or prose[position + 1].isspace():
                    pieces.append("".join(buffer).strip())
                    buffer = []
        if buffer:
            pieces.append("".join(buffer).strip())
        return [piece for piece in pieces if piece]

    @staticmethod
    def _is_identifier(value: str) -> bool:
        """Return True when ``value`` starts with a letter or ``_`` and contains only alphanumerics and ``_``."""
        if not value:
            return False
        head = value[0]
        if not (head == "_" or head.isalpha()):
            return False
        return all(char == "_" or char.isalnum() for char in value)
runtime/aethon/rfi_graph.py ADDED
@@ -0,0 +1,769 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from collections import deque
5
+ import sqlite3
6
+ from pathlib import Path
7
+ import shutil
8
+
9
+ from .rfi_ingest import Triple
10
+
11
+
12
@dataclass(frozen=True)
class EdgeRecord:
    """Immutable in-memory view of one row of the ``edges`` table."""

    edge_id: int
    subject: str
    relation: str
    object: str
    # Origin label of the edge; the store writes "derived" for inferred facts.
    source_kind: str
    source_text: str
    is_active: bool
    # Id of the edge this one was generated from, when recorded.
    supports_edge_id: int | None = None
22
+
23
+
24
@dataclass(frozen=True)
class ContradictionRecord:
    """Immutable in-memory view of one row of the ``contradictions`` table."""

    contradiction_id: int
    subject: str
    relation: str
    # Object value held before the new fact arrived, and the value that replaced it.
    previous_object: str
    new_object: str
    # Edge ids of the superseded edge and of its replacement.
    previous_edge_id: int
    new_edge_id: int
    source_text: str
34
+
35
+
36
class RelationalGraphStore:
    """SQLite-backed sparse graph for one-shot structural learning.

    The store keeps four tables: ``concepts`` (node ids and display names),
    ``edges`` (subject/relation/object facts with an active flag),
    ``contradictions`` (a log of facts that replaced an earlier value) and
    ``raw_units`` (undigested text units). ``mutation_version`` is a counter
    bumped by the mutating methods so callers can detect changes.
    """

    # Relation name -> name of the relation stored on the reversed edge.
    INVERSE_RELATIONS = {
        "located_in": "contains",
        "contains": "located_in",
        "chase": "chased_by",
        "chased_by": "chase",
        "attack": "attacked_by",
        "attacked_by": "attack",
        "watch": "watched_by",
        "watched_by": "watch",
        "lives_in": "home_of",
        "home_of": "lives_in",
        "like": "liked_by",
        "liked_by": "like",
        "prefer": "preferred_by",
        "preferred_by": "prefer",
        "bought_in": "purchase_site_of",
        "purchase_site_of": "bought_in",
        "is_a": "has_instance",
        "has_instance": "is_a",
        "keeps": "kept_by",
        "kept_by": "keeps",
        "carrying": "carried_by",
        "carried_by": "carrying",
    }

    def __init__(self, db_path: str | Path = ":memory:") -> None:
        """Open (or create) the database at ``db_path`` and ensure the schema exists."""
        self.db_path = str(db_path)
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row
        self.mutation_version = 0
        self._configure_connection()
        self._init_schema()

    def _configure_connection(self) -> None:
        """Apply the per-connection PRAGMA settings used by the store."""
        self.conn.execute("PRAGMA journal_mode=WAL")
        self.conn.execute("PRAGMA synchronous=NORMAL")
        self.conn.execute("PRAGMA temp_store=MEMORY")
        self.conn.execute("PRAGMA foreign_keys=OFF")
        self.conn.execute("PRAGMA cache_size=-200000")

    def _init_schema(self) -> None:
        """Create missing tables and indexes and migrate older databases in place."""
        self.conn.executescript(
            """
            CREATE TABLE IF NOT EXISTS concepts (
                concept_id TEXT PRIMARY KEY,
                display_name TEXT NOT NULL DEFAULT ''
            );

            CREATE TABLE IF NOT EXISTS edges (
                edge_id INTEGER PRIMARY KEY AUTOINCREMENT,
                subject TEXT NOT NULL,
                relation TEXT NOT NULL,
                object TEXT NOT NULL,
                source_kind TEXT NOT NULL,
                source_text TEXT NOT NULL,
                is_active INTEGER NOT NULL DEFAULT 1,
                supports_edge_id INTEGER,
                created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
            );

            CREATE INDEX IF NOT EXISTS idx_edges_subject_relation_active
            ON edges(subject, relation, is_active);

            CREATE INDEX IF NOT EXISTS idx_edges_object_relation_active
            ON edges(object, relation, is_active);

            CREATE TABLE IF NOT EXISTS contradictions (
                contradiction_id INTEGER PRIMARY KEY AUTOINCREMENT,
                subject TEXT NOT NULL,
                relation TEXT NOT NULL,
                previous_object TEXT NOT NULL,
                new_object TEXT NOT NULL,
                previous_edge_id INTEGER NOT NULL,
                new_edge_id INTEGER NOT NULL,
                source_text TEXT NOT NULL,
                created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
            );

            CREATE TABLE IF NOT EXISTS raw_units (
                unit_id INTEGER PRIMARY KEY AUTOINCREMENT,
                lane TEXT NOT NULL,
                source TEXT NOT NULL,
                text TEXT NOT NULL,
                normalized_text TEXT NOT NULL,
                is_digested INTEGER NOT NULL DEFAULT 0,
                created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
            );

            CREATE INDEX IF NOT EXISTS idx_raw_units_lane
            ON raw_units(lane);
            """
        )
        # Older databases may predate these columns; add them when missing.
        columns = {
            str(row["name"])
            for row in self.conn.execute("PRAGMA table_info(concepts)").fetchall()
        }
        if "display_name" not in columns:
            self.conn.execute("ALTER TABLE concepts ADD COLUMN display_name TEXT NOT NULL DEFAULT ''")
        # Backfill blank display names with the concept id.
        self.conn.execute("UPDATE concepts SET display_name = concept_id WHERE display_name = ''")
        raw_unit_columns = {
            str(row["name"])
            for row in self.conn.execute("PRAGMA table_info(raw_units)").fetchall()
        }
        if "is_digested" not in raw_unit_columns:
            self.conn.execute("ALTER TABLE raw_units ADD COLUMN is_digested INTEGER NOT NULL DEFAULT 0")
        self.conn.commit()

    def ingest_triples(self, triples: list[Triple], *, commit: bool = True) -> list[int]:
        """Add each triple through ``add_fact`` and return the new edge ids in order."""
        edge_ids: list[int] = []
        for triple in triples:
            edge_ids.append(self.add_fact(triple, commit=False))
        if commit and edge_ids:
            self.conn.commit()
        return edge_ids

    def ingest_triples_fast(self, triples: list[Triple], *, commit: bool = True) -> int:
        """Bulk-insert triples and their inverse edges; return the number of edge rows written.

        Unlike ``add_fact`` this path does not deactivate earlier edges for the
        same subject/relation and does not record contradictions.
        """
        if not triples:
            return 0

        concepts: dict[str, str] = {}
        edge_rows: list[tuple[str, str, str, str, str, int, int | None]] = []
        for triple in triples:
            # The first surface form seen for a concept in this batch wins.
            if triple.subject not in concepts:
                concepts[triple.subject] = triple.subject_surface.strip() or triple.subject
            if triple.object not in concepts:
                concepts[triple.object] = triple.object_surface.strip() or triple.object
            edge_rows.append(
                (
                    triple.subject,
                    triple.relation,
                    triple.object,
                    triple.source_kind,
                    triple.source_text,
                    1,
                    None,
                )
            )
            inverse_relation = self.INVERSE_RELATIONS.get(triple.relation)
            if inverse_relation is not None:
                edge_rows.append(
                    (
                        triple.object,
                        inverse_relation,
                        triple.subject,
                        triple.source_kind,
                        triple.source_text,
                        1,
                        None,
                    )
                )

        self.conn.executemany(
            """
            INSERT OR IGNORE INTO concepts(concept_id, display_name)
            VALUES (?, ?)
            """,
            tuple(concepts.items()),
        )
        self.conn.executemany(
            """
            INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            edge_rows,
        )
        self._touch()
        if commit:
            self.conn.commit()
        return len(edge_rows)

    def ingest_raw_units(
        self,
        units: list[tuple[str, str, str]],
        *,
        commit: bool = True,
    ) -> int:
        """Store ``(lane, source, text)`` units as undigested rows; return how many were written.

        Units whose text is blank are skipped. ``normalized_text`` is the
        lower-cased text with whitespace runs collapsed to single spaces.
        """
        if not units:
            return 0
        rows = [
            (
                lane,
                source,
                text,
                " ".join(text.lower().split()),
            )
            for lane, source, text in units
            if text.strip()
        ]
        if not rows:
            return 0
        self.conn.executemany(
            """
            INSERT INTO raw_units(lane, source, text, normalized_text, is_digested)
            VALUES (?, ?, ?, ?, 0)
            """,
            rows,
        )
        self._touch()
        if commit:
            self.conn.commit()
        return len(rows)

    def fetch_undigested_raw_units(
        self,
        *,
        limit: int,
    ) -> list[dict[str, object]]:
        """Return up to ``limit`` undigested raw units, oldest first, as plain dicts."""
        rows = self.conn.execute(
            """
            SELECT unit_id, lane, source, text
            FROM raw_units
            WHERE is_digested = 0
            ORDER BY unit_id ASC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()
        return [
            {
                "unit_id": int(row["unit_id"]),
                "lane": str(row["lane"]),
                "source": str(row["source"]),
                "text": str(row["text"]),
            }
            for row in rows
        ]

    def mark_raw_units_digested(self, unit_ids: list[int], *, commit: bool = True) -> int:
        """Flag the given raw units as digested; return the number of ids passed in."""
        if not unit_ids:
            return 0
        # Only "?" placeholders are interpolated; the ids themselves are bound.
        placeholders = ",".join("?" for _ in unit_ids)
        self.conn.execute(
            f"UPDATE raw_units SET is_digested = 1 WHERE unit_id IN ({placeholders})",
            tuple(unit_ids),
        )
        self._touch()
        if commit:
            self.conn.commit()
        return len(unit_ids)

    def count_undigested_raw_units(self) -> int:
        """Return how many raw units are still waiting to be digested."""
        row = self.conn.execute(
            "SELECT COUNT(*) AS count FROM raw_units WHERE is_digested = 0"
        ).fetchone()
        return int(row["count"]) if row else 0

    def purge_digested_raw_units(self, *, commit: bool = True) -> int:
        """Delete every digested raw unit and return how many rows were removed."""
        before = self.conn.total_changes
        self.conn.execute("DELETE FROM raw_units WHERE is_digested = 1")
        removed = self.conn.total_changes - before
        if removed:
            self._touch()
        if commit:
            self.conn.commit()
        return int(removed)

    def restore_from_db(self, source_db_path: str | Path) -> None:
        """Replace the store's entire contents with the database at ``source_db_path``.

        The current connection is closed and reopened. An in-memory store is
        filled through the SQLite backup API; a file-backed store has the
        source file copied over its own database file. In both cases
        ``mutation_version`` is bumped.
        """
        source_path = Path(source_db_path)
        self.conn.close()
        if self.db_path == ":memory:":
            source = sqlite3.connect(str(source_path))
            source.row_factory = sqlite3.Row
            try:
                self.conn = sqlite3.connect(":memory:")
                self.conn.row_factory = sqlite3.Row
                self._configure_connection()
                source.backup(self.conn)
            finally:
                source.close()
            # The contents were swapped wholesale, so signal the change here
            # just as the file-backed branch does.
            self._touch()
            return

        target_path = Path(self.db_path)
        target_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source_path, target_path)
        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row
        self._configure_connection()
        self._touch()

    def add_fact(self, triple: Triple, *, commit: bool = True) -> int:
        """Insert a fact as the active value for its subject/relation; return its edge id.

        Earlier active edges for the same subject and relation are deactivated
        (except for ``is_a``). When the most recent active edge pointed at a
        different object, a contradiction row is logged. The inverse edge is
        written as well when the relation has one.
        """
        self._ensure_concept(triple.subject, triple.subject_surface)
        self._ensure_concept(triple.object, triple.object_surface)
        # Capture the previous value before it is deactivated.
        prior_edge = self.get_active_edge(triple.subject, triple.relation)
        self._deactivate_conflicting_edges(triple.subject, triple.relation)
        cursor = self.conn.execute(
            """
            INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active)
            VALUES (?, ?, ?, ?, ?, 1)
            """,
            (triple.subject, triple.relation, triple.object, triple.source_kind, triple.source_text),
        )
        edge_id = int(cursor.lastrowid)
        if prior_edge is not None and prior_edge.object != triple.object:
            self.conn.execute(
                """
                INSERT INTO contradictions(
                    subject, relation, previous_object, new_object, previous_edge_id, new_edge_id, source_text
                )
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    triple.subject,
                    triple.relation,
                    prior_edge.object,
                    triple.object,
                    prior_edge.edge_id,
                    edge_id,
                    triple.source_text,
                ),
            )
        self._upsert_inverse_edge(
            subject=triple.subject,
            relation=triple.relation,
            object_value=triple.object,
            source_kind=triple.source_kind,
            source_text=triple.source_text,
            supports_edge_id=edge_id,
        )
        self._touch()
        if commit:
            self.conn.commit()
        return edge_id

    def add_derived_fact(
        self,
        *,
        subject: str,
        relation: str,
        object: str,
        source_text: str,
        supports_edge_id: int | None = None,
        commit: bool = True,
    ) -> int:
        """Insert an edge with ``source_kind='derived'`` and return its edge id.

        If the most recent active edge for the subject/relation is already a
        derived edge to the same object, that edge's id is returned and
        nothing is written. Derived facts do not deactivate existing edges for
        their own subject/relation.
        """
        self._ensure_concept(subject)
        self._ensure_concept(object)
        existing = self.get_active_edge(subject, relation)
        if existing is not None and existing.object == object and existing.source_kind == "derived":
            return existing.edge_id
        cursor = self.conn.execute(
            """
            INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
            VALUES (?, ?, ?, 'derived', ?, 1, ?)
            """,
            (subject, relation, object, source_text, supports_edge_id),
        )
        edge_id = int(cursor.lastrowid)
        self._upsert_inverse_edge(
            subject=subject,
            relation=relation,
            object_value=object,
            source_kind="derived",
            source_text=source_text,
            supports_edge_id=edge_id,
        )
        self._touch()
        if commit:
            self.conn.commit()
        return edge_id

    def get_active_edge(self, subject: str, relation: str) -> EdgeRecord | None:
        """Return the newest active edge for ``subject``/``relation``, or None."""
        row = self.conn.execute(
            """
            SELECT * FROM edges
            WHERE subject = ? AND relation = ? AND is_active = 1
            ORDER BY edge_id DESC
            LIMIT 1
            """,
            (subject, relation),
        ).fetchone()
        return self._row_to_edge(row) if row else None

    def get_objects(self, subject: str, relation: str) -> list[EdgeRecord]:
        """Return all active edges for ``subject``/``relation``, newest first."""
        rows = self.conn.execute(
            """
            SELECT * FROM edges
            WHERE subject = ? AND relation = ? AND is_active = 1
            ORDER BY edge_id DESC
            """,
            (subject, relation),
        ).fetchall()
        return [self._row_to_edge(row) for row in rows]

    def get_subjects(self, relation: str, object_value: str) -> list[EdgeRecord]:
        """Return all active edges with the given relation and object, newest first."""
        rows = self.conn.execute(
            """
            SELECT * FROM edges
            WHERE relation = ? AND object = ? AND is_active = 1
            ORDER BY edge_id DESC
            """,
            (relation, object_value),
        ).fetchall()
        return [self._row_to_edge(row) for row in rows]

    def iter_active_edges(self) -> list[EdgeRecord]:
        """Return every active edge, oldest first (a materialized list, not a lazy iterator)."""
        rows = self.conn.execute(
            """
            SELECT * FROM edges
            WHERE is_active = 1
            ORDER BY edge_id ASC
            """
        ).fetchall()
        return [self._row_to_edge(row) for row in rows]

    def list_concepts(self) -> list[str]:
        """Return all concept ids in ascending order."""
        rows = self.conn.execute(
            """
            SELECT concept_id FROM concepts
            ORDER BY concept_id ASC
            """
        ).fetchall()
        return [str(row["concept_id"]) for row in rows]

    def iter_outgoing_edges(self, subject: str) -> list[EdgeRecord]:
        """Return the active edges leaving ``subject``, oldest first."""
        rows = self.conn.execute(
            """
            SELECT * FROM edges
            WHERE subject = ? AND is_active = 1
            ORDER BY edge_id ASC
            """,
            (subject,),
        ).fetchall()
        return [self._row_to_edge(row) for row in rows]

    def find_path(
        self,
        start: str,
        goal: str,
        max_hops: int = 4,
        *,
        include_derived: bool = True,
    ) -> list[EdgeRecord] | None:
        """Breadth-first search for a path of at most ``max_hops`` active edges.

        Returns the edges along the first path found from ``start`` to
        ``goal``, an empty list when both are the same concept, or None when
        no path exists within the hop limit. Derived edges are skipped when
        ``include_derived`` is False.
        """
        if start == goal:
            return []
        queue: deque[tuple[str, list[EdgeRecord]]] = deque([(start, [])])
        visited = {start}
        while queue:
            node, path = queue.popleft()
            # Nodes already max_hops away from the start are not expanded.
            if len(path) >= max_hops:
                continue
            for edge in self.iter_outgoing_edges(node):
                if not include_derived and edge.source_kind == "derived":
                    continue
                if edge.object == goal:
                    return path + [edge]
                if edge.object not in visited:
                    visited.add(edge.object)
                    queue.append((edge.object, path + [edge]))
        return None

    def get_contradictions(self, subject: str | None = None, relation: str | None = None) -> list[ContradictionRecord]:
        """Return logged contradictions, newest first, optionally filtered by subject and/or relation."""
        where_parts: list[str] = []
        params: list[str] = []
        if subject is not None:
            where_parts.append("subject = ?")
            params.append(subject)
        if relation is not None:
            where_parts.append("relation = ?")
            params.append(relation)
        # Only fixed clause text is interpolated; values are bound parameters.
        where_sql = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""
        rows = self.conn.execute(
            f"""
            SELECT * FROM contradictions
            {where_sql}
            ORDER BY contradiction_id DESC
            """,
            params,
        ).fetchall()
        return [self._row_to_contradiction(row) for row in rows]

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def commit(self) -> None:
        """Commit the current transaction."""
        self.conn.commit()

    def merge_from_db(self, source_db_path: str | Path, *, fast: bool = False) -> dict[str, int]:
        """Copy concepts, edges, contradictions and raw units from another database.

        Edge ids are remapped, so ``supports_edge_id`` and contradiction
        references point at the rows created (or matched) in this store. With
        ``fast=False`` edges and contradictions that already exist here are
        reused instead of duplicated; ``fast=True`` skips those lookups. Raw
        units are always appended. A source without a ``raw_units`` table is
        accepted and contributes no raw units.

        Returns:
            Counts of merged rows under the keys ``concepts``, ``edges``,
            ``contradictions`` and ``raw_units``.
        """
        source = sqlite3.connect(str(source_db_path))
        source.row_factory = sqlite3.Row
        edge_id_map: dict[int, int] = {}
        merged_edges = 0
        merged_concepts = 0
        merged_contradictions = 0
        merged_raw_units = 0
        try:
            # Select every column so display_name is carried over when the
            # source has it (older sources may not).
            for row in source.execute("SELECT * FROM concepts ORDER BY concept_id ASC"):
                before = self.conn.total_changes
                display_name = str(row["display_name"]) if "display_name" in row.keys() else ""
                self._ensure_concept(str(row["concept_id"]), display_name)
                if self.conn.total_changes > before:
                    merged_concepts += 1

            for row in source.execute("SELECT * FROM edges ORDER BY edge_id ASC"):
                original_edge_id = int(row["edge_id"])
                if not fast:
                    existing_edge_id = self._find_matching_edge(
                        subject=str(row["subject"]),
                        relation=str(row["relation"]),
                        object_value=str(row["object"]),
                        source_kind=str(row["source_kind"]),
                        source_text=str(row["source_text"]),
                        is_active=bool(row["is_active"]),
                    )
                    if existing_edge_id is not None:
                        edge_id_map[original_edge_id] = existing_edge_id
                        continue

                self._ensure_concept(str(row["subject"]))
                self._ensure_concept(str(row["object"]))
                supports_edge_id = row["supports_edge_id"]
                # Edges are visited in ascending id order, so a supporting edge
                # with a lower id has already been mapped at this point.
                mapped_support = edge_id_map.get(int(supports_edge_id)) if supports_edge_id is not None else None
                cursor = self.conn.execute(
                    """
                    INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        str(row["subject"]),
                        str(row["relation"]),
                        str(row["object"]),
                        str(row["source_kind"]),
                        str(row["source_text"]),
                        int(row["is_active"]),
                        mapped_support,
                    ),
                )
                edge_id = int(cursor.lastrowid)
                edge_id_map[original_edge_id] = edge_id
                merged_edges += 1

            for row in source.execute("SELECT * FROM contradictions ORDER BY contradiction_id ASC"):
                previous_edge_id = edge_id_map.get(int(row["previous_edge_id"]))
                new_edge_id = edge_id_map.get(int(row["new_edge_id"]))
                # Skip contradictions whose edges are not present in the source.
                if previous_edge_id is None or new_edge_id is None:
                    continue
                if not fast:
                    if self._contradiction_exists(
                        subject=str(row["subject"]),
                        relation=str(row["relation"]),
                        previous_object=str(row["previous_object"]),
                        new_object=str(row["new_object"]),
                        previous_edge_id=previous_edge_id,
                        new_edge_id=new_edge_id,
                    ):
                        continue
                self.conn.execute(
                    """
                    INSERT INTO contradictions(
                        subject, relation, previous_object, new_object, previous_edge_id, new_edge_id, source_text
                    )
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        str(row["subject"]),
                        str(row["relation"]),
                        str(row["previous_object"]),
                        str(row["new_object"]),
                        previous_edge_id,
                        new_edge_id,
                        str(row["source_text"]),
                    ),
                )
                merged_contradictions += 1

            # raw_units may be absent from the source database.
            has_raw_units = source.execute(
                "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = 'raw_units' LIMIT 1"
            ).fetchone() is not None
            if has_raw_units:
                for row in source.execute("SELECT * FROM raw_units ORDER BY unit_id ASC"):
                    self.conn.execute(
                        """
                        INSERT INTO raw_units(lane, source, text, normalized_text, is_digested)
                        VALUES (?, ?, ?, ?, ?)
                        """,
                        (
                            str(row["lane"]),
                            str(row["source"]),
                            str(row["text"]),
                            str(row["normalized_text"]),
                            int(row["is_digested"]) if "is_digested" in row.keys() else 0,
                        ),
                    )
                    merged_raw_units += 1
            self.conn.commit()
            if merged_concepts or merged_edges or merged_contradictions or merged_raw_units:
                self._touch()
        finally:
            source.close()
        return {
            "concepts": merged_concepts,
            "edges": merged_edges,
            "contradictions": merged_contradictions,
            "raw_units": merged_raw_units,
        }

    def _ensure_concept(self, concept_id: str, display_name: str = "") -> None:
        """Create the concept row if missing and upgrade its display name when a better one is offered."""
        cleaned_display = display_name.strip()
        self.conn.execute(
            "INSERT OR IGNORE INTO concepts(concept_id, display_name) VALUES (?, ?)",
            (concept_id, cleaned_display or concept_id),
        )
        if cleaned_display:
            current = self.conn.execute(
                "SELECT display_name FROM concepts WHERE concept_id = ? LIMIT 1",
                (concept_id,),
            ).fetchone()
            current_name = str(current["display_name"]).strip() if current else ""
            if self._prefer_display_name(cleaned_display, current_name):
                self.conn.execute(
                    "UPDATE concepts SET display_name = ? WHERE concept_id = ?",
                    (cleaned_display, concept_id),
                )

    def _touch(self) -> None:
        """Bump ``mutation_version`` to signal that the stored data changed."""
        self.mutation_version += 1

    def _deactivate_conflicting_edges(self, subject: str, relation: str) -> None:
        """Deactivate all active edges for ``subject``/``relation`` (``is_a`` edges are left alone)."""
        if relation in {"is_a"}:
            return
        self.conn.execute(
            """
            UPDATE edges
            SET is_active = 0
            WHERE subject = ? AND relation = ? AND is_active = 1
            """,
            (subject, relation),
        )

    def _upsert_inverse_edge(
        self,
        *,
        subject: str,
        relation: str,
        object_value: str,
        source_kind: str,
        source_text: str,
        supports_edge_id: int | None,
    ) -> None:
        """Write the reversed edge for a relation that has an entry in ``INVERSE_RELATIONS``.

        Existing active edges for the inverse subject/relation are deactivated
        first, except when the inverse relation is ``has_instance`` or ``is_a``.
        """
        inverse_relation = self.INVERSE_RELATIONS.get(relation)
        if inverse_relation is None:
            return
        self._ensure_concept(object_value)
        self._ensure_concept(subject)
        if inverse_relation not in {"has_instance", "is_a"}:
            self._deactivate_conflicting_edges(object_value, inverse_relation)
        self.conn.execute(
            """
            INSERT INTO edges(subject, relation, object, source_kind, source_text, is_active, supports_edge_id)
            VALUES (?, ?, ?, ?, ?, 1, ?)
            """,
            (object_value, inverse_relation, subject, source_kind, source_text, supports_edge_id),
        )

    def _find_matching_edge(
        self,
        *,
        subject: str,
        relation: str,
        object_value: str,
        source_kind: str,
        source_text: str,
        is_active: bool,
    ) -> int | None:
        """Return the id of the newest edge equal on every given field, or None."""
        row = self.conn.execute(
            """
            SELECT edge_id FROM edges
            WHERE subject = ? AND relation = ? AND object = ? AND source_kind = ? AND source_text = ? AND is_active = ?
            ORDER BY edge_id DESC
            LIMIT 1
            """,
            (subject, relation, object_value, source_kind, source_text, int(is_active)),
        ).fetchone()
        return int(row["edge_id"]) if row else None

    def _contradiction_exists(
        self,
        *,
        subject: str,
        relation: str,
        previous_object: str,
        new_object: str,
        previous_edge_id: int,
        new_edge_id: int,
    ) -> bool:
        """Return True when an identical contradiction row is already stored."""
        row = self.conn.execute(
            """
            SELECT contradiction_id FROM contradictions
            WHERE subject = ? AND relation = ? AND previous_object = ? AND new_object = ?
            AND previous_edge_id = ? AND new_edge_id = ?
            LIMIT 1
            """,
            (subject, relation, previous_object, new_object, previous_edge_id, new_edge_id),
        ).fetchone()
        return row is not None

    def get_display_name(self, concept_id: str) -> str:
        """Return the stored display name, or the concept id with underscores turned into spaces."""
        row = self.conn.execute(
            "SELECT display_name FROM concepts WHERE concept_id = ? LIMIT 1",
            (concept_id,),
        ).fetchone()
        if row and str(row["display_name"]).strip():
            return str(row["display_name"]).strip()
        return concept_id.replace("_", " ")

    @staticmethod
    def _prefer_display_name(candidate: str, current: str) -> bool:
        """Return True when ``candidate`` should replace ``current``.

        Names are ranked by (number of upper-case characters, length), so a
        cased surface form beats a lower-cased id.
        """
        if not current:
            return True
        candidate_score = (sum(1 for char in candidate if char.isupper()), len(candidate))
        current_score = (sum(1 for char in current if char.isupper()), len(current))
        return candidate_score > current_score

    @staticmethod
    def _row_to_edge(row: sqlite3.Row) -> EdgeRecord:
        """Convert an ``edges`` row into an ``EdgeRecord``."""
        return EdgeRecord(
            edge_id=int(row["edge_id"]),
            subject=str(row["subject"]),
            relation=str(row["relation"]),
            object=str(row["object"]),
            source_kind=str(row["source_kind"]),
            source_text=str(row["source_text"]),
            is_active=bool(row["is_active"]),
            supports_edge_id=int(row["supports_edge_id"]) if row["supports_edge_id"] is not None else None,
        )

    @staticmethod
    def _row_to_contradiction(row: sqlite3.Row) -> ContradictionRecord:
        """Convert a ``contradictions`` row into a ``ContradictionRecord``."""
        return ContradictionRecord(
            contradiction_id=int(row["contradiction_id"]),
            subject=str(row["subject"]),
            relation=str(row["relation"]),
            previous_object=str(row["previous_object"]),
            new_object=str(row["new_object"]),
            previous_edge_id=int(row["previous_edge_id"]),
            new_edge_id=int(row["new_edge_id"]),
            source_text=str(row["source_text"]),
        )
runtime/aethon/rfi_ingest.py ADDED
@@ -0,0 +1,445 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from .rfi_ontology import ConceptOntology
6
+
7
+
8
@dataclass(frozen=True)
class Triple:
    """Immutable subject / relation / object fact with its provenance."""

    # Core statement.
    subject: str
    relation: str
    object: str
    # Sentence the statement was taken from.
    source_text: str
    # Provenance label; defaults to a direct assertion.
    source_kind: str = "direct_assertion"
    # Surface forms of subject and object; empty when not supplied.
    subject_surface: str = ""
    object_surface: str = ""
17
+
18
+
19
+ class DeterministicTripleExtractor:
20
+ """Controlled no-weight ingestion for declarative fact sentences."""
21
+
22
+ _GENERIC_BLOCKLIST = {
23
+ "is",
24
+ "are",
25
+ "was",
26
+ "were",
27
+ "be",
28
+ "been",
29
+ "being",
30
+ "do",
31
+ "does",
32
+ "did",
33
+ "have",
34
+ "has",
35
+ "had",
36
+ "say",
37
+ "says",
38
+ "said",
39
+ }
40
+
41
+ def __init__(self, ontology: ConceptOntology | None = None) -> None:
42
+ self.ontology = ontology or ConceptOntology()
43
+ self._patterns: tuple[tuple[str, str, bool], ...] = (
44
+ (" is located in ", "located_in", False),
45
+ (" is in ", "located_in", False),
46
+ (" lives in ", "lives_in", False),
47
+ (" stays in ", "lives_in", False),
48
+ (" resides in ", "lives_in", False),
49
+ (" works in ", "work_in", False),
50
+ (" studies ", "study", False),
51
+ (" now likes ", "like", False),
52
+ (" likes ", "like", False),
53
+ (" now prefers ", "prefer", False),
54
+ (" prefers ", "prefer", False),
55
+ (" chases ", "chase", False),
56
+ (" attacks ", "attack", False),
57
+ (" hunts ", "hunt", False),
58
+ (" uses ", "use", False),
59
+ (" calls ", "call", False),
60
+ (" imports ", "import", False),
61
+ (" depends on ", "depend_on", False),
62
+ (" is used by ", "use", True),
63
+ (" inherits from ", "is_a", False),
64
+ (" extends ", "is_a", False),
65
+ (" solves ", "solve", False),
66
+ (" bought ", "bought", False),
67
+ (" carries ", "carrying", False),
68
+ (" is carrying ", "carrying", False),
69
+ (" visited ", "visited", False),
70
+ (" reached ", "reached", False),
71
+ (" saw ", "saw", False),
72
+ (" returns ", "return", False),
73
+ (" equals ", "equals", False),
74
+ (" is a ", "is_a", False),
75
+ (" is an ", "is_a", False),
76
+ )
77
+
78
def extract(self, text: str) -> list[Triple]:
    """Extract triples from every sentence of ``text``.

    The text is split with ``_split_sentences`` and each sentence goes through
    ``_extract_sentence``. A sentence that raises ``ValueError`` is skipped and
    contributes nothing.
    """
    collected: list[Triple] = []
    for raw_sentence in self._split_sentences(text):
        try:
            found = self._extract_sentence(raw_sentence)
        except ValueError:
            # Best effort: one unparseable sentence must not abort the rest.
            continue
        collected += found
    return collected
86
+
87
def extract_ultra(self, text: str) -> list[Triple]:
    """Extract triples through the ``_extract_sentence_ultra`` path.

    Each sentence is normalised with ``_cleanup`` first; sentences that
    normalise to an empty string, or whose extraction raises ``ValueError``,
    are skipped.
    """
    collected: list[Triple] = []
    for raw_sentence in self._split_sentences(text):
        normalized = self._cleanup(raw_sentence)
        if normalized:
            try:
                collected.extend(self._extract_sentence_ultra(raw_sentence, normalized))
            except ValueError:
                # Best effort: ignore the sentence and move on.
                pass
    return collected
98
+
99
+ def _extract_sentence(self, sentence: str) -> list[Triple]:
100
+ normalized_sentence = self._cleanup(sentence)
101
+ if not normalized_sentence:
102
+ return []
103
+
104
+ triples: list[Triple] = []
105
+
106
+ keep_parts = self._split_multi(normalized_sentence, (" keeps ", " in "))
107
+ if keep_parts is not None:
108
+ subject, obj, loc = keep_parts
109
+ obj = self._strip_leading_articles(obj)
110
+ triples.append(self._make_triple(subject, "keeps", obj, sentence))
111
+ triples.append(self._make_triple(subject, "located_in", loc, sentence))
112
+ return triples
113
+
114
+ watch_parts = self._split_once(normalized_sentence, " was asked to watch ")
115
+ if watch_parts is not None:
116
+ subject, obj = watch_parts
117
+ triples.append(self._make_triple(subject, "watch", self._strip_leading_articles(obj), sentence))
118
+ return triples
119
+
120
+ negative_preference_parts = self._split_once(normalized_sentence, " does not like ")
121
+ if negative_preference_parts is not None and normalized_sentence.endswith(" anymore"):
122
+ subject, obj = negative_preference_parts
123
+ obj = obj[: -len(" anymore")].strip()
124
+ triples.append(
125
+ self._make_triple(
126
+ subject,
127
+ "not_like_anymore",
128
+ obj,
129
+ sentence,
130
+ )
131
+ )
132
+ return triples
133
+
134
+ carry_parts = self._split_multi(normalized_sentence, (" reached ", " carrying "))
135
+ if carry_parts is not None:
136
+ subject, loc, obj = carry_parts
137
+ triples.append(self._make_triple(subject, "reached", loc, sentence))
138
+ triples.append(self._make_triple(subject, "carrying", self._strip_leading_articles(obj), sentence))
139
+ return triples
140
+
141
+ buy_location_parts = self._split_multi(normalized_sentence, (" bought ", " in "))
142
+ if buy_location_parts is None:
143
+ buy_location_parts = self._split_multi(normalized_sentence, (" bought ", " at "))
144
+ if buy_location_parts is not None:
145
+ subject, obj, loc = buy_location_parts
146
+ triples.append(self._make_triple(subject, "bought", obj, sentence))
147
+ triples.append(self._make_triple(subject, "bought_in", loc, sentence))
148
+ return triples
149
+
150
+ later_buy_parts = self._split_multi(normalized_sentence, (" later bought ", " before leaving "))
151
+ if later_buy_parts is not None:
152
+ subject, obj, loc = later_buy_parts
153
+ triples.append(self._make_triple(subject, "bought", obj, sentence))
154
+ triples.append(self._make_triple(subject, "bought_in", loc, sentence))
155
+ return triples
156
+
157
+ for phrase, relation, reverse in self._iter_patterns():
158
+ parts = self._split_once(normalized_sentence, phrase)
159
+ if parts is None:
160
+ continue
161
+ left, right = parts
162
+ subject = right if reverse else left
163
+ obj = left if reverse else right
164
+ if subject and obj:
165
+ triples.append(self._make_triple(subject, relation, obj, sentence))
166
+ return triples
167
+
168
+ generic_locative = self._parse_generic_locative(normalized_sentence)
169
+ if generic_locative is not None and self._is_simple_clause(normalized_sentence):
170
+ subject, verb, obj = generic_locative
171
+ relation = self.ontology.normalize_relation(f"{verb}_in")
172
+ if relation not in self._GENERIC_BLOCKLIST:
173
+ triples.append(self._make_triple(subject, relation, obj, sentence))
174
+ return triples
175
+
176
+ generic_transitive = self._parse_generic_transitive(normalized_sentence)
177
+ if generic_transitive is not None and self._is_simple_clause(normalized_sentence):
178
+ subject, verb, object_value = generic_transitive
179
+ if verb not in self._GENERIC_BLOCKLIST and subject and object_value:
180
+ relation = self.ontology.normalize_relation(verb)
181
+ triples.append(self._make_triple(subject, relation, object_value, sentence))
182
+ return triples
183
+
184
+ assignment = self._parse_assignment(normalized_sentence)
185
+ if assignment is not None:
186
+ left, right = assignment
187
+ triples.append(self._make_triple(left, "equals", right, sentence))
188
+ return triples
189
+
190
+ return_match = self._parse_return_statement(normalized_sentence)
191
+ if return_match is not None:
192
+ name, value = return_match
193
+ triples.append(self._make_triple(name, "return", value, sentence))
194
+ return triples
195
+
196
+ passive_by_match = self._parse_passive_by(normalized_sentence)
197
+ if passive_by_match is not None and self._is_simple_clause(normalized_sentence):
198
+ obj, verb, subject = passive_by_match
199
+ relation = self.ontology.normalize_relation(verb)
200
+ if relation not in self._GENERIC_BLOCKLIST:
201
+ triples.append(self._make_triple(subject, relation, obj, sentence))
202
+ return triples
203
+
204
+ return triples
205
+
206
+ def _extract_sentence_ultra(self, sentence: str, normalized_sentence: str) -> list[Triple]:
207
+ triples: list[Triple] = []
208
+
209
+ for phrase, relation, reverse in self._iter_patterns():
210
+ parts = self._split_once(normalized_sentence, phrase)
211
+ if parts is None:
212
+ continue
213
+ left, right = parts
214
+ subject = right if reverse else left
215
+ obj = left if reverse else right
216
+ if subject and obj:
217
+ triples.append(self._make_triple(subject, relation, obj, sentence))
218
+ return triples
219
+
220
+ assignment = self._parse_assignment(normalized_sentence)
221
+ if assignment is not None:
222
+ left, right = assignment
223
+ triples.append(self._make_triple(left, "equals", right, sentence))
224
+ return triples
225
+
226
+ return_match = self._parse_return_statement(normalized_sentence)
227
+ if return_match is not None:
228
+ name, value = return_match
229
+ triples.append(self._make_triple(name, "return", value, sentence))
230
+ return triples
231
+
232
+ if self._is_simple_clause(normalized_sentence):
233
+ generic_locative = self._parse_generic_locative(normalized_sentence)
234
+ if generic_locative is not None:
235
+ subject, verb, obj = generic_locative
236
+ relation = self.ontology.normalize_relation(f"{verb}_in")
237
+ if relation not in self._GENERIC_BLOCKLIST:
238
+ triples.append(self._make_triple(subject, relation, obj, sentence))
239
+ return triples
240
+
241
+ generic_transitive = self._parse_generic_transitive(normalized_sentence)
242
+ if generic_transitive is not None:
243
+ subject, verb, object_value = generic_transitive
244
+ if verb not in self._GENERIC_BLOCKLIST and subject and object_value:
245
+ relation = self.ontology.normalize_relation(verb)
246
+ triples.append(self._make_triple(subject, relation, object_value, sentence))
247
+ return triples
248
+
249
+ return triples
250
+
251
+ def _iter_patterns(self) -> tuple[tuple[str, str, bool], ...]:
252
+ learned: list[tuple[str, str, bool]] = []
253
+ for phrase, meaning in self.ontology.semantic_lexicon.phrase_alias_map.items():
254
+ if "_" not in meaning:
255
+ continue
256
+ learned.append((f" {phrase} ", self.ontology.normalize_relation(meaning), False))
257
+ return tuple(dict.fromkeys((*self._patterns, *learned)))
258
+
259
def _make_triple(self, subject: str, relation: str, obj: str, source_text: str) -> Triple:
    """Build a ``Triple`` from raw fragments.

    Subject and object are resolved to concept ids through the ontology, the
    relation is normalised, and the surface forms are recovered from
    ``source_text`` with ``_recover_surface``.
    """
    surfaces = {
        "subject_surface": self._recover_surface(subject, source_text),
        "object_surface": self._recover_surface(obj, source_text),
    }
    resolve = self.ontology.resolve
    return Triple(
        subject=resolve(subject).concept_id,
        relation=self.ontology.normalize_relation(relation),
        object=resolve(obj).concept_id,
        source_text=source_text.strip(),
        **surfaces,
    )
270
+
271
+ @staticmethod
272
+ def _recover_surface(fragment: str, source_text: str) -> str:
273
+ cleaned = fragment.strip()
274
+ if not cleaned:
275
+ return cleaned
276
+ source_lower = source_text.lower()
277
+ fragment_lower = cleaned.lower()
278
+ index = source_lower.find(fragment_lower)
279
+ if index >= 0:
280
+ return source_text[index : index + len(cleaned)].strip()
281
+ return cleaned
282
+
283
+ @staticmethod
284
+ def _split_sentences(text: str) -> list[str]:
285
+ clean = text.replace("\r", "\n")
286
+ parts: list[str] = []
287
+ current: list[str] = []
288
+ for char in clean:
289
+ if char in "\n.!?":
290
+ chunk = "".join(current).strip(" -:;,\t")
291
+ if chunk:
292
+ parts.append(chunk)
293
+ current = []
294
+ continue
295
+ current.append(char)
296
+ chunk = "".join(current).strip(" -:;,\t")
297
+ if chunk:
298
+ parts.append(chunk)
299
+ return parts
300
+
301
+ @staticmethod
302
+ def _cleanup(sentence: str) -> str:
303
+ sentence = sentence.strip()
304
+ lowered = sentence.lower()
305
+ prefixes = (
306
+ "record:",
307
+ "latest record:",
308
+ "correction:",
309
+ "update:",
310
+ "first note:",
311
+ "distractor:",
312
+ "delay note:",
313
+ "target record:",
314
+ "briefing:",
315
+ )
316
+ for prefix in prefixes:
317
+ if lowered.startswith(prefix):
318
+ sentence = sentence[len(prefix) :].strip()
319
+ break
320
+ return " ".join(sentence.split()).lower()
321
+
322
+ @staticmethod
323
+ def _is_simple_clause(sentence: str) -> bool:
324
+ tokens = sentence.split()
325
+ if len(tokens) < 3 or len(tokens) > 12:
326
+ return False
327
+ if any(marker in sentence for marker in (",", ";", " that ", " which ", " because ", " while ", " although ")):
328
+ return False
329
+ return True
330
+
331
+ @staticmethod
332
+ def _split_once(text: str, phrase: str) -> tuple[str, str] | None:
333
+ if phrase not in text:
334
+ return None
335
+ left, right = text.split(phrase, 1)
336
+ left = left.strip()
337
+ right = right.strip()
338
+ if not left or not right:
339
+ return None
340
+ return left, right
341
+
342
+ @classmethod
343
+ def _split_multi(cls, text: str, phrases: tuple[str, ...]) -> tuple[str, ...] | None:
344
+ parts: list[str] = []
345
+ remainder = text
346
+ for phrase in phrases:
347
+ split = cls._split_once(remainder, phrase)
348
+ if split is None:
349
+ return None
350
+ left, remainder = split
351
+ parts.append(left)
352
+ remainder = remainder.strip()
353
+ if not remainder:
354
+ return None
355
+ parts.append(remainder)
356
+ return tuple(parts)
357
+
358
+ @staticmethod
359
+ def _strip_leading_articles(text: str) -> str:
360
+ for article in ("the ", "a ", "an "):
361
+ if text.startswith(article):
362
+ return text[len(article) :].strip()
363
+ return text.strip()
364
+
365
+ def _parse_generic_locative(self, sentence: str) -> tuple[str, str, str] | None:
366
+ tokens = sentence.split()
367
+ prepositions = {"in", "at", "inside", "within"}
368
+ for index, token in enumerate(tokens):
369
+ if token not in prepositions or index < 2 or index == len(tokens) - 1:
370
+ continue
371
+ verb = tokens[index - 1]
372
+ if not verb.isalpha():
373
+ continue
374
+ subject = " ".join(tokens[: index - 1]).strip()
375
+ obj = " ".join(tokens[index + 1 :]).strip()
376
+ if subject and obj:
377
+ return subject, verb, obj
378
+ return None
379
+
380
+ def _parse_generic_transitive(self, sentence: str) -> tuple[str, str, str] | None:
381
+ tokens = sentence.split()
382
+ if len(tokens) < 3:
383
+ return None
384
+ for index, token in enumerate(tokens):
385
+ if not token.isalpha():
386
+ continue
387
+ subject = " ".join(tokens[:index]).strip()
388
+ obj_tokens = tokens[index + 1 :]
389
+ if not subject or not obj_tokens:
390
+ continue
391
+ object_value = " ".join(obj_tokens).strip()
392
+ object_value = self._strip_leading_articles(object_value)
393
+ if object_value:
394
+ return subject, token, object_value
395
+ return None
396
+
397
+ @staticmethod
398
+ def _is_identifier(value: str) -> bool:
399
+ if not value:
400
+ return False
401
+ if value[0] != "_" and not value[0].isalpha():
402
+ return False
403
+ return all(char == "_" or char.isalnum() for char in value)
404
+
405
+ def _parse_assignment(self, sentence: str) -> tuple[str, str] | None:
406
+ if "=" not in sentence or "==" in sentence:
407
+ return None
408
+ left, right = (part.strip() for part in sentence.split("=", 1))
409
+ if not self._is_identifier(left) or not right or right.startswith("="):
410
+ return None
411
+ return left, right
412
+
413
+ def _parse_return_statement(self, sentence: str) -> tuple[str, str] | None:
414
+ tokens = sentence.split()
415
+ if len(tokens) < 3:
416
+ return None
417
+ start = 0
418
+ if tokens[0] in {"def", "function"}:
419
+ if len(tokens) < 4:
420
+ return None
421
+ start = 1
422
+ name = tokens[start]
423
+ verb = tokens[start + 1]
424
+ value = " ".join(tokens[start + 2 :]).strip()
425
+ if not self._is_identifier(name) or verb not in {"return", "returns"} or not value:
426
+ return None
427
+ return name, value
428
+
429
+ def _parse_passive_by(self, sentence: str) -> tuple[str, str, str] | None:
430
+ tokens = sentence.split()
431
+ if len(tokens) < 5:
432
+ return None
433
+ if " is " not in f" {sentence} " or " by " not in f" {sentence} ":
434
+ return None
435
+ left_right = self._split_once(sentence, " is ")
436
+ if left_right is None:
437
+ return None
438
+ obj, tail = left_right
439
+ verb_subject = self._split_once(tail, " by ")
440
+ if verb_subject is None:
441
+ return None
442
+ verb, subject = verb_subject
443
+ if not verb.isalpha() or not subject:
444
+ return None
445
+ return obj, verb, subject
runtime/aethon/rfi_interpreter.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from .rfi_lexicon import AethonNativeConceptCodec
6
+ from .rfi_math import ExactMathReasoner
7
+ from .rfi_query_forms import NativeQueryFormSet
8
+
9
+
10
@dataclass(frozen=True)
class ParsedQuery:
    """Immutable result of interpreting one query string."""

    # Kind of question that was recognised (for example "math_eval").
    intent: str
    # Slots filled depending on the intent; unset slots stay None.
    subject: str | None = None
    relation: str | None = None
    object_value: str | None = None
    expression: str | None = None
    attribute: str | None = None
    # The query text this result was parsed from.
    raw: str = ""
19
+
20
+
21
+ class NativeQueryInterpreter:
22
+ """Interprets queries through Aethon's native lexical codec and token structure."""
23
+
24
+ def __init__(
25
+ self,
26
+ codec: AethonNativeConceptCodec,
27
+ query_form_path: str | None = None,
28
+ query_form_payload: list[dict[str, object]] | None = None,
29
+ ) -> None:
30
+ self.codec = codec
31
+ self.math = ExactMathReasoner()
32
+ self.query_forms = NativeQueryFormSet(query_form_path, query_form_payload)
33
+
34
def parse(self, query: str) -> ParsedQuery | None:
    """Interpret ``query`` and return a ``ParsedQuery`` or ``None``.

    An arithmetic expression takes priority and yields a ``math_eval`` query.
    Otherwise the query is normalised by the ontology, trailing filler words
    are removed, and the remaining words are matched against the query forms.
    """
    expression = self.math.extract_expression(query)
    if expression is not None:
        return ParsedQuery(intent="math_eval", expression=expression, raw=query)

    tokens = self.codec.ontology.normalize(query).split()
    words = self._clean_query_words(tokens)
    if words:
        return self._parse_from_forms(words, raw=query)
    return None
48
+
49
+ def _clean_query_words(self, words: list[str]) -> list[str]:
50
+ cleaned = list(words)
51
+ filler_words = self.codec.ontology.semantic_lexicon.query_fillers
52
+ while cleaned and cleaned[-1] in filler_words:
53
+ cleaned.pop()
54
+ return cleaned
55
+
56
+ def _parse_from_forms(self, words: list[str], *, raw: str) -> ParsedQuery | None:
57
+ for form in self.query_forms.forms:
58
+ prefix = list(form.prefix)
59
+ if len(words) < len(prefix) or words[: len(prefix)] != prefix:
60
+ continue
61
+ if form.requires and not all(token in words for token in form.requires):
62
+ continue
63
+ subject = self._resolve_form_subject(words, form)
64
+ object_value = self._resolve_form_object(words, form)
65
+ relation = self._resolve_form_relation(words, form)
66
+ if form.intent == "used_emoji":
67
+ return ParsedQuery(intent=form.intent, raw=raw)
68
+ if form.intent in {"what_changed", "has_contradiction", "where_entity", "classify", "plan_first", "plan_next", "plan_previous"} and subject:
69
+ return ParsedQuery(intent=form.intent, subject=subject, raw=raw)
70
+ if form.intent == "story_query" and subject:
71
+ return ParsedQuery(intent=form.intent, subject=subject, object_value=object_value, raw=raw)
72
+ if form.intent == "relation_path" and subject and object_value:
73
+ return ParsedQuery(intent=form.intent, subject=subject, object_value=object_value, raw=raw)
74
+ if form.intent == "keep_location_lookup" and subject and object_value:
75
+ return ParsedQuery(intent=form.intent, subject=subject, object_value=object_value, raw=raw)
76
+ if form.intent == "relation_object" and subject and relation:
77
+ return ParsedQuery(intent=form.intent, subject=subject, relation=relation, raw=raw)
78
+ if form.intent == "relation_subject" and object_value and relation:
79
+ return ParsedQuery(intent=form.intent, relation=relation, object_value=object_value, raw=raw)
80
+ return None
81
+
82
+ def _resolve_form_subject(self, words: list[str], form) -> str | None:
83
+ subject_words: list[str] = []
84
+ if form.subject_mode == "tail":
85
+ subject_words = words[len(form.prefix) :]
86
+ elif form.subject_mode == "tail_without_last":
87
+ subject_words = words[len(form.prefix) : -1]
88
+ elif form.subject_mode == "single_after_prefix":
89
+ if len(words) > len(form.prefix):
90
+ subject_words = [words[len(form.prefix)]]
91
+ elif form.subject_mode == "between_indexes":
92
+ if form.subject_end_anchor and form.subject_end_anchor in words:
93
+ end_index = words.index(form.subject_end_anchor)
94
+ subject_words = words[form.subject_start : end_index]
95
+ elif form.subject_mode == "between_anchors":
96
+ if form.subject_end_anchor and form.subject_end_anchor in words:
97
+ end_index = words.index(form.subject_end_anchor)
98
+ subject_words = words[form.subject_start : end_index]
99
+ if not subject_words:
100
+ return None
101
+ return self.codec.ontology.resolve(" ".join(subject_words)).concept_id
102
+
103
+ def _resolve_form_object(self, words: list[str], form) -> str | None:
104
+ object_words: list[str] = []
105
+ if form.object_mode == "after_anchor":
106
+ if form.object_start_anchor and form.object_start_anchor in words:
107
+ anchor_index = words.index(form.object_start_anchor)
108
+ start_index = anchor_index + 1
109
+ if start_index < len(words) and words[start_index] in self._relation_prepositions():
110
+ start_index += 1
111
+ object_words = words[start_index:]
112
+ elif form.object_mode == "tail_after_prefix":
113
+ object_words = words[len(form.prefix) :]
114
+ elif form.object_mode == "from_relation_words":
115
+ start_index = len(form.prefix) + 1
116
+ if start_index < len(words) and words[start_index] in self._relation_prepositions():
117
+ start_index += 1
118
+ object_words = words[start_index:]
119
+ if not object_words:
120
+ return None
121
+ return self.codec.ontology.resolve(" ".join(object_words)).concept_id
122
+
123
+ def _relation_prepositions(self) -> set[str]:
124
+ return self.codec.ontology.semantic_lexicon.relation_prepositions
125
+
126
+ def _resolve_form_relation(self, words: list[str], form) -> str | None:
127
+ if form.relation_mode == "fixed":
128
+ return form.relation or None
129
+ if form.relation_mode == "from_words":
130
+ start_index = len(form.prefix)
131
+ if form.subject_mode == "single_after_prefix":
132
+ start_index += 1
133
+ return self._relation_from_words(words, start_index=start_index)
134
+ return None
135
+
136
+ def _relation_from_words(self, words: list[str], *, start_index: int) -> str | None:
137
+ if start_index >= len(words):
138
+ return None
139
+ relation = words[start_index]
140
+ if relation in {"is", "are"}:
141
+ return None
142
+ if start_index + 1 < len(words) and words[start_index + 1] in self._relation_prepositions():
143
+ relation = f"{relation}_{words[start_index + 1]}"
144
+ return self.codec.ontology.normalize_relation(relation)
runtime/aethon/rfi_lexicon.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from dataclasses import dataclass
5
+ import json
6
+
7
+ from .rfi_ontology import ConceptOntology
8
+
9
+
10
@dataclass(frozen=True)
class LexicalUnit:
    """One immutable token emitted by the concept codec."""

    # Text of the token as it was matched.
    surface: str
    # Canonical form (concept id, relation name, or the token itself).
    normalized: str
    # Token category, e.g. "relation", "structure", "query", "number", "concept".
    role: str
    # Extra descriptive tags; empty by default.
    features: tuple[str, ...] = ()
16
+
17
+
18
+ class AethonNativeConceptCodec:
19
+ """Aethon's graph-native tokenizer/codec for post-weight intelligence."""
20
+
21
+ NAME = "Aethon Native Concept Codec"
22
+ SHORT_NAME = "ANCC"
23
+
24
+ _RELATION_PHRASES = (
25
+ "is located in",
26
+ "does not like anymore",
27
+ "was asked to watch",
28
+ "later bought",
29
+ "bought in",
30
+ "bought at",
31
+ "lives in",
32
+ "stays in",
33
+ "located in",
34
+ "keeps",
35
+ "prefers",
36
+ "likes",
37
+ "chases",
38
+ "attacks",
39
+ "hunts",
40
+ "bought",
41
+ "carries",
42
+ "carrying",
43
+ "visited",
44
+ "reached",
45
+ "saw",
46
+ "returns",
47
+ "equals",
48
+ "is a",
49
+ "is an",
50
+ "is in",
51
+ )
52
+
53
+ _QUERY_WORDS = {"what", "where", "who", "which", "how", "is", "does", "did", "there", "about", "after"}
54
+ _STRUCTURE_WORDS = {"the", "a", "an", "in", "at", "to", "of", "now", "later", "before", "and"}
55
+ _SUFFIX_MAP = {
56
+ "ing": "progressive",
57
+ "ed": "past",
58
+ "er": "agentive",
59
+ "or": "agentive",
60
+ "ous": "property",
61
+ "ly": "adverbial",
62
+ "tion": "abstract_noun",
63
+ "s": "plural_or_third_person",
64
+ }
65
+
66
+ def __init__(self, ontology: ConceptOntology | None = None) -> None:
67
+ self.ontology = ontology or ConceptOntology()
68
+
69
def encode(self, text: str) -> list[LexicalUnit]:
    """Encode ``text`` into lexical units.

    The text is normalised by the ontology. If a relation phrase is found,
    the words before it are encoded as a plain segment, the phrase becomes a
    single ``relation`` unit, and the words after it are encoded recursively.
    Without a relation phrase the whole text is encoded as one segment.
    """
    normalized = self.ontology.normalize(text)
    if not normalized:
        return []
    words = normalized.split()
    located = self._extract_relation_chunk(words)
    if located is None:
        return self._encode_segment(normalized, allow_empty=False)

    start, end, phrase = located
    leading_units = self._encode_segment(" ".join(words[:start]), allow_empty=True)
    edge = LexicalUnit(
        surface=phrase,
        normalized=self.ontology.normalize_relation(phrase),
        role="relation",
        features=("typed_edge",),
    )
    trailing_text = " ".join(words[end:])
    trailing_units = self.encode(trailing_text) if trailing_text else []
    return [*leading_units, edge, *trailing_units]
87
+
88
def concept_signature(self, text: str) -> tuple[str, ...]:
    """Return the feature tags describing the concept ``text`` resolves to.

    Tags, in order: ``concept:<id>``; one ``compound_part:`` tag per
    underscore-separated part (only for multi-part ids); ``root:`` and
    ``suffix:`` for the first matching entry of ``_SUFFIX_MAP``; and one
    ``parent:`` tag per item returned by the ontology's ``lift``. Duplicates
    are removed while keeping first-seen order.
    """
    concept = self.ontology.resolve(text).concept_id
    tags: list[str] = [f"concept:{concept}"]

    pieces = concept.split("_")
    if len(pieces) > 1:
        tags.extend(f"compound_part:{piece}" for piece in pieces)

    not_plural_endings = ("os", "is", "us", "ss")
    for ending, label in self._SUFFIX_MAP.items():
        # A final "s" after o/i/u/s is not treated as a plural marker.
        if ending == "s" and concept.endswith(not_plural_endings):
            continue
        if len(concept) > len(ending) + 1 and concept.endswith(ending):
            tags += [f"root:{concept[: -len(ending)]}", f"suffix:{label}"]
            break

    tags.extend(f"parent:{parent}" for parent in self.ontology.lift(concept))
    return tuple(dict.fromkeys(tags))
106
+
107
def export_tokens(self, text: str) -> list[dict[str, object]]:
    """Encode ``text`` and return each unit as a plain dictionary.

    Every dictionary has the keys ``surface``, ``normalized``, ``role`` and
    ``features`` (the latter converted to a list).
    """
    exported: list[dict[str, object]] = []
    for unit in self.encode(text):
        record: dict[str, object] = {}
        record["surface"] = unit.surface
        record["normalized"] = unit.normalized
        record["role"] = unit.role
        record["features"] = list(unit.features)
        exported.append(record)
    return exported
117
+
118
+ def _encode_segment(self, text: str, *, allow_empty: bool) -> list[LexicalUnit]:
119
+ normalized = self.ontology.normalize(text)
120
+ if not normalized:
121
+ return [] if allow_empty else []
122
+ units: list[LexicalUnit] = []
123
+ for token in normalized.split():
124
+ if token in self._STRUCTURE_WORDS:
125
+ units.append(LexicalUnit(surface=token, normalized=token, role="structure", features=("grammar",)))
126
+ continue
127
+ if token in self._QUERY_WORDS:
128
+ units.append(LexicalUnit(surface=token, normalized=token, role="query", features=("control",)))
129
+ continue
130
+ if token.isdigit():
131
+ units.append(LexicalUnit(surface=token, normalized=token, role="number", features=("scalar",)))
132
+ continue
133
+ concept = self.ontology.resolve(token)
134
+ units.append(
135
+ LexicalUnit(
136
+ surface=token,
137
+ normalized=concept.concept_id,
138
+ role="concept",
139
+ features=self.concept_signature(token),
140
+ )
141
+ )
142
+ return units
143
+
144
+ def _extract_relation_chunk(self, words: list[str]) -> tuple[int, int, str] | None:
145
+ best: tuple[int, int, str] | None = None
146
+ for phrase in self._relation_phrases():
147
+ phrase_words = phrase.split()
148
+ phrase_len = len(phrase_words)
149
+ if phrase_len == 0 or phrase_len > len(words):
150
+ continue
151
+ for start in range(0, len(words) - phrase_len + 1):
152
+ if words[start : start + phrase_len] == phrase_words:
153
+ candidate = (start, start + phrase_len, phrase)
154
+ if best is None or candidate[0] < best[0] or (candidate[0] == best[0] and phrase_len > (best[1] - best[0])):
155
+ best = candidate
156
+ break
157
+ return best
158
+
159
+ def _relation_phrases(self) -> tuple[str, ...]:
160
+ learned: list[str] = []
161
+ for phrase, meaning in self.ontology.semantic_lexicon.phrase_alias_map.items():
162
+ if "_" not in meaning:
163
+ continue
164
+ learned.append(phrase)
165
+ learned.append(meaning)
166
+ return tuple(dict.fromkeys((*self._RELATION_PHRASES, *learned)))
167
+
168
+
169
def parse_args() -> argparse.Namespace:
    """Parse the command line; ``--text`` is the only, required, option."""
    cli = argparse.ArgumentParser(description="Inspect Aethon's native no-weight lexical codec.")
    cli.add_argument("--text", type=str, required=True)
    namespace = cli.parse_args()
    return namespace
173
+
174
+
175
def main() -> None:
    """Encode the ``--text`` argument and print its tokens as indented JSON."""
    text = parse_args().text
    tokens = AethonNativeConceptCodec().export_tokens(text)
    print(json.dumps(tokens, indent=2))
179
+
180
+
181
+ if __name__ == "__main__":
182
+ main()
runtime/aethon/rfi_math.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from dataclasses import dataclass
5
+ from fractions import Fraction
6
+
7
+
8
@dataclass(frozen=True)
class MathResult:
    """Immutable outcome of evaluating one arithmetic expression."""

    # Expression that was evaluated.
    expression: str
    # Rendered result.
    value: str
    # Proof entries for the evaluation.
    proof: tuple[str, ...]
    # Human-readable reasoning steps.
    reasoning: tuple[str, ...]
14
+
15
+
16
class ExactMathReasoner:
    """Exact arithmetic evaluator for the native no-weight Aethon core.

    Text is reduced to a plain arithmetic expression (symbols or English
    number/operator words), parsed with ``ast`` and evaluated over
    ``Fraction`` values through an explicit operator whitelist.
    """

    _NUMBER_WORDS = {
        "zero": 0,
        "one": 1,
        "two": 2,
        "three": 3,
        "four": 4,
        "five": 5,
        "six": 6,
        "seven": 7,
        "eight": 8,
        "nine": 9,
        "ten": 10,
        "eleven": 11,
        "twelve": 12,
        "thirteen": 13,
        "fourteen": 14,
        "fifteen": 15,
        "sixteen": 16,
        "seventeen": 17,
        "eighteen": 18,
        "nineteen": 19,
        "twenty": 20,
        "thirty": 30,
        "forty": 40,
        "fifty": 50,
        "sixty": 60,
        "seventy": 70,
        "eighty": 80,
        "ninety": 90,
        "hundred": 100,
    }
    _NUMBER_CONNECTORS = {"and"}
    _OPERATOR_PHRASES = (
        ("multiplied by", "*"),
        ("divided by", "/"),
        ("plus", "+"),
        ("minus", "-"),
        ("times", "*"),
        ("over", "/"),
        ("modulo", "%"),
        ("mod", "%"),
    )
    _TRAILING_CHATTER = (
        "please",
        "thanks",
        "thank you",
        "laughing",
        "happy",
        "thinking",
        "by the way",
    )
    # Single-character tokens accepted verbatim by the textual converter.
    _OPERATOR_TOKENS = frozenset("+-*/%()")
    # Upper bound on (operand bit width * |exponent|) for ``**``; keeps tower
    # exponents such as ``9 ** 9 ** 9`` from exhausting time and memory.
    _MAX_POWER_BITS = 1 << 16

    _ALLOWED_BINARY = {
        ast.Add: ("add", lambda left, right: left + right),
        ast.Sub: ("subtract", lambda left, right: left - right),
        ast.Mult: ("multiply", lambda left, right: left * right),
        ast.Div: ("divide", lambda left, right: left / right),
        ast.FloorDiv: ("floor divide", lambda left, right: left // right),
        ast.Mod: ("modulo", lambda left, right: left % right),
        ast.Pow: ("power", lambda left, right: left**right),
    }
    _ALLOWED_UNARY = {
        ast.UAdd: ("keep", lambda value: value),
        ast.USub: ("negate", lambda value: -value),
    }

    def can_evaluate(self, text: str) -> bool:
        """Return True when ``extract_expression`` finds an expression in *text*."""
        return self.extract_expression(text) is not None

    def extract_expression(self, text: str) -> str | None:
        """Reduce *text* to an arithmetic expression string, or None.

        A leading question prefix ("what is ", "solve ", ...) and trailing
        chatter words are removed. The remainder is accepted either as a
        purely symbolic expression or after converting number/operator words.
        In both cases at least one of ``+-*/%`` must be present.
        """
        lowered = text.strip().lower().strip(" ?")
        expression = lowered
        for prefix in ("what is ", "solve ", "compute ", "evaluate "):
            if lowered.startswith(prefix):
                expression = lowered[len(prefix):]
                break
        expression = expression.strip()

        # Peel chatter repeatedly so stacked suffixes ("... please thanks") go too.
        changed = True
        while changed:
            changed = False
            for suffix in self._TRAILING_CHATTER:
                if expression.endswith(f" {suffix}"):
                    expression = expression[: -len(suffix)].strip()
                    changed = True
        if not expression:
            return None

        if self._is_symbolic_expression(expression):
            return expression if any(symbol in expression for symbol in "+-*/%") else None
        textual = self._textual_to_expression(expression)
        if textual is None or not any(symbol in textual for symbol in "+-*/%"):
            return None
        return textual

    def evaluate(self, text: str) -> MathResult | None:
        """Evaluate the arithmetic found in *text*.

        Returns:
            A ``MathResult``, or None when *text* holds no expression or the
            expression cannot be evaluated (malformed syntax, unsupported
            construct, division by zero, or a value too large to render).
        """
        expression = self.extract_expression(text)
        if expression is None:
            return None
        steps: list[str] = []
        try:
            tree = ast.parse(expression, mode="eval")
            value = self._eval_node(tree.body, steps)
            rendered = self._render_value(value)
        except (SyntaxError, ValueError, ZeroDivisionError, OverflowError):
            # The extractor only filters characters, so inputs such as "1 +"
            # or "1 / 0" reach this point; report them as "no answer".
            return None
        proof = (f"math:{expression}={rendered}",)
        reasoning = tuple(f"Step {index}: {step}" for index, step in enumerate(steps, start=1))
        if not reasoning:
            reasoning = (f"Step 1: evaluate {expression} = {rendered}.",)
        return MathResult(expression=expression, value=rendered, proof=proof, reasoning=reasoning)

    def _eval_node(self, node: ast.AST, steps: list[str]) -> Fraction:
        """Recursively evaluate *node*, appending one sentence per operation to *steps*.

        Raises:
            ValueError: for any node or operator outside the whitelist.
            ZeroDivisionError: for division or modulo by zero.
        """
        # ast.parse has produced ast.Constant for numbers since Python 3.8; the
        # legacy ast.Num alias is not consulted because it was removed in 3.14.
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return Fraction(str(node.value))
        if isinstance(node, ast.BinOp):
            operator = type(node.op)
            if operator not in self._ALLOWED_BINARY:
                raise ValueError(f"Unsupported math operator: {operator.__name__}")
            label, operation = self._ALLOWED_BINARY[operator]
            left = self._eval_node(node.left, steps)
            right = self._eval_node(node.right, steps)
            if operator is ast.Pow:
                result = self._exact_power(left, right)
            else:
                # ``//`` yields an int; normalise so every result is a Fraction.
                result = Fraction(operation(left, right))
            steps.append(
                f"{label} {self._render_value(left)} and {self._render_value(right)} to get {self._render_value(result)}."
            )
            return result
        if isinstance(node, ast.UnaryOp):
            operator = type(node.op)
            if operator not in self._ALLOWED_UNARY:
                raise ValueError(f"Unsupported unary operator: {operator.__name__}")
            label, operation = self._ALLOWED_UNARY[operator]
            value = self._eval_node(node.operand, steps)
            result = operation(value)
            steps.append(f"{label} {self._render_value(value)} to get {self._render_value(result)}.")
            return result
        raise ValueError(f"Unsupported math syntax: {type(node).__name__}")

    def _exact_power(self, base: Fraction, exponent: Fraction) -> Fraction:
        """Raise *base* to an integral *exponent* without leaving exact arithmetic.

        Raises:
            ValueError: if the exponent is not an integer (the result would be
                a float) or the result would exceed ``_MAX_POWER_BITS``.
        """
        if exponent.denominator != 1:
            raise ValueError("Unsupported math operator: fractional exponent")
        power = exponent.numerator
        widest = max(base.numerator.bit_length(), base.denominator.bit_length())
        if widest * abs(power) > self._MAX_POWER_BITS:
            raise ValueError("Unsupported math operator: exponent too large")
        return Fraction(base) ** power

    def _textual_to_expression(self, expression: str) -> str | None:
        """Convert operator phrases and number words into a symbolic expression.

        Returns None as soon as a token is neither an operator symbol, a
        number word/connector run, nor a digit literal.
        """
        normalized = f" {expression} "
        for phrase, symbol in self._OPERATOR_PHRASES:
            normalized = normalized.replace(f" {phrase} ", f" {symbol} ")
        tokens = normalized.split()
        converted: list[str] = []
        index = 0
        while index < len(tokens):
            token = tokens[index]
            # Set membership, not substring search: "*/" must not pass as an operator.
            if token in self._OPERATOR_TOKENS:
                converted.append(token)
                index += 1
                continue
            if token in self._NUMBER_WORDS or token in self._NUMBER_CONNECTORS:
                number_tokens: list[str] = []
                while index < len(tokens) and (
                    tokens[index] in self._NUMBER_WORDS or tokens[index] in self._NUMBER_CONNECTORS
                ):
                    number_tokens.append(tokens[index])
                    index += 1
                value = self._parse_number_words(number_tokens)
                if value is None:
                    return None
                converted.append(str(value))
                continue
            if self._is_number_token(token):
                converted.append(token)
                index += 1
                continue
            return None
        return " ".join(converted) if converted else None

    def _parse_number_words(self, tokens: list[str]) -> int | None:
        """Fold a run of number words into an int; None if nothing but connectors."""
        cleaned = [token for token in tokens if token not in self._NUMBER_CONNECTORS]
        if not cleaned:
            return None
        current = 0
        for token in cleaned:
            value = self._NUMBER_WORDS.get(token)
            if value is None:
                return None
            if token == "hundred":
                # A bare "hundred" counts as one hundred.
                current = max(current, 1) * 100
            else:
                current += value
        return current

    @staticmethod
    def _render_value(value: Fraction) -> str:
        """Render integers without a fraction part and everything else as a float string."""
        if value.denominator == 1:
            return str(value.numerator)
        return str(float(value))

    @staticmethod
    def _is_symbolic_expression(expression: str) -> bool:
        """Return True when *expression* consists only of digits, ``.``, operators, parentheses and spaces."""
        allowed = set("0123456789.+-*/%() ")
        return all(char in allowed for char in expression)

    @staticmethod
    def _is_number_token(token: str) -> bool:
        """Return True for a token made of digits with at most one ``.`` and at least one digit."""
        if token.count(".") > 1:
            return False
        digits = [char for char in token if char != "."]
        return bool(digits) and all(char.isdigit() for char in digits)
runtime/aethon/rfi_metrics.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from dataclasses import asdict, dataclass
5
+ import json
6
+ from pathlib import Path
7
+ import sqlite3
8
+
9
+
10
@dataclass(frozen=True)
class StructuralCapacityReport:
    """Immutable snapshot of the counts that make up Structural Capacity."""

    # Total reported as the overall capacity figure.
    structural_capacity: int
    concept_count: int
    explicit_edge_count: int
    abstraction_count: int
    revision_count: int
    raw_unit_count: int

    def to_metadata(self) -> dict[str, int]:
        """Return the report as a flat dict; the total is stored under ``"sc"``."""
        return {
            "sc": self.structural_capacity,
            "concept_count": self.concept_count,
            "explicit_edge_count": self.explicit_edge_count,
            "abstraction_count": self.abstraction_count,
            "revision_count": self.revision_count,
            "raw_unit_count": self.raw_unit_count,
        }
28
+
29
+
30
class StructuralCapacityMeter:
    """Measures Aethon's live structural size instead of frozen parameter count."""

    @staticmethod
    def from_sqlite(db_path: str | Path) -> StructuralCapacityReport:
        """Count the rows of a graph store and return them as a report.

        Reads the ``concepts``, ``edges``, ``contradictions`` and ``raw_units``
        tables. Active edges are split by ``source_kind``: ``'derived'`` rows
        count as abstractions, all others as explicit edges. The structural
        capacity is the sum of the five counts.

        Raises:
            sqlite3.Error: propagated from sqlite3, e.g. when a table is missing.
        """
        count = StructuralCapacityMeter._count
        conn = sqlite3.connect(str(db_path))
        try:
            concept_count = count(conn, "SELECT COUNT(*) FROM concepts")
            explicit_edge_count = count(
                conn,
                "SELECT COUNT(*) FROM edges WHERE is_active = 1 AND source_kind != 'derived'",
            )
            abstraction_count = count(
                conn,
                "SELECT COUNT(*) FROM edges WHERE is_active = 1 AND source_kind = 'derived'",
            )
            revision_count = count(conn, "SELECT COUNT(*) FROM contradictions")
            raw_unit_count = count(conn, "SELECT COUNT(*) FROM raw_units")
        finally:
            # Close the connection even when one of the queries fails.
            conn.close()
        return StructuralCapacityReport(
            structural_capacity=concept_count + explicit_edge_count + abstraction_count + revision_count + raw_unit_count,
            concept_count=concept_count,
            explicit_edge_count=explicit_edge_count,
            abstraction_count=abstraction_count,
            revision_count=revision_count,
            raw_unit_count=raw_unit_count,
        )

    @staticmethod
    def _count(conn: sqlite3.Connection, sql: str) -> int:
        """Run *sql* and return the first column of its first row as an int (0 if no row)."""
        row = conn.execute(sql).fetchone()
        return int(row[0]) if row else 0
63
+
64
+
65
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command line of the Structural Capacity reporter.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the original zero-argument
            call did.

    Returns:
        Namespace with the required ``graph`` string.
    """
    parser = argparse.ArgumentParser(description="Report Aethon Structural Capacity from a native graph store.")
    parser.add_argument("--graph", type=str, required=True)
    return parser.parse_args(argv)
69
+
70
+
71
def main() -> None:
    """Print the Structural Capacity report for ``--graph`` as indented JSON."""
    args = parse_args()
    report = StructuralCapacityMeter.from_sqlite(args.graph)
    print(json.dumps(asdict(report), indent=2))


if __name__ == "__main__":
    main()
runtime/aethon/rfi_ontology.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Iterable
6
+
7
+ from .rfi_semantics import NativeSemanticLexicon
8
+
9
+
10
def _safe_wordnet():
    """Return NLTK's WordNet corpus reader, or None when it cannot be loaded.

    WordNet is optional: any failure while importing NLTK or loading the
    corpus is deliberately swallowed so callers can fall back to None.
    """
    try:
        from nltk.corpus import wordnet as wn  # type: ignore

        # Force the lazy corpus loader now so a missing corpus fails here.
        wn.ensure_loaded()
        return wn
    except Exception:
        return None
18
+
19
+
20
@dataclass(frozen=True)
class ConceptRecord:
    """Immutable description of one resolved concept."""

    # Identifier form of the concept.
    concept_id: str
    # Normalised surface form of the concept.
    lemma: str
    # Parent concept names attached to this concept.
    parents: tuple[str, ...]
    # Alternative surface forms; empty by default.
    aliases: tuple[str, ...] = ()
26
+
27
+
28
class ConceptOntology:
    """Ontology-backed concept resolver for the post-weight Aethon core.

    Combines optional WordNet hypernyms with a small built-in parent map, and
    normalises text and relation names through the semantic lexicon's emoji,
    alias and phrase-alias maps.
    """

    _FALLBACK_PARENT_MAP = {
        "bobcat": ("feline", "predator"),
        "cat": ("feline", "animal"),
        "dog": ("canine", "animal"),
        "wolf": ("canine", "predator"),
        "fox": ("canine", "predator"),
        "lion": ("feline", "predator"),
        "zebra": ("prey", "animal"),
        "rabbit": ("prey", "animal"),
        "mouse": ("prey", "animal"),
        "hen": ("prey", "animal"),
        "lagos": ("city", "location"),
        "accra": ("city", "location"),
        "nigeria": ("country", "location"),
        "ghana": ("country", "location"),
        "professor": ("person",),
        "developer": ("person",),
        "teacher": ("person",),
        "student": ("person",),
        "predator": ("animal",),
        "prey": ("animal",),
        "feline": ("animal",),
        "canine": ("animal",),
    }

    _ALIASES = {
        "likes": "like",
        "liked": "like",
        "does_not_like_anymore": "not_like_anymore",
        "prefers": "prefer",
        "preferred": "prefer",
        "chases": "chase",
        "chased": "chase",
        "attacks": "attack",
        "attacked": "attack",
        "hunts": "hunt",
        "works_in": "work_in",
        "studies": "study",
        "uses": "use",
        "calls": "call",
        "imports": "import",
        "depends_on": "depend_on",
        "solves": "solve",
        "bought": "bought",
        "bought_in": "bought_in",
        "lives_in": "lives_in",
        "visited": "visited",
        "reached": "reached",
        "saw": "saw",
        "carries": "carrying",
        "returns": "return",
        "equals": "equals",
        "located": "locate",
        "located_in": "located_in",
        "is_in": "located_in",
        "is_located_in": "located_in",
        "is": "be",
        "are": "be",
    }

    def __init__(
        self,
        semantic_lexicon_path: str | Path | None = None,
        semantic_lexicon_payload: list[dict[str, str]] | None = None,
    ) -> None:
        """Load WordNet if available and build the semantic lexicon from the given path/payload."""
        self._wn = _safe_wordnet()
        self.semantic_lexicon = NativeSemanticLexicon(semantic_lexicon_path, semantic_lexicon_payload)

    def expand_semantics(self, text: str) -> str:
        """Replace every known emoji in *text* with its meaning, padded by spaces."""
        expanded = text
        for emoji, meaning in self.semantic_lexicon.emoji_map.items():
            expanded = expanded.replace(emoji, f" {meaning} ")
        return expanded

    def extract_emojis(self, text: str) -> list[tuple[str, str]]:
        """Delegate to the semantic lexicon's ``describe_emojis`` for *text*."""
        return self.semantic_lexicon.describe_emojis(text)

    def normalize(self, text: str) -> str:
        """Lower-case *text*, blank out punctuation, collapse spaces and apply aliases.

        Only alphanumerics, ``_``, ``-`` and single spaces survive the
        character pass; emoji are expanded to their meanings first.
        """
        lowered = self.expand_semantics(text).strip().lower()
        chars: list[str] = []
        previous_space = False
        for char in lowered:
            keep = char.isalnum() or char in {"_", "-", " "}
            if not keep or char == " ":
                # Disallowed characters act as spaces; runs collapse to one.
                if not previous_space:
                    chars.append(" ")
                previous_space = True
                continue
            previous_space = False
            chars.append(char)
        return self._apply_aliases("".join(chars).strip())

    def normalize_relation(self, relation: str) -> str:
        """Return the canonical snake_case name for *relation*.

        A full-key hit in ``_ALIASES`` wins; otherwise only the first
        underscore-separated segment is singularised.
        """
        key = self.normalize(relation).replace(" ", "_")
        aliased = self._ALIASES.get(key)
        if aliased is not None:
            return aliased
        if "_" in key:
            head, tail = key.split("_", 1)
            return f"{self._canonicalize_relation_head(head)}_{tail}"
        return self._canonicalize_relation_head(key)

    @staticmethod
    def _canonicalize_relation_head(head: str) -> str:
        """Strip a simple plural / third-person ending from *head*."""
        if head.endswith("ies") and len(head) > 3:
            return head[:-3] + "y"
        if head.endswith("s") and len(head) > 3 and not head.endswith(("ss", "us", "is")):
            return head[:-1]
        return head

    def resolve(self, text: str) -> ConceptRecord:
        """Resolve *text* to a ``ConceptRecord`` with de-duplicated parents.

        Raises:
            ValueError: if nothing remains after normalisation and article removal.
        """
        lemma = self._drop_leading_article(self.normalize(text))
        if not lemma:
            raise ValueError("Cannot resolve an empty concept.")

        # WordNet parents come first; dict.fromkeys de-duplicates in order.
        candidates = self._parents_from_wordnet(lemma) + list(self._FALLBACK_PARENT_MAP.get(lemma, ()))
        parents = tuple(dict.fromkeys(candidates))
        return ConceptRecord(
            concept_id=lemma.replace(" ", "_"),
            lemma=lemma,
            parents=parents,
            aliases=(lemma,),
        )

    def lift(self, text: str) -> tuple[str, ...]:
        """Return the parents of the concept resolved from *text*."""
        return self.resolve(text).parents

    def nearest_shared_parent(self, left: str, right: str) -> str | None:
        """Return the first parent of *right* that is also a parent of *left*, else None."""
        left_parents = set(self.lift(left))
        for candidate in self.lift(right):
            if candidate in left_parents:
                return candidate
        return None

    def _parents_from_wordnet(self, lemma: str) -> list[str]:
        """Collect hypernym names for *lemma* from WordNet (empty when unavailable).

        At most three synsets and three hypernyms per synset are inspected.
        """
        if self._wn is None:
            return []
        parents: list[str] = []
        for synset in self._wn.synsets(lemma)[:3]:
            for hypernym in synset.hypernyms()[:3]:
                name = hypernym.lemmas()[0].name().replace("_", " ").lower()
                if name != lemma:
                    parents.append(self.normalize(name).replace(" ", "_"))
        return list(dict.fromkeys(parents))

    def expand_with_parents(self, concepts: Iterable[str]) -> set[str]:
        """Return the identifiers of *concepts* together with all of their parents."""
        expanded: set[str] = set()
        for concept in concepts:
            normalized = self._drop_leading_article(self.normalize(concept))
            if not normalized:
                continue
            expanded.add(normalized.replace(" ", "_"))
            expanded.update(self.lift(normalized))
        return expanded

    @staticmethod
    def _drop_leading_article(text: str) -> str:
        """Remove one leading "the", "a" or "an" from *text*."""
        words = text.strip().split()
        if words and words[0] in {"the", "a", "an"}:
            return " ".join(words[1:]).strip()
        return text.strip()

    @staticmethod
    def _replace_word_ci(text: str, target: str, replacement: str) -> str:
        """Replace whole words equal to *target* (case-insensitive) with *replacement*."""
        target_lower = target.lower()
        return " ".join(
            replacement if word.lower() == target_lower else word
            for word in text.split()
        )

    def _apply_aliases(self, text: str) -> str:
        """Rewrite *text* word by word using the lexicon's phrase and word aliases.

        At each position phrase aliases are tried first, those with more words
        before those with fewer; if none matches, the single word is looked up
        in ``alias_map``.
        """
        words = text.split()
        if not words:
            return text

        lower_words = [word.lower().strip(" ?!.,;:") for word in words]
        # Split every phrase once up front. Phrases with no words are dropped:
        # a zero-length match would never advance ``index`` and loop forever.
        phrase_rules = [
            (phrase.split(), meaning.split())
            for phrase, meaning in self.semantic_lexicon.phrase_alias_map.items()
        ]
        phrase_rules = [rule for rule in phrase_rules if rule[0]]
        phrase_rules.sort(key=lambda rule: len(rule[0]), reverse=True)

        alias_map = self.semantic_lexicon.alias_map
        replaced_words: list[str] = []
        total = len(words)
        index = 0
        while index < total:
            for phrase_words, meaning_words in phrase_rules:
                end_index = index + len(phrase_words)
                if end_index <= total and lower_words[index:end_index] == phrase_words:
                    replaced_words.extend(meaning_words)
                    index = end_index
                    break
            else:
                # No phrase matched here: fall back to the single-word alias.
                token = lower_words[index]
                replaced_words.extend(alias_map.get(token, token).split())
                index += 1
        return " ".join(replaced_words)
runtime/aethon/rfi_query.py ADDED
@@ -0,0 +1,1079 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import difflib
5
+ from collections import deque
6
+
7
+ from .rfi_abstraction import AbstractionEngine
8
+ from .rfi_graph import EdgeRecord, RelationalGraphStore
9
+ from .rfi_interpreter import NativeQueryInterpreter, ParsedQuery
10
+ from .rfi_lexicon import AethonNativeConceptCodec
11
+ from .rfi_math import ExactMathReasoner
12
+ from .rfi_ontology import ConceptOntology
13
+ from .rfi_query_forms import NativeQueryFormSet
14
+ from .rfi_reasoner import StructuralReasoner
15
+
16
+
17
@dataclass(frozen=True)
class QueryResult:
    """Immutable answer produced by the query engine."""

    # Answer text.
    answer: str
    # Proof entries backing the answer.
    proof: tuple[str, ...]
    # Confidence score attached to the answer.
    confidence: float
    # Label for how the answer was obtained (e.g. "direct", "derived").
    mode: str
    # Optional human-readable reasoning steps.
    reasoning: tuple[str, ...] = ()
24
+
25
+
26
+ class ProofQueryEngine:
27
+ """Proof-backed query engine over the explicit Aethon relation graph."""
28
+
29
+ _SALIENT_CLASSES = {
30
+ "predator",
31
+ "prey",
32
+ "person",
33
+ "city",
34
+ "country",
35
+ "teacher",
36
+ "student",
37
+ "developer",
38
+ "professor",
39
+ }
40
+
41
+ _PROTECTED_QUERY_TOKENS = {
42
+ "a",
43
+ "about",
44
+ "after",
45
+ "and",
46
+ "animal",
47
+ "are",
48
+ "asked",
49
+ "at",
50
+ "bought",
51
+ "buy",
52
+ "carrying",
53
+ "changed",
54
+ "chases",
55
+ "color",
56
+ "come",
57
+ "comes",
58
+ "contradiction",
59
+ "continue",
60
+ "did",
61
+ "does",
62
+ "emoji",
63
+ "first",
64
+ "happen",
65
+ "happens",
66
+ "how",
67
+ "i",
68
+ "in",
69
+ "is",
70
+ "keep",
71
+ "like",
72
+ "live",
73
+ "now",
74
+ "next",
75
+ "object",
76
+ "plan",
77
+ "prefer",
78
+ "related",
79
+ "return",
80
+ "schedule",
81
+ "should",
82
+ "story",
83
+ "there",
84
+ "tell",
85
+ "to",
86
+ "use",
87
+ "watch",
88
+ "what",
89
+ "where",
90
+ "which",
91
+ "who",
92
+ "your",
93
+ "you",
94
+ }
95
+
96
def __init__(
    self,
    graph: RelationalGraphStore,
    ontology: ConceptOntology | None = None,
    abstraction: AbstractionEngine | None = None,
    reasoner: StructuralReasoner | None = None,
    query_forms: NativeQueryFormSet | None = None,
) -> None:
    """Wire the engine to *graph*, building default collaborators where omitted.

    Args:
        graph: Relation graph store the engine answers from.
        ontology: Concept resolver; a fresh ``ConceptOntology`` if falsy.
        abstraction: Abstraction engine; built from graph and ontology if falsy.
        reasoner: Structural reasoner; built from graph if falsy.
        query_forms: Optional query-form set whose payload is handed to the
            interpreter; ``None`` passes no payload.
    """
    self.graph = graph
    self.ontology = ontology or ConceptOntology()
    self.abstraction = abstraction or AbstractionEngine(graph, self.ontology)
    self.reasoner = reasoner or StructuralReasoner(graph)
    form_payload = query_forms.to_payload() if query_forms is not None else None
    self.interpreter = NativeQueryInterpreter(
        AethonNativeConceptCodec(self.ontology),
        query_form_payload=form_payload,
    )
    self.math = ExactMathReasoner()
    # Graph mutation version last seen by ``answer``.
    self._reasoned_version = self.graph.mutation_version
    # Query-vocabulary cache and the graph version recorded alongside it.
    self._query_vocabulary_cache: list[str] | None = None
    self._query_vocabulary_version = self.graph.mutation_version
116
+
117
def answer(self, query: str) -> QueryResult | None:
    """Answer *query*, or return None when no strategy produces a result.

    The query is emoji-expanded, soft-corrected, self-reference-normalised
    and soft-corrected again. Lookups are then tried in order: metadata on
    the surface text, metadata on the ontology-normalised text, the "keep"
    location shortcut, and the interpreter's parsed intent. Every hit goes
    through ``_with_query_awareness`` with the original query and the
    accumulated corrections.
    """
    original_query = query
    query = self.ontology.expand_semantics(query)
    query, corrections = self._soft_correct_query(query)
    query = self._normalize_self_reference(query)
    query, later_corrections = self._soft_correct_query(query)
    corrections.extend(later_corrections)

    # Cheap normalisation: lower-case, drop ?/!/. and collapse whitespace.
    surface_normalized = " ".join(query.lower().replace("?", " ").replace("!", " ").replace(".", " ").split())
    metadata = self._answer_metadata_value(surface_normalized)
    if metadata is not None:
        return self._with_query_awareness(metadata, original_query, corrections)

    normalized = self.ontology.normalize(query)
    if not normalized:
        return None
    # Only the seen version is recorded here; nothing is recomputed.
    if self._reasoned_version != self.graph.mutation_version:
        self._reasoned_version = self.graph.mutation_version
    # NOTE(review): indentation was lost in the source rendering; this second
    # metadata lookup is assumed to run unconditionally — confirm upstream.
    metadata = self._answer_metadata_value(normalized)
    if metadata is not None:
        return self._with_query_awareness(metadata, original_query, corrections)

    keep_lookup = self._answer_keep_location_from_query(normalized)
    if keep_lookup is not None:
        return self._with_query_awareness(keep_lookup, original_query, corrections)

    # NOTE(review): ``query`` is already emoji-expanded at this point, so a
    # "used_emoji" intent may not see the user's original emoji — verify
    # against NativeQueryInterpreter.
    parsed = self.interpreter.parse(query)
    if parsed is not None:
        interpreted = self._answer_interpreted(parsed)
        if interpreted is not None:
            return self._with_query_awareness(interpreted, original_query, corrections)
    return None
145
+
146
def _answer_used_emoji(self, raw_query: str) -> QueryResult | None:
    """Report which emoji the ontology finds in *raw_query* and their meanings.

    Returns a result with answer ``"none"`` when no emoji is found.
    """
    emojis = self.ontology.extract_emojis(raw_query)
    if not emojis:
        return QueryResult(
            answer="none",
            proof=("emoji:none",),
            confidence=1.0,
            mode="direct",
            reasoning=("I do not find any emoji in your message.",),
        )
    return QueryResult(
        answer=", ".join(f"{emoji} ({meaning})" for emoji, meaning in emojis),
        proof=tuple(f"emoji:{emoji}->{meaning}" for emoji, meaning in emojis),
        confidence=1.0,
        mode="direct",
        reasoning=tuple(f"I find {emoji} and interpret it as {meaning}." for emoji, meaning in emojis),
    )
164
+
165
def _answer_metadata_value(self, query: str) -> QueryResult | None:
    """Answer questions about Aethon's own tokenizer or size unit.

    *query* must mention "tokenizer" or "size unit" together with "aethon"
    or "your". The answer is the display name of the object of the
    subject's ``equals`` edge. Returns None for any other query or when no
    such edge exists.
    """
    about_self = "aethon" in query or "your" in query
    if "tokenizer" in query and about_self:
        subject = "aethon_tokenizer"
    elif "size unit" in query and about_self:
        subject = "aethon_size_unit"
    else:
        return None

    equals = self._best_edge(subject, "equals")
    if equals is None:
        return None
    proof = (self._edge_to_proof(equals),)
    return QueryResult(
        answer=self.graph.get_display_name(equals.object).replace("_", " ").lower(),
        proof=proof,
        confidence=1.0,
        mode="direct",
        reasoning=self._reasoning_from_proof(proof),
    )
183
+
184
def _answer_interpreted(self, parsed: ParsedQuery) -> QueryResult | None:
    """Dispatch a parsed query to the handler for its intent.

    Each branch is guarded by ``parsed.intent`` plus the fields that intent
    needs; returns None when no branch yields a result.
    """
    # NOTE(review): block nesting was reconstructed from a rendering that had
    # lost its indentation — confirm against the upstream file.
    if parsed.intent == "used_emoji":
        return self._answer_used_emoji(parsed.raw)
    if parsed.intent == "what_changed" and parsed.subject:
        return self._answer_what_changed_subject(parsed.subject)
    if parsed.intent == "has_contradiction" and parsed.subject:
        return self._answer_has_contradiction_subject(parsed.subject)
    if parsed.intent == "math_eval" and parsed.expression:
        result = self.math.evaluate(parsed.expression)
        if result is None:
            return None
        return QueryResult(
            answer=result.value,
            proof=result.proof,
            confidence=1.0,
            mode="derived",
            reasoning=result.reasoning,
        )
    if parsed.intent == "plan_first" and parsed.subject:
        return self._answer_plan_first_subject(parsed.subject)
    if parsed.intent == "plan_next" and parsed.subject:
        return self._answer_plan_next_anchor(parsed.subject)
    if parsed.intent == "plan_previous" and parsed.subject:
        return self._answer_plan_previous_target(parsed.subject)
    if parsed.intent == "story_query" and parsed.subject:
        return self._answer_story_subject(parsed.subject, anchor=parsed.object_value or "")
    if parsed.intent == "where_entity" and parsed.subject:
        # Prefer a stored location, then one inferred from a carrier.
        location = self._direct_or_abstract(parsed.subject, "located_in")
        if location is not None:
            return location
        carried = self._infer_carried_object_location(parsed.subject)
        if carried is not None:
            return carried
        return None
    if parsed.intent == "classify" and parsed.subject:
        # Precedence: "equals" edge, then active "is_a" edge, then ontology parents.
        equals = self._best_edge(parsed.subject, "equals")
        if equals is not None:
            proof = (self._edge_to_proof(equals),)
            return QueryResult(
                answer=equals.object,
                proof=proof,
                confidence=1.0,
                mode="derived" if equals.source_kind == "derived" else "direct",
                reasoning=self._reasoning_from_proof(proof),
            )
        direct = self.graph.get_active_edge(parsed.subject, "is_a")
        if direct is not None:
            proof = (self._edge_to_proof(direct),)
            return QueryResult(
                answer=direct.object,
                proof=proof,
                confidence=1.0,
                mode="direct",
                reasoning=self._reasoning_from_proof(proof),
            )
        parents = self.ontology.lift(parsed.subject)
        if parents:
            chosen_parent = self._select_ontology_parent(tuple(parents))
            proof = (f"ontology:{parsed.subject}->is_a->{chosen_parent}",)
            return QueryResult(
                answer=chosen_parent,
                proof=proof,
                confidence=0.7,
                mode="ontology",
                reasoning=self._reasoning_from_proof(proof),
            )
    if parsed.intent == "relation_object" and parsed.subject and parsed.relation:
        raw_lower = parsed.raw.lower()
        # "where does X keep Y" may arrive as a relation query; try the
        # keep/location composition before the generic lookup.
        if raw_lower.startswith("where does ") and " keep " in raw_lower:
            keep_object = self._object_after_phrase(parsed.raw, " keep ")
            if keep_object is not None:
                keep_location = self._answer_keep_location_lookup(parsed.subject, keep_object)
                if keep_location is not None:
                    return keep_location
        if parsed.relation == "return":
            value = self._direct_or_abstract(parsed.subject, "return_value")
            if value is not None:
                return value
        return self._direct_or_abstract(parsed.subject, parsed.relation)
    if parsed.intent == "keep_location_lookup" and parsed.subject and parsed.object_value:
        return self._answer_keep_location_lookup(parsed.subject, parsed.object_value)
    if parsed.intent == "relation_subject" and parsed.object_value and parsed.relation:
        direct_subjects = self.graph.get_subjects(parsed.relation, parsed.object_value)
        if direct_subjects:
            edge = direct_subjects[0]
            proof = (self._edge_to_proof(edge),)
            return QueryResult(
                answer=edge.subject,
                proof=proof,
                confidence=1.0,
                mode="direct",
                reasoning=self._reasoning_from_proof(proof),
            )
        hierarchical_subject = self._subject_from_object_hierarchy(parsed.relation, parsed.object_value)
        if hierarchical_subject is not None:
            return hierarchical_subject
    if parsed.intent == "relation_path" and parsed.subject and parsed.object_value:
        resolved_path = self._resolve_relation_path(parsed.subject, parsed.object_value)
        if resolved_path is not None:
            answer, proof, confidence = resolved_path
            return QueryResult(
                answer=answer,
                proof=proof,
                confidence=confidence,
                mode="path",
                reasoning=self._reasoning_from_proof(proof),
            )
    return None
281
+
282
def _answer_how_related_pair(self, left: str, right: str) -> QueryResult | None:
    """Explain how *left* and *right* are related.

    A resolved relation path is preferred. Otherwise the first entry of
    *left*'s concept hierarchy that also appears in *right*'s is returned
    with confidence 0.7. Returns None when neither exists.
    """
    resolved_path = self._resolve_relation_path(left, right)
    if resolved_path is not None:
        answer, proof, confidence = resolved_path
        return QueryResult(
            answer=answer,
            proof=proof,
            confidence=confidence,
            mode="path",
            reasoning=self._reasoning_from_proof(proof),
        )

    left_parents = self._concept_hierarchy(left)
    # Build the lookup set once, not on every iteration of the search.
    right_lookup = set(self._concept_hierarchy(right))
    shared = next((parent for parent in left_parents if parent in right_lookup), None)
    if shared is None:
        return None
    proof = (f"ontology:{left}->is_a->{shared}", f"ontology:{right}->is_a->{shared}")
    return QueryResult(
        answer=shared,
        proof=proof,
        confidence=0.7,
        mode="path",
        reasoning=self._reasoning_from_proof(proof),
    )
299
+
300
def _answer_what_changed_subject(self, subject: str) -> QueryResult | None:
    """Describe the first contradiction the graph returns for *subject*.

    The answer has the form ``relation:previous->new``. Returns None when
    the graph holds no contradiction for the subject.
    """
    contradictions = self.graph.get_contradictions(subject=subject)
    if not contradictions:
        return None
    # presumably the store returns the newest contradiction first — verify
    latest = contradictions[0]
    proof = (
        f"revision:{latest.subject}-[{latest.relation}]->{latest.previous_object}",
        f"revision:{latest.subject}-[{latest.relation}]->{latest.new_object}",
    )
    return QueryResult(
        answer=f"{latest.relation}:{latest.previous_object}->{latest.new_object}",
        proof=proof,
        confidence=1.0,
        mode="revision",
        reasoning=self._reasoning_from_proof(proof),
    )
318
+
319
def _answer_has_contradiction_subject(self, subject: str) -> QueryResult | None:
    """Answer "yes"/"no" to whether the graph stores a contradiction for *subject*.

    A "yes" cites the previous and new objects of the first contradiction
    the graph returns.
    """
    contradictions = self.graph.get_contradictions(subject=subject)
    if not contradictions:
        return QueryResult(
            answer="no",
            proof=(f"no_contradiction:{subject}",),
            confidence=1.0,
            mode="direct",
            reasoning=(f"I do not find any stored contradiction for {self.graph.get_display_name(subject)}.",),
        )
    # presumably the store returns the newest contradiction first — verify
    latest = contradictions[0]
    proof = (
        f"contradiction:{latest.subject}-[{latest.relation}]->{latest.previous_object}",
        f"contradiction:{latest.subject}-[{latest.relation}]->{latest.new_object}",
    )
    return QueryResult(
        answer="yes",
        proof=proof,
        confidence=1.0,
        mode="contradiction",
        reasoning=self._reasoning_from_proof(proof),
    )
343
+
344
def _answer_keep_location_lookup(self, subject: str, object_value: str) -> QueryResult | None:
    """Answer where *subject* keeps *object_value* from its keep and location edges.

    Requires an active ``keep`` (or ``keeps``) edge from *subject* whose
    object equals *object_value*, and at least one location candidate for
    *subject*. The answer is the object of the best-ranked location edge.
    Returns None if any requirement is unmet.
    """
    keep_edge = self.graph.get_active_edge(subject, "keep")
    if keep_edge is None:
        # Fall back to the inflected relation name.
        keep_edge = self.graph.get_active_edge(subject, "keeps")
    location_edges = self._location_candidates(subject)
    if keep_edge is None or not location_edges or keep_edge.object != object_value:
        return None
    location_edge = max(location_edges, key=self._edge_rank)
    proof = (
        self._edge_to_proof(keep_edge),
        self._edge_to_proof(location_edge),
        f"compose:{subject}-[keeps]->{object_value}; {subject}-[located_in]->{location_edge.object}",
    )
    return QueryResult(
        answer=location_edge.object,
        proof=proof,
        confidence=1.0,
        mode="composed",
        reasoning=self._reasoning_from_proof(proof),
    )
367
+
368
+ def _answer_keep_location_from_query(self, query: str) -> QueryResult | None:
369
+ tokens = query.split()
370
+ if len(tokens) < 4 or tokens[0] != "where" or "keep" not in tokens:
371
+ return None
372
+ keep_index = tokens.index("keep")
373
+ subject_tokens = [token for token in tokens[1:keep_index] if token not in {"does", "is", "the", "a", "an"}]
374
+ object_tokens = [token for token in tokens[keep_index + 1 :] if token not in {"the", "a", "an"}]
375
+ if not subject_tokens or not object_tokens:
376
+ return None
377
+ subject = self.ontology.resolve(" ".join(subject_tokens)).concept_id
378
+ object_value = self.ontology.resolve(" ".join(object_tokens)).concept_id
379
+ return self._answer_keep_location_lookup(subject, object_value)
380
+
381
+ def _answer_plan_first_subject(self, target: str) -> QueryResult | None:
382
+ proof: list[str] = []
383
+ current = target
384
+ seen = {current}
385
+ while True:
386
+ edge = self._best_edge(current, "depend_on")
387
+ if edge is None or edge.object in seen:
388
+ break
389
+ proof.append(self._edge_to_proof(edge))
390
+ current = edge.object
391
+ seen.add(current)
392
+ if not proof:
393
+ return None
394
+ return QueryResult(
395
+ answer=current,
396
+ proof=tuple(proof),
397
+ confidence=1.0,
398
+ mode="plan",
399
+ reasoning=self._reasoning_from_proof(tuple(proof)),
400
+ )
401
+
402
+ def _answer_plan_next_anchor(self, anchor: str) -> QueryResult | None:
403
+ dependents = self.graph.get_subjects("depend_on", anchor)
404
+ if not dependents:
405
+ return None
406
+ edge = max(dependents, key=self._edge_rank)
407
+ return QueryResult(
408
+ answer=edge.subject,
409
+ proof=(self._edge_to_proof(edge),),
410
+ confidence=1.0,
411
+ mode="plan",
412
+ reasoning=self._reasoning_from_proof((self._edge_to_proof(edge),)),
413
+ )
414
+
415
+ def _answer_plan_previous_target(self, target: str) -> QueryResult | None:
416
+ edge = self._best_edge(target, "depend_on")
417
+ if edge is None:
418
+ return None
419
+ return QueryResult(
420
+ answer=edge.object,
421
+ proof=(self._edge_to_proof(edge),),
422
+ confidence=1.0,
423
+ mode="plan",
424
+ reasoning=self._reasoning_from_proof((self._edge_to_proof(edge),)),
425
+ )
426
+
427
+ def _answer_story_subject(self, subject: str, *, anchor: str = "") -> QueryResult | None:
428
+ allowed = {
429
+ "approved",
430
+ "bought",
431
+ "bought_in",
432
+ "carrying",
433
+ "depend_on",
434
+ "found",
435
+ "inspected",
436
+ "left",
437
+ "like",
438
+ "lives_in",
439
+ "located_in",
440
+ "met",
441
+ "planned",
442
+ "visited",
443
+ "prefer",
444
+ "reached",
445
+ "returned",
446
+ "scheduled",
447
+ "saw",
448
+ "started",
449
+ "stopped",
450
+ "watch",
451
+ }
452
+ edges = [
453
+ edge
454
+ for edge in self.graph.iter_outgoing_edges(subject)
455
+ if edge.relation in allowed and edge.source_kind != "derived"
456
+ ]
457
+ if not edges:
458
+ return None
459
+ edges.sort(key=lambda edge: edge.edge_id)
460
+ if anchor.strip():
461
+ anchor_index = self._story_anchor_index(edges, anchor.strip())
462
+ if anchor_index is not None:
463
+ edges = edges[anchor_index + 1 :]
464
+ if not edges:
465
+ return None
466
+ proof = tuple(self._edge_to_proof(edge) for edge in edges[:8])
467
+ return QueryResult(
468
+ answer=subject,
469
+ proof=proof,
470
+ confidence=0.92,
471
+ mode="story",
472
+ reasoning=self._reasoning_from_proof(proof),
473
+ )
474
+
475
+ def _story_anchor_index(self, edges: list[EdgeRecord], anchor_phrase: str) -> int | None:
476
+ anchor_normalized = self.ontology.normalize(anchor_phrase)
477
+ anchor_tokens = [token for token in anchor_normalized.split() if token]
478
+ if not anchor_tokens:
479
+ return None
480
+ for index, edge in enumerate(edges):
481
+ candidates = {
482
+ self.ontology.normalize(f"{edge.relation} {edge.object}"),
483
+ self.ontology.normalize(self.graph.get_display_name(edge.relation)),
484
+ self.ontology.normalize(self.graph.get_display_name(edge.object)),
485
+ self.ontology.normalize(f"{self.graph.get_display_name(edge.relation)} {self.graph.get_display_name(edge.object)}"),
486
+ }
487
+ for candidate in candidates:
488
+ candidate_tokens = candidate.split()
489
+ if candidate_tokens and all(token in candidate_tokens for token in anchor_tokens):
490
+ return index
491
+ return None
492
+
493
+ def _infer_carried_object_location(self, object_value: str) -> QueryResult | None:
494
+ carriers = self.graph.get_subjects("carrying", object_value)
495
+ if not carriers:
496
+ return None
497
+ best_carrier = max(carriers, key=self._edge_rank)
498
+ carrier_locations = self._location_candidates(best_carrier.subject)
499
+ if not carrier_locations:
500
+ return None
501
+ location_edge = max(carrier_locations, key=self._edge_rank)
502
+ proof = (
503
+ self._edge_to_proof(best_carrier),
504
+ self._edge_to_proof(location_edge),
505
+ f"compose:{best_carrier.subject}-[carrying]->{object_value}; {best_carrier.subject}-[located_in]->{location_edge.object}",
506
+ )
507
+ return QueryResult(
508
+ answer=location_edge.object,
509
+ proof=proof,
510
+ confidence=0.92,
511
+ mode="composed",
512
+ reasoning=self._reasoning_from_proof(proof),
513
+ )
514
+
515
+ def _answer_who_chases_object(self, obj: str) -> QueryResult | None:
516
+ direct_subjects = self.graph.get_subjects("chase", obj)
517
+ if direct_subjects:
518
+ edge = direct_subjects[0]
519
+ return QueryResult(
520
+ answer=edge.subject,
521
+ proof=(f"direct:{edge.subject}-[chase]->{edge.object}",),
522
+ confidence=1.0,
523
+ mode="direct",
524
+ reasoning=self._reasoning_from_proof((f"direct:{edge.subject}-[chase]->{edge.object}",)),
525
+ )
526
+ hierarchical_subject = self._subject_from_object_hierarchy("chase", obj)
527
+ if hierarchical_subject is not None:
528
+ return hierarchical_subject
529
+ candidate_rules = [
530
+ rule
531
+ for rule in self.abstraction.derive_rules()
532
+ if rule.relation == "chase" and rule.object_class in set(self._concept_hierarchy(obj))
533
+ ]
534
+ candidate_rules.sort(key=self._rule_specificity, reverse=True)
535
+ for rule in candidate_rules:
536
+ return QueryResult(
537
+ answer=rule.subject_class,
538
+ proof=(f"abstract:{rule.subject_class}-[chase]->{rule.object_class}", f"object:{obj}->is_a->{rule.object_class}"),
539
+ confidence=0.65,
540
+ mode="abstract",
541
+ reasoning=self._reasoning_from_proof((f"abstract:{rule.subject_class}-[chase]->{rule.object_class}", f"object:{obj}->is_a->{rule.object_class}")),
542
+ )
543
+ return None
544
+
545
+ def _direct_or_abstract(self, subject: str, relation: str) -> QueryResult | None:
546
+ direct = self._best_edge(subject, relation)
547
+ if direct is not None:
548
+ return QueryResult(
549
+ answer=direct.object,
550
+ proof=(self._edge_to_proof(direct),),
551
+ confidence=1.0,
552
+ mode="derived" if direct.source_kind == "derived" else "direct",
553
+ reasoning=self._reasoning_from_proof((self._edge_to_proof(direct),)),
554
+ )
555
+ inherited = self._inherit_relation_from_parents(subject, relation)
556
+ if inherited is not None:
557
+ return inherited
558
+ exemplar_inference = self._infer_relation_from_exemplars(subject, relation)
559
+ if exemplar_inference is not None:
560
+ return exemplar_inference
561
+
562
+ subject_parents = set(self._concept_hierarchy(subject))
563
+ candidate_rules = [
564
+ rule
565
+ for rule in self.abstraction.derive_rules()
566
+ if rule.relation == relation and rule.subject_class in subject_parents
567
+ ]
568
+ candidate_rules.sort(key=self._rule_specificity, reverse=True)
569
+ for rule in candidate_rules:
570
+ return QueryResult(
571
+ answer=rule.object_class,
572
+ proof=(f"abstract:{rule.subject_class}-[{relation}]->{rule.object_class}", f"subject:{subject}->is_a->{rule.subject_class}"),
573
+ confidence=0.7,
574
+ mode="abstract",
575
+ reasoning=self._reasoning_from_proof((f"abstract:{rule.subject_class}-[{relation}]->{rule.object_class}", f"subject:{subject}->is_a->{rule.subject_class}")),
576
+ )
577
+ return None
578
+
579
+ def _inherit_relation_from_parents(self, subject: str, relation: str) -> QueryResult | None:
580
+ for parent in self._concept_hierarchy(subject):
581
+ inherited_edge = self._best_edge(parent, relation)
582
+ if inherited_edge is None:
583
+ continue
584
+ proof = (
585
+ f"subject:{subject}->is_a->{parent}",
586
+ self._edge_to_proof(inherited_edge),
587
+ )
588
+ return QueryResult(
589
+ answer=inherited_edge.object,
590
+ proof=proof,
591
+ confidence=0.76 if inherited_edge.source_kind != "derived" else 0.7,
592
+ mode="abstract",
593
+ reasoning=self._reasoning_from_proof(proof),
594
+ )
595
+ return None
596
+
597
+ def _subject_from_object_hierarchy(self, relation: str, object_value: str) -> QueryResult | None:
598
+ for parent in self._concept_hierarchy(object_value):
599
+ direct_subjects = self.graph.get_subjects(relation, parent)
600
+ if not direct_subjects:
601
+ continue
602
+ edge = max(direct_subjects, key=self._edge_rank)
603
+ proof = (
604
+ self._edge_to_proof(edge),
605
+ f"object:{object_value}->is_a->{parent}",
606
+ )
607
+ return QueryResult(
608
+ answer=edge.subject,
609
+ proof=proof,
610
+ confidence=0.74 if edge.source_kind != "derived" else 0.68,
611
+ mode="abstract",
612
+ reasoning=self._reasoning_from_proof(proof),
613
+ )
614
+ exemplar_inference = self._infer_subject_from_exemplars(relation, object_value)
615
+ if exemplar_inference is not None:
616
+ return exemplar_inference
617
+ return None
618
+
619
+ def _resolve_relation_path(self, left: str, right: str) -> tuple[str, tuple[str, ...], float] | None:
620
+ grounded_location = self._resolve_grounded_location_path(left, right)
621
+ if grounded_location is not None:
622
+ return grounded_location
623
+ anchors_left = [left, *self._concept_hierarchy(left)]
624
+ anchors_right = [right, *self._concept_hierarchy(right)]
625
+ best: tuple[int, int, int, int, int, list[EdgeRecord], str, str] | None = None
626
+ for left_anchor in anchors_left[:6]:
627
+ for right_anchor in anchors_right[:6]:
628
+ path = self.graph.find_path(left_anchor, right_anchor, max_hops=6, include_derived=False)
629
+ if path is None:
630
+ path = self.graph.find_path(left_anchor, right_anchor, max_hops=6, include_derived=True)
631
+ if path is None:
632
+ continue
633
+ hierarchy_cost = int(left_anchor != left) + int(right_anchor != right)
634
+ location_penalty = 0 if all(edge.relation in {"lives_in", "located_in"} for edge in path) else 1
635
+ reverse_penalty = sum(
636
+ 1 for edge in path if edge.relation in {"contains", "home_of", "has_instance"}
637
+ )
638
+ derived_penalty = sum(1 for edge in path if edge.source_kind == "derived")
639
+ candidate = (
640
+ hierarchy_cost,
641
+ location_penalty,
642
+ reverse_penalty,
643
+ derived_penalty,
644
+ len(path),
645
+ path,
646
+ left_anchor,
647
+ right_anchor,
648
+ )
649
+ if best is None or candidate[:5] < best[:5]:
650
+ best = candidate
651
+ if best is None:
652
+ return None
653
+ _, _, _, _, _, path, left_anchor, right_anchor = best
654
+ proof: list[str] = []
655
+ if left_anchor != left:
656
+ proof.append(f"subject:{left}->is_a->{left_anchor}")
657
+ proof.extend(self._edge_to_proof(edge) for edge in path)
658
+ if right_anchor != right:
659
+ proof.append(f"object:{right}->is_a->{right_anchor}")
660
+ answer = " -> ".join(edge.relation for edge in path)
661
+ confidence = 1.0 if left_anchor == left and right_anchor == right else 0.78
662
+ return answer, tuple(proof), confidence
663
+
664
+ def _resolve_grounded_location_path(self, left: str, right: str) -> tuple[str, tuple[str, ...], float] | None:
665
+ allowed_relations = {"lives_in", "located_in"}
666
+ queue: deque[tuple[str, list[EdgeRecord]]] = deque([(left, [])])
667
+ seen = {left}
668
+ while queue:
669
+ node, path = queue.popleft()
670
+ if len(path) >= 6:
671
+ continue
672
+ for relation in ("lives_in", "located_in"):
673
+ for edge in self.graph.get_objects(node, relation):
674
+ if edge.source_kind == "derived":
675
+ continue
676
+ next_path = [*path, edge]
677
+ if edge.object == right and all(item.relation in allowed_relations for item in next_path):
678
+ proof = tuple(self._edge_to_proof(item) for item in next_path)
679
+ answer = " -> ".join(item.relation for item in next_path)
680
+ return answer, proof, 1.0
681
+ if edge.object not in seen:
682
+ seen.add(edge.object)
683
+ queue.append((edge.object, next_path))
684
+ return None
685
+
686
+ def _infer_relation_from_exemplars(self, subject: str, relation: str) -> QueryResult | None:
687
+ for subject_class in self._concept_hierarchy(subject):
688
+ exemplars = self.graph.get_subjects("is_a", subject_class)
689
+ if not exemplars:
690
+ continue
691
+ class_votes: dict[str, list[str]] = {}
692
+ for exemplar in exemplars:
693
+ for edge in self.graph.get_objects(exemplar.subject, relation):
694
+ for object_class in self._concept_hierarchy(edge.object):
695
+ class_votes.setdefault(object_class, []).append(self._edge_to_proof(edge))
696
+ if not class_votes:
697
+ continue
698
+ object_class, proofs = max(class_votes.items(), key=lambda item: (len(item[1]), len(self._concept_hierarchy(item[0]))))
699
+ if not proofs:
700
+ continue
701
+ proof = [f"subject:{subject}->is_a->{subject_class}"]
702
+ proof.extend(proofs[:2])
703
+ proof.append(f"object:{self._parse_reasoning_edge(proofs[0])[2]}->is_a->{object_class}")
704
+ proof_tuple = tuple(proof)
705
+ return QueryResult(
706
+ answer=object_class,
707
+ proof=proof_tuple,
708
+ confidence=0.73,
709
+ mode="abstract",
710
+ reasoning=self._reasoning_from_proof(proof_tuple),
711
+ )
712
+ return None
713
+
714
+ def _infer_subject_from_exemplars(self, relation: str, object_value: str) -> QueryResult | None:
715
+ for object_class in self._concept_hierarchy(object_value):
716
+ exemplars = self.graph.get_subjects("is_a", object_class)
717
+ if not exemplars:
718
+ continue
719
+ class_votes: dict[str, list[str]] = {}
720
+ for exemplar in exemplars:
721
+ for edge in self.graph.get_subjects(relation, exemplar.subject):
722
+ for subject_class in self._concept_hierarchy(edge.subject):
723
+ class_votes.setdefault(subject_class, []).append(self._edge_to_proof(edge))
724
+ if not class_votes:
725
+ continue
726
+ subject_class, proofs = max(class_votes.items(), key=lambda item: (len(item[1]), len(self._concept_hierarchy(item[0]))))
727
+ if not proofs:
728
+ continue
729
+ proof = [f"object:{object_value}->is_a->{object_class}"]
730
+ proof.extend(proofs[:2])
731
+ proof.append(f"subject:{self._parse_reasoning_edge(proofs[0])[0]}->is_a->{subject_class}")
732
+ proof_tuple = tuple(proof)
733
+ return QueryResult(
734
+ answer=subject_class,
735
+ proof=proof_tuple,
736
+ confidence=0.73,
737
+ mode="abstract",
738
+ reasoning=self._reasoning_from_proof(proof_tuple),
739
+ )
740
+ return None
741
+
742
+ @staticmethod
743
+ def _edge_to_proof(edge: EdgeRecord) -> str:
744
+ if edge.source_kind == "derived" and edge.source_text:
745
+ return f"{edge.source_kind}:{edge.subject}-[{edge.relation}]->{edge.object}|{edge.source_text}"
746
+ return f"{edge.source_kind}:{edge.subject}-[{edge.relation}]->{edge.object}"
747
+
748
+ def _rule_specificity(self, rule) -> tuple[int, int, int]:
749
+ subject_depth = len(self._concept_hierarchy(rule.subject_class))
750
+ object_depth = len(self._concept_hierarchy(rule.object_class))
751
+ return (object_depth, subject_depth, rule.support)
752
+
753
+ def _best_edge(self, subject: str, relation: str) -> EdgeRecord | None:
754
+ candidates = self.graph.get_objects(subject, relation)
755
+ if not candidates:
756
+ return None
757
+ if relation == "located_in":
758
+ return self._best_location_edge(candidates)
759
+ return max(candidates, key=self._edge_rank)
760
+
761
+ def _edge_rank(self, edge: EdgeRecord) -> tuple[int, int, int]:
762
+ is_direct = 1 if edge.source_kind != "derived" else 0
763
+ object_depth = len(self._concept_hierarchy(edge.object))
764
+ return (is_direct, object_depth, edge.edge_id)
765
+
766
+ def _best_location_edge(self, candidates: list[EdgeRecord]) -> EdgeRecord:
767
+ def specificity(edge: EdgeRecord) -> int:
768
+ score = 0
769
+ for other in candidates:
770
+ if other.edge_id == edge.edge_id:
771
+ continue
772
+ if any(child.object == other.object for child in self.graph.get_objects(edge.object, "located_in")):
773
+ score += 1
774
+ return score
775
+
776
+ return max(candidates, key=lambda edge: (specificity(edge),) + self._edge_rank(edge))
777
+
778
+ def _location_candidates(self, subject: str) -> list[EdgeRecord]:
779
+ return self.graph.get_objects(subject, "located_in") + self.graph.get_objects(subject, "lives_in")
780
+
781
+ def _select_ontology_parent(self, parents: tuple[str, ...]) -> str:
782
+ if len(parents) == 1:
783
+ return parents[0]
784
+ derived_rules = self.abstraction.derive_rules()
785
+ scored: list[tuple[int, int, int, int, str]] = []
786
+ for index, parent in enumerate(parents):
787
+ outgoing = [
788
+ edge
789
+ for edge in self.graph.iter_outgoing_edges(parent)
790
+ if edge.relation not in {"is_a", "has_instance", "contains", "located_in"}
791
+ ]
792
+ exemplars = self.graph.get_subjects("is_a", parent)
793
+ exemplar_count = len(exemplars)
794
+ exemplar_behavior = 0
795
+ for exemplar in exemplars:
796
+ exemplar_behavior += sum(
797
+ 1
798
+ for edge in self.graph.iter_outgoing_edges(exemplar.subject)
799
+ if edge.relation not in {"is_a", "has_instance", "contains", "located_in"}
800
+ )
801
+ abstract_behavior = sum(
802
+ 1
803
+ for rule in derived_rules
804
+ if rule.subject_class == parent and rule.relation not in {"is_a", "has_instance", "contains", "located_in"}
805
+ )
806
+ scored.append((exemplar_behavior, abstract_behavior, len(outgoing), exemplar_count, -index, parent))
807
+ best = max(scored)
808
+ if best[0] == 0 and best[1] == 0 and best[2] == 0 and best[3] == 0:
809
+ salient = [parent for parent in parents if parent in self._SALIENT_CLASSES]
810
+ if salient:
811
+ return salient[-1]
812
+ return parents[0]
813
+ return best[5]
814
+
815
+ def _object_after_phrase(self, raw_query: str, phrase: str) -> str | None:
816
+ normalized = self.ontology.normalize(raw_query)
817
+ if phrase.strip() not in normalized:
818
+ return None
819
+ _, tail = normalized.split(phrase.strip(), 1)
820
+ candidate = tail.strip()
821
+ if not candidate:
822
+ return None
823
+ return self.ontology.resolve(candidate).concept_id
824
+
825
+ def _concept_hierarchy(self, concept: str) -> list[str]:
826
+ seen: set[str] = set()
827
+ ordered: list[str] = []
828
+
829
+ def visit(node: str) -> None:
830
+ for edge in self.graph.get_objects(node, "is_a"):
831
+ if edge.object not in seen:
832
+ seen.add(edge.object)
833
+ ordered.append(edge.object)
834
+ visit(edge.object)
835
+ for parent in self.ontology.lift(node):
836
+ if parent not in seen:
837
+ seen.add(parent)
838
+ ordered.append(parent)
839
+
840
+ visit(concept)
841
+ return ordered
842
+
843
+ def _reasoning_from_proof(self, proof: tuple[str, ...]) -> tuple[str, ...]:
844
+ steps: list[str] = []
845
+ openers = (
846
+ "I start from",
847
+ "Then I use",
848
+ "Next I rely on",
849
+ "After that I connect",
850
+ "From there I infer",
851
+ "Finally I conclude from",
852
+ )
853
+ expanded_proof = self._expand_reasoning_steps(proof)
854
+ for index, step in enumerate(expanded_proof, start=1):
855
+ rendered = self._render_reasoning_step(step)
856
+ opener = openers[(index - 1) % len(openers)]
857
+ steps.append(f"{opener} {rendered}.")
858
+ return tuple(steps)
859
+
860
+ def _expand_reasoning_steps(self, proof: tuple[str, ...]) -> tuple[str, ...]:
861
+ expanded: list[str] = []
862
+ for step in proof:
863
+ expanded.append(step)
864
+ if not step.startswith("derived:") or "|" not in step:
865
+ continue
866
+ _, source_text = step.split("|", 1)
867
+ for detail in source_text.split("|"):
868
+ detail = detail.strip()
869
+ if not detail:
870
+ continue
871
+ expanded.append(detail)
872
+ return tuple(expanded)
873
+
874
+ def _render_reasoning_step(self, step: str) -> str:
875
+ if "-[" in step and "]->" in step:
876
+ parsed = self._parse_reasoning_edge(step)
877
+ if parsed is not None:
878
+ subject, relation, object_value = parsed
879
+ return (
880
+ f"{self.graph.get_display_name(subject)} "
881
+ f"{relation.replace('_', ' ')} "
882
+ f"{self.graph.get_display_name(object_value)}"
883
+ )
884
+ if step.startswith("ontology:"):
885
+ payload = step.split(":", 1)[1]
886
+ subject, _, parent = payload.partition("->is_a->")
887
+ return f"{self.graph.get_display_name(subject)} belongs to {self.graph.get_display_name(parent)}"
888
+ if step.startswith("subject:"):
889
+ payload = step.split(":", 1)[1]
890
+ subject, _, parent = payload.partition("->is_a->")
891
+ return f"{self.graph.get_display_name(subject)} belongs to {self.graph.get_display_name(parent)}"
892
+ if step.startswith("object:"):
893
+ payload = step.split(":", 1)[1]
894
+ obj, _, parent = payload.partition("->is_a->")
895
+ return f"{self.graph.get_display_name(obj)} belongs to {self.graph.get_display_name(parent)}"
896
+ if step.startswith("compose:"):
897
+ payload = step.split(":", 1)[1]
898
+ return f"the composed link {payload}".replace("_", " ")
899
+ if step.startswith("revision:"):
900
+ payload = step.split(":", 1)[1]
901
+ return f"the revision {payload}".replace("_", " ")
902
+ if step.startswith("contradiction:"):
903
+ payload = step.split(":", 1)[1]
904
+ return f"the contradiction {payload}".replace("_", " ")
905
+ if step.startswith("no_contradiction:"):
906
+ payload = step.split(":", 1)[1]
907
+ return f"there is no stored contradiction for {self.graph.get_display_name(payload)}"
908
+ if step.startswith("emoji:"):
909
+ payload = step.split(":", 1)[1]
910
+ return f"the symbol {payload}".replace("_", " ")
911
+ if step.startswith("math:"):
912
+ payload = step.split(":", 1)[1]
913
+ return f"the computation {payload}".replace("_", " ")
914
+ if step.startswith("reason:"):
915
+ payload = step.split(":", 1)[1]
916
+ return f"the rule {payload}".replace("_", " ")
917
+ return step.replace("_", " ")
918
+
919
+ @staticmethod
920
+ def _parse_reasoning_edge(step: str) -> tuple[str, str, str] | None:
921
+ payload = step.split(":", 1)[-1].split("|", 1)[0]
922
+ if "-[" not in payload or "]->" not in payload:
923
+ return None
924
+ subject, rest = payload.split("-[", 1)
925
+ relation, object_value = rest.split("]->", 1)
926
+ return subject, relation, object_value
927
+
928
+ @staticmethod
929
+ def _normalize_self_reference(query: str) -> str:
930
+ normalized = query.strip()
931
+ lowered = normalized.lower()
932
+ if lowered.startswith("who are you"):
933
+ return "What is Aethon?"
934
+ if lowered.startswith("what are you"):
935
+ return "What is Aethon?"
936
+ if lowered.startswith("what is your tokenizer"):
937
+ return "What is Aethon tokenizer?"
938
+ if lowered.startswith("what is your size unit"):
939
+ return "What is Aethon size unit?"
940
+ if lowered.startswith("what is aethon tokenizer"):
941
+ return "What is Aethon tokenizer?"
942
+ if lowered.startswith("what is aethon size unit"):
943
+ return "What is Aethon size unit?"
944
+ replaced: list[str] = []
945
+ for token in normalized.split():
946
+ lower = token.lower()
947
+ if lower == "your":
948
+ replaced.append("Aethon")
949
+ elif lower == "you":
950
+ replaced.append("Aethon")
951
+ else:
952
+ replaced.append(token)
953
+ return " ".join(replaced)
954
+
955
+ def _with_query_awareness(self, result: QueryResult, original_query: str, corrections: list[tuple[str, str]]) -> QueryResult:
956
+ if not corrections:
957
+ return result
958
+ seen: set[tuple[str, str]] = set()
959
+ notes: list[str] = []
960
+ for source, target in corrections:
961
+ pair = (source.lower(), target.lower())
962
+ if pair in seen or source.lower() == target.lower():
963
+ continue
964
+ seen.add(pair)
965
+ notes.append(
966
+ f'I read "{source}" as "{self.graph.get_display_name(target.lower().replace(" ", "_")) if "_" in target or target.islower() else target}" so the prompt still stays grounded.'
967
+ )
968
+ if not notes:
969
+ return result
970
+ return QueryResult(
971
+ answer=result.answer,
972
+ proof=result.proof,
973
+ confidence=result.confidence,
974
+ mode=result.mode,
975
+ reasoning=tuple(notes) + result.reasoning,
976
+ )
977
+
978
+ def _soft_correct_query(self, query: str) -> tuple[str, list[tuple[str, str]]]:
979
+ tokens = query.split()
980
+ if not tokens:
981
+ return query, []
982
+ vocabulary = self._query_vocabulary()
983
+ corrected: list[str] = []
984
+ corrections: list[tuple[str, str]] = []
985
+ for token in tokens:
986
+ prefix_end = 0
987
+ while prefix_end < len(token) and not token[prefix_end].isalnum():
988
+ prefix_end += 1
989
+ suffix_start = len(token)
990
+ while suffix_start > prefix_end and not token[suffix_start - 1].isalnum():
991
+ suffix_start -= 1
992
+ prefix = token[:prefix_end]
993
+ suffix = token[suffix_start:]
994
+ core = token[len(prefix) : len(token) - len(suffix) if suffix else len(token)]
995
+ lower_core = core.lower()
996
+ if lower_core in self._PROTECTED_QUERY_TOKENS:
997
+ corrected.append(token)
998
+ continue
999
+ if lower_core in self.ontology.semantic_lexicon.typo_map:
1000
+ replacement = self.ontology.semantic_lexicon.typo_map[lower_core]
1001
+ if core[:1].isupper():
1002
+ replacement = replacement.capitalize()
1003
+ corrections.append((core, replacement))
1004
+ corrected.append(f"{prefix}{replacement}{suffix}")
1005
+ continue
1006
+ if len(lower_core) <= 2 or not lower_core or lower_core in vocabulary:
1007
+ corrected.append(token)
1008
+ continue
1009
+ match = difflib.get_close_matches(lower_core, vocabulary, n=1, cutoff=0.72)
1010
+ if match:
1011
+ replacement = match[0]
1012
+ if core[:1].isupper():
1013
+ replacement = replacement.capitalize()
1014
+ corrections.append((core, replacement))
1015
+ corrected.append(f"{prefix}{replacement}{suffix}")
1016
+ else:
1017
+ corrected.append(token)
1018
+ return " ".join(corrected), corrections
1019
+
1020
+ def _query_vocabulary(self) -> list[str]:
1021
+ if (
1022
+ self._query_vocabulary_cache is not None
1023
+ and self._query_vocabulary_version == self.graph.mutation_version
1024
+ ):
1025
+ return self._query_vocabulary_cache
1026
+ base_words = {
1027
+ "what",
1028
+ "who",
1029
+ "where",
1030
+ "how",
1031
+ "is",
1032
+ "are",
1033
+ "does",
1034
+ "did",
1035
+ "the",
1036
+ "aethon",
1037
+ "tokenizer",
1038
+ "size",
1039
+ "unit",
1040
+ "your",
1041
+ "you",
1042
+ "please",
1043
+ "thanks",
1044
+ "happy",
1045
+ "sad",
1046
+ "thinking",
1047
+ "love",
1048
+ "approve",
1049
+ "correct",
1050
+ "wrong",
1051
+ "related",
1052
+ "return",
1053
+ "returns",
1054
+ "depend",
1055
+ "depends",
1056
+ "on",
1057
+ "work",
1058
+ "works",
1059
+ "live",
1060
+ "like",
1061
+ "prefer",
1062
+ "carrying",
1063
+ "buy",
1064
+ "bought",
1065
+ "watch",
1066
+ "chase",
1067
+ "solve",
1068
+ "plus",
1069
+ "minus",
1070
+ "times",
1071
+ "divided",
1072
+ "by",
1073
+ }
1074
+ for concept in self.graph.list_concepts():
1075
+ base_words.update(part for part in concept.split("_") if part)
1076
+ base_words.add(concept.replace("_", " "))
1077
+ self._query_vocabulary_cache = sorted(base_words)
1078
+ self._query_vocabulary_version = self.graph.mutation_version
1079
+ return self._query_vocabulary_cache
runtime/aethon/rfi_query_forms.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import json
5
+ from pathlib import Path
6
+
7
+
8
@dataclass(frozen=True)
class QueryForm:
    """Immutable description of one native query form.

    The fields mirror the keys of a row in the query-forms payload. Only
    ``intent`` and ``prefix`` are required when constructing a form directly;
    the rest have defaults. How each field is interpreted is decided by the
    code that consumes the forms, which is not part of this module.
    """

    # Required fields.
    intent: str
    prefix: tuple[str, ...]

    # Relation selection.
    relation: str = ""
    relation_mode: str = "fixed"

    # Subject extraction.
    subject_mode: str = "tail"
    subject_start: int = 0
    subject_end_anchor: str = ""

    # Object extraction.
    object_start_anchor: str = ""
    object_mode: str = "none"

    # Extra conditions attached to the form.
    requires: tuple[str, ...] = ()
20
+
21
+
22
class NativeQueryFormSet:
    """Collection of ``QueryForm`` objects loaded from a payload or a JSON file.

    An explicit ``payload`` takes precedence over the file. Without a payload
    the forms are read from ``path`` (``DEFAULT_PATH`` when omitted); a
    missing file yields an empty collection instead of an error.
    """

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "query" / "aethon_query_forms_v1.json"

    def __init__(self, path: str | Path | None = None, payload: list[dict[str, object]] | None = None) -> None:
        self.path = self.DEFAULT_PATH if path is None else Path(path)
        self.forms = self._load_forms(payload)

    def _load_forms(self, payload: list[dict[str, object]] | None) -> tuple[QueryForm, ...]:
        """Build the forms from ``payload`` or, when it is ``None``, from ``self.path``."""
        rows = payload
        if rows is None:
            if not self.path.exists():
                return ()
            rows = json.loads(self.path.read_text(encoding="utf-8"))
        return tuple(self._row_to_form(row) for row in rows)

    @staticmethod
    def _row_to_form(row: dict[str, object]) -> QueryForm:
        """Convert one payload row into a ``QueryForm``.

        ``intent`` is mandatory (``KeyError`` when absent); every other key
        falls back to the dataclass default. Text values are stripped.
        """

        def text(key: str, default: str = "") -> str:
            return str(row.get(key, default)).strip()

        def texts(key: str) -> tuple[str, ...]:
            return tuple(str(item).strip() for item in row.get(key, []))

        return QueryForm(
            intent=str(row["intent"]).strip(),
            prefix=texts("prefix"),
            relation=text("relation"),
            relation_mode=text("relation_mode", "fixed"),
            subject_mode=text("subject_mode", "tail"),
            subject_start=int(row.get("subject_start", 0)),
            subject_end_anchor=text("subject_end_anchor"),
            object_start_anchor=text("object_start_anchor"),
            object_mode=text("object_mode", "none"),
            requires=texts("requires"),
        )

    def to_payload(self) -> list[dict[str, object]]:
        """Serialize the forms back into JSON-compatible rows (tuples become lists)."""
        rows: list[dict[str, object]] = []
        for form in self.forms:
            rows.append(
                {
                    "intent": form.intent,
                    "prefix": list(form.prefix),
                    "relation": form.relation,
                    "relation_mode": form.relation_mode,
                    "subject_mode": form.subject_mode,
                    "subject_start": form.subject_start,
                    "subject_end_anchor": form.subject_end_anchor,
                    "object_start_anchor": form.object_start_anchor,
                    "object_mode": form.object_mode,
                    "requires": list(form.requires),
                }
            )
        return rows
runtime/aethon/rfi_reasoner.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+
5
+ from .rfi_graph import EdgeRecord, RelationalGraphStore
6
+ from .rfi_reasoning_rules import NativeReasoningRuleSet, ReasoningRule
7
+
8
+
9
class StructuralReasoner:
    """Executes native reasoning rules over the relation graph.

    Rules come from two sources: ``base_rules`` loaded through
    ``NativeReasoningRuleSet`` and ``induced_rules`` discovered from the
    graph by :meth:`induce_rules`. :meth:`materialize` applies both sets
    repeatedly and stores each new conclusion as a derived fact.
    """

    # Rule kind -> name of the method that applies it. Kinds that are not
    # listed here are ignored by ``_apply_rule``.
    _RULE_HANDLERS = {
        "transitive": "_apply_transitive",
        "via_chain": "_apply_via_chain",
        "copy": "_apply_copy",
        "suffix_copy": "_apply_suffix_copy",
        "object_support": "_apply_object_support",
        "object_bridge": "_apply_object_bridge",
        "via_object": "_apply_via_object",
    }

    def __init__(self, graph: RelationalGraphStore, rules_path: str | None = None, rules_payload: dict | None = None) -> None:
        """Bind the reasoner to ``graph`` and load the base rule set."""
        self.graph = graph
        self.rule_set = NativeReasoningRuleSet(rules_path, rules_payload)
        self.base_rules = self.rule_set.rules
        self.induced_rules: tuple[ReasoningRule, ...] = ()

    def materialize(self, *, max_rounds: int = 4, induce_min_support: int = 2) -> list[int]:
        """Apply all rules until a round adds nothing or ``max_rounds`` is hit.

        Rules are re-induced once at the start of the call. The graph is
        committed a single time at the end, and only when something was added.

        Returns:
            The ids returned by the graph for every derived fact created.
        """
        created: list[int] = []
        self.induced_rules = self.induce_rules(min_support=induce_min_support)
        for _ in range(max_rounds):
            round_ids: list[int] = []
            for rule in self._all_rules():
                round_ids.extend(self._apply_rule(rule))
            if not round_ids:
                break
            created.extend(round_ids)
        if created:
            self.graph.commit()
        return created

    def induce_rules(self, *, min_support: int = 2) -> tuple[ReasoningRule, ...]:
        """Propose ``via_chain`` rules from shortcut patterns already in the graph.

        A pattern is two chained edges ``s -[left]-> m -[right]-> o`` (with
        ``s != o``) together with a direct edge ``s -[out]-> o``. Each
        ``(left, right, out)`` triple seen at least ``min_support`` times
        becomes a rule, unless a base rule already has the same
        ``via_chain`` signature.
        """
        existing = {
            (rule.kind, rule.left_relation, rule.right_relation, rule.output_relation)
            for rule in self.base_rules
        }
        # Snapshot the edges: they are walked twice below, which would
        # silently yield nothing on the second pass for a one-shot iterator.
        active_edges = list(self.graph.iter_active_edges())
        outgoing: dict[str, list[EdgeRecord]] = {}
        # (subject, object) -> relations of the direct edges between them, in
        # edge order, so a shortcut lookup does not rescan every outgoing edge.
        relations_between: dict[tuple[str, str], list[str]] = {}
        for edge in active_edges:
            outgoing.setdefault(edge.subject, []).append(edge)
            relations_between.setdefault((edge.subject, edge.object), []).append(edge.relation)

        counts: Counter[tuple[str, str, str]] = Counter()
        for left in active_edges:
            for right in outgoing.get(left.object, ()):
                if left.subject == right.object:
                    continue  # a two-step cycle is not a shortcut pattern
                for shortcut_relation in relations_between.get((left.subject, right.object), ()):
                    counts[(left.relation, right.relation, shortcut_relation)] += 1

        induced: list[ReasoningRule] = []
        for (left_relation, right_relation, output_relation), support in counts.items():
            signature = ("via_chain", left_relation, right_relation, output_relation)
            if support < min_support or signature in existing:
                continue
            induced.append(
                ReasoningRule(
                    name=f"induced_{left_relation}_{right_relation}_{output_relation}",
                    kind="via_chain",
                    left_relation=left_relation,
                    right_relation=right_relation,
                    output_relation=output_relation,
                    confidence=0.65,
                )
            )
        return tuple(induced)

    def _all_rules(self) -> tuple[ReasoningRule, ...]:
        """Return base rules followed by the currently induced rules."""
        return self.base_rules + self.induced_rules

    def _apply_rule(self, rule: ReasoningRule) -> list[int]:
        """Dispatch ``rule`` by its ``kind``; unknown kinds create nothing."""
        handler_name = self._RULE_HANDLERS.get(rule.kind)
        if handler_name is None:
            return []
        return getattr(self, handler_name)(rule)

    def _collect(self, rule: ReasoningRule, candidates) -> list[int]:
        """Store each ``(subject, object, support_edges)`` candidate and gather new ids.

        ``candidates`` is consumed lazily, so graph lookups made by the
        producing generator stay interleaved with the inserts made here.
        """
        created: list[int] = []
        for subject, object_value, support_edges in candidates:
            created_id = self._add_if_supported(
                subject=subject,
                relation=rule.output_relation,
                object_value=object_value,
                rule_name=rule.name,
                support_edges=support_edges,
            )
            if created_id is not None:
                created.append(created_id)
        return created

    def _chain_candidates(self, rule: ReasoningRule):
        """Yield ``s -> o`` for every ``s -[left]-> m -[right]-> o`` chain."""
        for left in self.graph.iter_active_edges():
            if left.relation != rule.left_relation:
                continue
            for right in self.graph.get_objects(left.object, rule.right_relation):
                yield left.subject, right.object, (left, right)

    def _apply_transitive(self, rule: ReasoningRule) -> list[int]:
        """Derive ``s -[output]-> o`` from a left/right chain."""
        return self._collect(rule, self._chain_candidates(rule))

    def _apply_via_chain(self, rule: ReasoningRule) -> list[int]:
        """Derive ``s -[output]-> o`` from a left/right chain (same walk as transitive)."""
        return self._collect(rule, self._chain_candidates(rule))

    def _apply_copy(self, rule: ReasoningRule) -> list[int]:
        """Re-state every ``source_relation`` edge under ``output_relation``."""
        return self._collect(
            rule,
            (
                (edge.subject, edge.object, (edge, edge))
                for edge in self.graph.iter_active_edges()
                if edge.relation == rule.source_relation
            ),
        )

    def _apply_suffix_copy(self, rule: ReasoningRule) -> list[int]:
        """Re-state every edge whose relation ends in ``_in`` and is not excluded."""
        excluded = set(rule.exclude_relations)
        return self._collect(
            rule,
            (
                (edge.subject, edge.object, (edge, edge))
                for edge in self.graph.iter_active_edges()
                if edge.relation.endswith("_in") and edge.relation not in excluded
            ),
        )

    def _apply_object_support(self, rule: ReasoningRule) -> list[int]:
        """Attach the subject's supporting facts to the object of a ``left_relation`` edge."""
        # De-duplicate while keeping the declared order; iterating a set of
        # strings would make the creation order depend on hash randomization.
        support_relations = tuple(dict.fromkeys(rule.support_relation_set))

        def candidates():
            for edge in self.graph.iter_active_edges():
                if edge.relation != rule.left_relation:
                    continue
                for support_relation in support_relations:
                    for support in self.graph.get_objects(edge.subject, support_relation):
                        yield edge.object, support.object, (edge, support)

        return self._collect(rule, candidates())

    def _apply_object_bridge(self, rule: ReasoningRule) -> list[int]:
        """Link the object of a ``left_relation`` edge to the subject's ``support_relation`` value."""

        def candidates():
            for left in self.graph.iter_active_edges():
                if left.relation != rule.left_relation:
                    continue
                support = self.graph.get_active_edge(left.subject, rule.support_relation)
                if support is None:
                    continue
                yield left.object, support.object, (left, support)

        return self._collect(rule, candidates())

    def _apply_via_object(self, rule: ReasoningRule) -> list[int]:
        """Link the subject of a ``left_relation`` edge to its object's ``via_relation`` value."""

        def candidates():
            for left in self.graph.iter_active_edges():
                if left.relation != rule.left_relation:
                    continue
                right = self.graph.get_active_edge(left.object, rule.via_relation)
                if right is None:
                    continue
                yield left.subject, right.object, (left, right)

        return self._collect(rule, candidates())

    def _add_if_supported(
        self,
        *,
        subject: str,
        relation: str,
        object_value: str,
        rule_name: str,
        support_edges: tuple[EdgeRecord, EdgeRecord],
    ) -> int | None:
        """Store one derived fact unless the graph already holds it.

        Returns:
            The value returned by ``graph.add_derived_fact``, or ``None``
            when ``subject -[relation]-> object_value`` already exists.
            The insert is made with ``commit=False``; callers commit.
        """
        if any(edge.object == object_value for edge in self.graph.get_objects(subject, relation)):
            return None
        return self.graph.add_derived_fact(
            subject=subject,
            relation=relation,
            object=object_value,
            source_text=self._proof_source_text(rule_name, support_edges),
            # Only the first support edge is recorded as the link; the full
            # pair is preserved in ``source_text``.
            supports_edge_id=support_edges[0].edge_id,
            commit=False,
        )

    @staticmethod
    def _proof_source_text(rule_name: str, support_edges: tuple[EdgeRecord, EdgeRecord]) -> str:
        """Render ``reason:<rule>|<left edge>|<right edge>`` as the proof trail."""
        left, right = support_edges
        return (
            f"reason:{rule_name}|"
            f"{left.subject}-[{left.relation}]->{left.object}|"
            f"{right.subject}-[{right.relation}]->{right.object}"
        )
runtime/aethon/rfi_reasoning_rules.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import json
5
+ from pathlib import Path
6
+
7
+
8
@dataclass(frozen=True)
class ReasoningRule:
    """Immutable description of a single reasoning rule.

    ``name``, ``kind``, ``output_relation`` and ``confidence`` are always
    required. The remaining fields default to empty values.
    """

    # Required identity of the rule and of the relation it produces.
    name: str
    kind: str
    output_relation: str
    confidence: float
    # Optional single-relation operands; empty string when not supplied.
    left_relation: str = ""
    right_relation: str = ""
    source_relation: str = ""
    via_relation: str = ""
    support_relation: str = ""
    # Optional relation collections; empty tuple when not supplied.
    exclude_relations: tuple[str, ...] = ()
    support_relation_set: tuple[str, ...] = ()
21
+
22
+
23
class NativeReasoningRuleSet:
    """Loads native reasoning rules from data instead of freezing rule metadata in code.

    The raw document is kept on ``payload`` and the parsed rules on
    ``rules``.
    """

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "reasoning" / "aethon_reasoning_rules_v1.json"

    # Optional string-valued fields of a rule row; a missing key becomes "".
    _TEXT_FIELDS = ("left_relation", "right_relation", "source_relation", "via_relation", "support_relation")
    # Optional list-valued fields of a rule row; a missing key becomes ().
    _TUPLE_FIELDS = ("exclude_relations", "support_relation_set")

    def __init__(self, path: str | Path | None = None, payload: dict | None = None) -> None:
        """Load the rule document from ``payload`` or, failing that, from ``path``.

        ``path`` falls back to ``DEFAULT_PATH`` when omitted.
        """
        self.path = self.DEFAULT_PATH if path is None else Path(path)
        self.payload = self._load_payload(payload)
        self.rules = self._load_rules()

    def _load_payload(self, payload: dict | None) -> dict:
        """Return ``payload`` itself when given, else the parsed file, else ``{}``."""
        if payload is not None:
            return payload
        if self.path.exists():
            return json.loads(self.path.read_text(encoding="utf-8"))
        return {}

    def _load_rules(self) -> tuple[ReasoningRule, ...]:
        """Parse every entry under the ``"rules"`` key into a ``ReasoningRule``.

        ``name``, ``output_relation`` and ``confidence`` are mandatory in
        each row; a missing one raises ``KeyError``.
        """
        parsed: list[ReasoningRule] = []
        for row in self.payload.get("rules", []):
            # Required keys are read first, in declaration order.
            name = str(row["name"])
            kind = str(row.get("kind", ""))
            output_relation = str(row["output_relation"])
            confidence = float(row["confidence"])
            text_values = {field: str(row.get(field, "")) for field in self._TEXT_FIELDS}
            tuple_values = {
                field: tuple(str(item) for item in row.get(field, []))
                for field in self._TUPLE_FIELDS
            }
            parsed.append(
                ReasoningRule(
                    name=name,
                    kind=kind,
                    output_relation=output_relation,
                    confidence=confidence,
                    **text_values,
                    **tuple_values,
                )
            )
        return tuple(parsed)

    def to_payload(self) -> dict:
        """Return a shallow copy of the raw rule document."""
        return dict(self.payload)
runtime/aethon/rfi_runtime.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from dataclasses import dataclass
5
+ import json
6
+ from pathlib import Path
7
+ import re
8
+
9
+ from .rfi_bundle import NativeBundleManager
10
+ from .rfi_abstraction import AbstractionEngine
11
+ from .rfi_document_filter import DocumentQualityGate
12
+ from .rfi_graph import RelationalGraphStore
13
+ from .rfi_ingest import DeterministicTripleExtractor
14
+ from .rfi_lexicon import AethonNativeConceptCodec
15
+ from .rfi_metrics import StructuralCapacityMeter
16
+ from .rfi_ontology import ConceptOntology
17
+ from .rfi_query_forms import NativeQueryFormSet
18
+ from .rfi_query import ProofQueryEngine, QueryResult
19
+ from .rfi_reasoner import StructuralReasoner
20
+ from .rfi_surface import GraphVerbalizer
21
+
22
+
23
@dataclass(frozen=True)
class NativeResponse:
    """Immutable result of one ``ask`` call on the native runtime."""

    # Raw answer value; "<unknown>" when the query engine found nothing.
    answer: str
    # Verbalized sentence(s) produced by the surface layer.
    text: str
    # Surface-layer explanation accompanying the answer.
    explanation: str
    # Proof steps and reasoning steps carried over from the query result.
    proof: tuple[str, ...]
    reasoning: tuple[str, ...]
    # Query-result mode, or "multi" / "unknown" when set by the runtime.
    mode: str
31
+
32
+
33
class AethonNativeBase:
    """The first real no-weight Aethon base runtime.

    Wires the ontology, codec, query forms, document gate, graph store,
    triple extractor, abstraction engine, reasoner, query engine and
    verbalizer together, and exposes the learn / ask / inspect entry points.
    """

    NAME = "Aethon N1 Base"
    FAMILY = "Aethon Native"
    TOKENIZER = f"{AethonNativeConceptCodec.NAME} ({AethonNativeConceptCodec.SHORT_NAME})"

    def __init__(
        self,
        *,
        db_path: str = ":memory:",
        semantic_lexicon_path: str | None = None,
        semantic_lexicon_payload: list[dict[str, str]] | None = None,
        surface_lexicon_path: str | None = None,
        surface_lexicon_payload: dict | None = None,
        query_form_path: str | None = None,
        query_form_payload: list[dict[str, object]] | None = None,
        reasoning_rules_path: str | None = None,
        reasoning_rules_payload: dict | None = None,
    ) -> None:
        """Build every runtime component.

        Each ``*_path`` / ``*_payload`` pair is forwarded untouched to the
        component that consumes it, and is also kept on the instance.
        """
        self.semantic_lexicon_path = semantic_lexicon_path
        self.semantic_lexicon_payload = semantic_lexicon_payload
        self.surface_lexicon_path = surface_lexicon_path
        self.surface_lexicon_payload = surface_lexicon_payload
        self.query_form_path = query_form_path
        self.query_form_payload = query_form_payload
        self.reasoning_rules_path = reasoning_rules_path
        self.reasoning_rules_payload = reasoning_rules_payload
        self.ontology = ConceptOntology(semantic_lexicon_path, semantic_lexicon_payload)
        self.codec = AethonNativeConceptCodec(self.ontology)
        self.query_forms = NativeQueryFormSet(query_form_path, query_form_payload)
        self.document_gate = DocumentQualityGate()
        self.graph = RelationalGraphStore(db_path=db_path)
        self.extractor = DeterministicTripleExtractor(self.ontology)
        self.abstraction = AbstractionEngine(self.graph, self.ontology)
        self.reasoner = StructuralReasoner(self.graph, reasoning_rules_path, reasoning_rules_payload)
        self.query_engine = ProofQueryEngine(self.graph, self.ontology, self.abstraction, self.reasoner, self.query_forms)
        self.surface = GraphVerbalizer(surface_lexicon_path, surface_lexicon_payload, self.graph)

    def learn(self, text: str) -> dict[str, object]:
        """Ingest ``text``, then immediately abstract, reason and commit.

        Returns:
            A dict with ``learned_edges``, ``derived_edges``, ``rule_count``
            and the codec's token export under ``lexicon``.
        """
        triples = self.extractor.extract(text)
        edge_ids = self.graph.ingest_triples(triples, commit=False)
        rules = self.abstraction.materialize_rules(min_support=2)
        derived = self.reasoner.materialize()
        self.graph.commit()
        return {
            "learned_edges": edge_ids,
            "derived_edges": derived,
            "rule_count": len(rules),
            "lexicon": self.codec.export_tokens(text),
        }

    def learn_fast(self, text: str) -> dict[str, object]:
        """Ingest ``text`` only; no abstraction, no reasoning, no commit.

        The result has the same keys as :meth:`learn`, with empty
        ``derived_edges`` and a zero ``rule_count``.
        """
        triples = self.extractor.extract(text)
        edge_ids = self.graph.ingest_triples(triples, commit=False)
        return {
            "learned_edges": edge_ids,
            "derived_edges": [],
            "rule_count": 0,
            "lexicon": self.codec.export_tokens(text),
        }

    def learn_ultra_fast(self, texts: list[str]) -> dict[str, int]:
        """Bulk-ingest the non-blank ``texts`` through the fast graph path.

        Nothing is committed here.
        """
        triples = []
        for text in texts:
            if text.strip():
                triples.extend(self.extractor.extract_ultra(text))
        learned_edges = self.graph.ingest_triples_fast(triples, commit=False)
        return {"learned_edges": learned_edges, "triple_count": len(triples)}

    def capture_ultra_fast(self, lane: str, source: str, texts: list[str]) -> dict[str, int]:
        """Store the stripped, non-blank ``texts`` as raw units for later digestion."""
        units = [(lane, source, text.strip()) for text in texts if text.strip()]
        captured = self.graph.ingest_raw_units(units, commit=False)
        return {"captured_units": captured}

    def digest_captured_units(
        self,
        *,
        batch_size: int = 5000,
        flush_every: int = 20000,
        reason_rounds: int = 6,
        purge_after_digest: bool = False,
        progress_callback=None,
    ) -> dict[str, int]:
        """Turn captured raw units into graph edges, batch by batch.

        Args:
            batch_size: Maximum number of raw units fetched per iteration.
            flush_every: Run an intermediate ``flush_learning`` once this
                many units have accumulated since the last flush.
            reason_rounds: Reasoning rounds for the final flush.
            purge_after_digest: Delete digested raw units afterwards.
            progress_callback: Optional callable invoked after every batch
                with a dict of running totals.

        Returns:
            Totals for processed/total units, learned and derived edges,
            rule count and purged units.
        """
        total_units = self.graph.count_undigested_raw_units()
        processed_units = 0
        learned_edges = 0
        derived_edges = 0
        rule_count = 0
        pending_units = 0
        while True:
            batch = self.graph.fetch_undigested_raw_units(limit=batch_size)
            if not batch:
                break
            texts = [str(item["text"]) for item in batch if str(item["text"]).strip()]
            if texts:
                learned = self.learn_ultra_fast(texts)
                learned_edges += int(learned["learned_edges"])
            # Every fetched unit is marked, including blank ones, so the next
            # fetch does not return it again.
            unit_ids = [int(item["unit_id"]) for item in batch]
            self.graph.mark_raw_units_digested(unit_ids, commit=False)
            processed_units += len(unit_ids)
            pending_units += len(unit_ids)
            if pending_units >= flush_every:
                # Intermediate flushes use a fixed two reasoning rounds; the
                # caller's ``reason_rounds`` applies to the final flush only.
                flushed = self.flush_learning(reason_rounds=2)
                derived_edges += int(flushed["derived_edges"])
                rule_count += int(flushed["rule_count"])
                pending_units = 0
            if progress_callback is not None:
                progress_callback(
                    {
                        "processed_units": processed_units,
                        "total_units": total_units,
                        "learned_edges": learned_edges,
                        "derived_edges": derived_edges,
                        "rule_count": rule_count,
                    }
                )
        if pending_units:
            flushed = self.flush_learning(reason_rounds=reason_rounds)
            derived_edges += int(flushed["derived_edges"])
            rule_count += int(flushed["rule_count"])
        else:
            self.graph.commit()
        purged_units = 0
        if purge_after_digest:
            purged_units = self.graph.purge_digested_raw_units(commit=True)
        return {
            "processed_units": processed_units,
            "total_units": total_units,
            "learned_edges": learned_edges,
            "derived_edges": derived_edges,
            "rule_count": rule_count,
            "purged_units": purged_units,
        }

    def flush_learning(
        self,
        *,
        min_support: int = 2,
        reason_rounds: int = 4,
        induce_min_support: int | None = None,
    ) -> dict[str, int]:
        """Run abstraction and reasoning over pending edges, then commit.

        ``induce_min_support`` defaults to ``min_support`` when omitted.
        Returns counts (not ids) under ``rule_count`` and ``derived_edges``.
        """
        rules = self.abstraction.materialize_rules(min_support=min_support)
        derived = self.reasoner.materialize(
            max_rounds=reason_rounds,
            induce_min_support=induce_min_support if induce_min_support is not None else min_support,
        )
        self.graph.commit()
        return {"rule_count": len(rules), "derived_edges": len(derived)}

    def learn_document(self, title: str, text: str) -> dict[str, object]:
        """Learn the units of a document that passes the quality gate.

        A rejected document returns empty results plus the gate's
        ``reason`` (and no ``units`` key). An accepted document is ingested
        unit by unit and flushed once at the end.
        """
        selection = self.document_gate.select(title, text)
        if not selection.accepted:
            return {"learned_edges": [], "derived_edges": [], "rule_count": 0, "lexicon": [], "reason": selection.reason}
        learned_edges: list[int] = []
        derived_edges: list[int] = []
        rule_count = 0
        lexicon: list[dict[str, object]] = []
        for unit in selection.units:
            learned = self.learn_fast(unit)
            learned_edges.extend(learned["learned_edges"])
            lexicon.extend(learned["lexicon"])
        flushed = self.flush_learning()
        derived_count = int(flushed["derived_edges"])
        if derived_count:
            # ``flush_learning`` reports only a count, so the list is filled
            # with placeholder zeros; only its length is meaningful.
            derived_edges = [0] * derived_count
        rule_count = int(flushed["rule_count"])
        return {
            "learned_edges": learned_edges,
            "derived_edges": derived_edges,
            "rule_count": rule_count,
            "lexicon": lexicon,
            "reason": selection.reason,
            "units": list(selection.units),
        }

    def learn_jsonl(self, path: str) -> dict[str, int]:
        """Learn from a JSON Lines file.

        Each row may carry a document (``title`` / ``text``) and free-form
        facts under ``memory``, ``facts`` or ``fact`` (a string or a list of
        strings). Blank lines are skipped and not counted.

        Returns:
            ``rows`` (rows parsed) and ``facts`` (documents that produced
            edges plus individual fact strings learned).
        """
        source = Path(path)
        rows = 0
        facts = 0
        with source.open("r", encoding="utf-8") as handle:
            for line in handle:
                if not line.strip():
                    # json.loads rejects an empty document; blank separator
                    # lines are common in hand-edited JSONL files.
                    continue
                row = json.loads(line)
                title = str(row.get("title", "")).strip()
                text = str(row.get("text", "")).strip()
                if title or text:
                    learned = self.learn_document(title, text)
                    if learned["learned_edges"]:
                        facts += 1
                for field in ("memory", "facts", "fact"):
                    payload = row.get(field)
                    if isinstance(payload, str) and payload.strip():
                        self.learn(payload)
                        facts += 1
                    elif isinstance(payload, list):
                        for item in payload:
                            if isinstance(item, str) and item.strip():
                                self.learn(item)
                                facts += 1
                rows += 1
        return {"rows": rows, "facts": facts}

    def ask(self, query: str) -> NativeResponse:
        """Answer ``query``, splitting compound questions into parts.

        A multi-part query is answered part by part and merged into one
        response with mode ``"multi"``. A query the engine cannot answer
        yields ``"<unknown>"`` with mode ``"unknown"``.
        """
        parts = self._split_query_parts(query)
        if len(parts) > 1:
            responses = [self.ask(part) for part in parts]
            return NativeResponse(
                answer=" | ".join(response.answer for response in responses),
                text=" ".join(response.text for response in responses if response.text),
                explanation=" ".join(response.explanation for response in responses if response.explanation),
                proof=tuple(step for response in responses for step in response.proof),
                reasoning=tuple(step for response in responses for step in response.reasoning),
                mode="multi",
            )
        self.surface.advance_response_state()
        result = self.query_engine.answer(query)
        if result is None:
            return NativeResponse(
                answer="<unknown>",
                text=self.surface.verbalize_result(query, None),
                explanation=self.surface.explain_result(query, None),
                proof=(),
                reasoning=(),
                mode="unknown",
            )
        return self._render(query, result)

    def inspect(self, text: str) -> list[dict[str, object]]:
        """Return the codec's token export for ``text``."""
        return self.codec.export_tokens(text)

    def capacity(self) -> dict[str, int]:
        """Return structural capacity metadata for the graph database.

        A file-backed graph is measured in place. An in-memory graph is
        first backed up into a temporary SQLite file, because the meter
        reads from a database path.
        """
        if self.graph.db_path != ":memory:":
            return StructuralCapacityMeter.from_sqlite(self.graph.db_path).to_metadata()

        import sqlite3
        import tempfile

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir) / "graph.sqlite3"
            destination = sqlite3.connect(str(temp_path))
            try:
                self.graph.conn.backup(destination)
            finally:
                # Close before measuring so the copy is fully released.
                destination.close()
            return StructuralCapacityMeter.from_sqlite(temp_path).to_metadata()

    def close(self) -> None:
        """Close the underlying graph store."""
        self.graph.close()

    def _render(self, query: str, result: QueryResult) -> NativeResponse:
        """Wrap a query result with its verbalized text and explanation."""
        return NativeResponse(
            answer=result.answer,
            text=self.surface.verbalize_result(query, result),
            explanation=self.surface.explain_result(query, result),
            proof=result.proof,
            reasoning=result.reasoning,
            mode=result.mode,
        )

    @staticmethod
    def _split_query_parts(query: str) -> list[str]:
        """Split a compound query into individual questions.

        Separators are a question mark, ``and also``, ``also``, ``then``,
        a semicolon and line breaks. When at most one non-empty part
        remains the stripped query is returned unchanged; otherwise every
        part is cleaned and terminated with ``?``.
        """
        parts: list[str] = []
        for part in re.split(
            r"(?:\?\s+|\?\s*$|(?:\s+and\s+also\s+)|(?:\s+also\s+)|(?:\s*;\s*)|(?:\s+then\s+)|(?:\r?\n+))",
            query,
        ):
            cleaned = part.strip()
            cleaned = re.sub(r"^(?:also|and)\s+", "", cleaned, flags=re.IGNORECASE)
            cleaned = re.sub(r"\s+", " ", cleaned).strip(" ?!.")
            if cleaned:
                parts.append(cleaned)
        if len(parts) <= 1:
            return [query.strip()]
        return [part if part.endswith("?") else f"{part}?" for part in parts]
307
+
308
+
309
def parse_args() -> argparse.Namespace:
    """Parse the runtime's command-line options from ``sys.argv``.

    ``--learn`` and ``--ask`` may be repeated and accumulate into lists;
    ``--capacity`` is a flag; every other option takes one string value.
    """
    # Declared in the order the options are registered on the parser.
    option_specs = (
        ("--db-path", {"type": str, "default": ":memory:"}),
        ("--learn", {"action": "append", "default": []}),
        ("--learn-jsonl", {"type": str, "default": ""}),
        ("--ask", {"action": "append", "default": []}),
        ("--inspect", {"type": str, "default": ""}),
        ("--capacity", {"action": "store_true"}),
        ("--save-bundle", {"type": str, "default": ""}),
        ("--load-bundle", {"type": str, "default": ""}),
    )
    parser = argparse.ArgumentParser(description="Run the first real no-weight Aethon native base.")
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
320
+
321
+
322
def _print_response(query: str, response: NativeResponse) -> None:
    """Print one question together with every populated part of its response."""
    print(f"Q: {query}")
    print(f"A: {response.answer}")
    print(f"mode: {response.mode}")
    print(f"text: {response.text}")
    print(f"explain: {response.explanation}")
    # Reasoning and proof sections are shown only when they have steps.
    for heading, steps in (("reasoning:", response.reasoning), ("proof:", response.proof)):
        if steps:
            print(heading)
            for step in steps:
                print(f"  - {step}")


def main() -> None:
    """Command-line entry point.

    Builds the runtime (from a bundle when ``--load-bundle`` is given,
    otherwise fresh on ``--db-path``), then performs the requested actions
    in a fixed order: learn, learn-jsonl, inspect, capacity, ask, save.
    The runtime is always closed, even when an action fails.
    """
    args = parse_args()
    if args.load_bundle:
        runtime = NativeBundleManager.load(args.load_bundle)
    else:
        runtime = AethonNativeBase(db_path=args.db_path)
    try:
        for fact in args.learn:
            learned = runtime.learn(fact)
            print(f"learned_edges={len(learned['learned_edges'])} derived_edges={len(learned['derived_edges'])} rule_count={learned['rule_count']}")
        if args.learn_jsonl:
            stats = runtime.learn_jsonl(args.learn_jsonl)
            print(f"learned_rows={stats['rows']} learned_facts={stats['facts']}")
        if args.inspect:
            print(json.dumps(runtime.inspect(args.inspect), indent=2))
        if args.capacity:
            print(json.dumps(runtime.capacity(), indent=2))
        for query in args.ask:
            _print_response(query, runtime.ask(query))
        if args.save_bundle:
            bundle = NativeBundleManager.save(runtime, args.save_bundle)
            print(f"bundle={bundle}")
    finally:
        runtime.close()
356
+
357
+
358
+ if __name__ == "__main__":
359
+ main()
runtime/aethon/rfi_semantics.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ import json
5
+ from pathlib import Path
6
+
7
+
8
@dataclass(frozen=True)
class SemanticEntry:
    """One immutable row of the semantic lexicon."""

    # Category of the entry, e.g. "emoji", "abbreviation" or "typo".
    kind: str
    # Surface form as it appears in text.
    symbol: str
    # Normalized meaning the symbol maps to.
    meaning: str
13
+
14
+
15
class NativeSemanticLexicon:
    """Loads semantic normalization units from native data files instead of burying them in code.

    After construction the entries are also indexed into lookup tables by
    kind (``emoji_map``, ``abbreviation_map``, ``alias_map``,
    ``phrase_alias_map``, ``typo_map``) and into two symbol sets
    (``relation_prepositions``, ``query_fillers``). All keys except emoji
    symbols are lower-cased.
    """

    DEFAULT_PATH = Path(__file__).resolve().parents[1] / "data" / "native" / "semantics" / "aethon_semantic_lexicon_v1.jsonl"

    def __init__(self, path: str | Path | None = None, payload: list[dict[str, str]] | None = None) -> None:
        """Load entries from ``payload`` or the JSONL file and build the indexes.

        ``path`` falls back to ``DEFAULT_PATH`` when omitted.
        """
        self.path = Path(path) if path is not None else self.DEFAULT_PATH
        self.entries = self._load_entries(payload)
        self.emoji_map = {entry.symbol: entry.meaning for entry in self.entries if entry.kind == "emoji"}
        self.abbreviation_map = {entry.symbol.lower(): entry.meaning for entry in self.entries if entry.kind == "abbreviation"}
        # Abbreviations are deliberately part of the alias table as well.
        self.alias_map = {
            entry.symbol.lower(): entry.meaning
            for entry in self.entries
            if entry.kind in {"abbreviation", "semantic_alias", "multilingual_alias"}
        }
        self.phrase_alias_map = {
            entry.symbol.lower(): entry.meaning
            for entry in self.entries
            if entry.kind in {"phrase_alias", "multilingual_phrase"}
        }
        self.typo_map = {entry.symbol.lower(): entry.meaning for entry in self.entries if entry.kind == "typo"}
        self.relation_prepositions = {
            entry.symbol.lower()
            for entry in self.entries
            if entry.kind == "relation_preposition"
        }
        self.query_fillers = {
            entry.symbol.lower()
            for entry in self.entries
            if entry.kind == "query_filler"
        }

    def _load_entries(self, payload: list[dict[str, str]] | None) -> tuple[SemanticEntry, ...]:
        """Return the lexicon entries as an immutable tuple.

        An explicit ``payload`` wins over the file. A missing file yields
        an empty tuple. Blank lines in the JSONL file are skipped.
        """
        if payload is not None:
            return tuple(self._entry_from_row(row) for row in payload)
        if not self.path.exists():
            return ()
        with self.path.open("r", encoding="utf-8") as handle:
            # json.loads rejects an empty document, so whitespace-only
            # lines must be filtered out before parsing.
            return tuple(
                self._entry_from_row(json.loads(line))
                for line in handle
                if line.strip()
            )

    @staticmethod
    def _entry_from_row(row: dict[str, str]) -> SemanticEntry:
        """Build a ``SemanticEntry`` from a row; all three keys are required."""
        return SemanticEntry(
            kind=str(row["kind"]).strip(),
            symbol=str(row["symbol"]).strip(),
            meaning=str(row["meaning"]).strip(),
        )

    def to_payload(self) -> list[dict[str, str]]:
        """Serialize the entries back into plain ``kind``/``symbol``/``meaning`` rows."""
        return [
            {
                "kind": entry.kind,
                "symbol": entry.symbol,
                "meaning": entry.meaning,
            }
            for entry in self.entries
        ]

    def describe_emojis(self, text: str) -> list[tuple[str, str]]:
        """Return ``(emoji, meaning)`` for every known emoji occurring in ``text``.

        Results follow the order of ``emoji_map``, not the order of
        appearance in ``text``. Dict keys are unique, so no duplicate
        filtering is needed.
        """
        return [(emoji, meaning) for emoji, meaning in self.emoji_map.items() if emoji in text]
runtime/aethon/rfi_surface.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from .rfi_graph import RelationalGraphStore
4
+ from .rfi_query import QueryResult
5
+ from .rfi_surface_lexicon import NativeSurfaceLexicon
6
+
7
+
8
class GraphVerbalizer:
    """Turns proof-backed query results into direct language without weight-based decoding.

    Every answer is assembled from fixed sentence templates plus sentences
    rebuilt from the result's proof lines.  A proof line that carries an edge
    has the shape ``<tag>:<subject>-[<relation>]-><object>``; ``derived:``
    lines list their supporting edges after the head, separated by ``|``.

    Wording is varied between calls: ``response_history`` remembers which
    phrasings were already produced for a given key, and ``response_index``
    (bumped by :meth:`advance_response_state`) picks the fallback once every
    candidate has been used.
    """

    # Maximum number of remembered phrasings kept per history key.
    _HISTORY_LIMIT = 24

    # Verb forms substituted for the leading token of ``*_in`` relations.
    _IN_VERB_FORMS = {"located": "is located", "lives": "lives", "work": "works"}

    # Relations that have a fixed third-person rendering.
    _FIXED_RELATION_TEMPLATES = {
        "return": "{subject} returns {object}",
        "return_value": "{subject} returns {object}",
        "depend_on": "{subject} depends on {object}",
        "prefer": "{subject} prefers {object}",
        "not_like_anymore": "{subject} no longer likes {object}",
        "contains": "{subject} contains {object}",
        "home_of": "{subject} is the home of {object}",
        "purchase_site_of": "{subject} is where {object} was bought",
        "chased_by": "{subject} is chased by {object}",
        "attacked_by": "{subject} is attacked by {object}",
        "watched_by": "{subject} is watched by {object}",
        "kept_by": "{subject} is kept by {object}",
        "carried_by": "{subject} is carried by {object}",
    }

    def __init__(
        self,
        surface_lexicon_path: str | None = None,
        surface_lexicon_payload: dict | None = None,
        graph: RelationalGraphStore | None = None,
    ) -> None:
        """Create a verbalizer.

        Args:
            surface_lexicon_path: Forwarded to ``NativeSurfaceLexicon`` as its path.
            surface_lexicon_payload: Forwarded to ``NativeSurfaceLexicon`` as its payload.
            graph: Optional store used to look up display names for node ids.
        """
        self.lexicon = NativeSurfaceLexicon(surface_lexicon_path, surface_lexicon_payload)
        self.graph = graph
        self.response_index = 0
        self.response_history: dict[str, list[str]] = {}

    def advance_response_state(self) -> None:
        """Move to the next phrasing slot used when all candidates were already seen."""
        self.response_index += 1

    def verbalize_result(self, query: str, result: QueryResult | None) -> str:
        """Render ``result`` as the answer text for ``query``.

        Returns a fixed "cannot ground" sentence when ``result`` is ``None``.
        Otherwise the wording depends on ``result.mode``; modes that are not
        handled explicitly yield the humanized answer followed by a period.
        """
        if result is None:
            return f"I cannot ground an answer to '{query}' from what I currently know."
        history_key = f"text::{query}::{result.mode}::{result.answer}"
        mode = result.mode

        if mode == "direct":
            answer = self._humanize(result.answer)
            if result.proof and result.proof[0].startswith("emoji:"):
                return f"The emoji you used is {answer}."
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(query + result.answer, supports, answer)
            return self._sentence(answer)

        if mode == "derived":
            answer = self._humanize(result.answer)
            if result.proof and result.proof[0].startswith("math:"):
                candidates = (
                    f"I computed the exact answer as {answer}.",
                    f"The exact result is {answer}. I reached it by direct computation.",
                    f"I evaluated the arithmetic structure and got {answer}.",
                    f"My computation resolves to {answer}.",
                    f"Working through the arithmetic gives {answer}.",
                    f"I solve the structure exactly and end at {answer}.",
                )
                return self._choose_novel(history_key, candidates)
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(query + result.answer, supports, answer)
            candidates = (
                f"I derive the answer as {answer} from stored relations.",
                f"My structural reasoning leads me to {answer}.",
                f"I do not retrieve {answer} directly; I derive it from linked relations.",
            )
            return self._choose_novel(history_key, candidates)

        if mode == "composed":
            answer = self._humanize(result.answer)
            # The support paragraph is only attempted for recognised question shapes.
            if self._subject_from_query(query) is not None:
                supports = self._edge_sentences(result.proof)
                if supports:
                    return self._compose_support_paragraph(query + result.answer, supports, answer)
            candidates = (
                f"I connect the stored facts to answer with {answer}.",
                f"My answer is {answer} because several linked facts converge on it.",
                f"I compose multiple facts and end at {answer}.",
            )
            return self._choose_novel(history_key, candidates)

        if mode == "path":
            path = self._humanize(self._path_text(result.answer))
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(query + result.answer, supports, path)
            return self._choose_novel(
                history_key,
                (
                    f"I connect them through {path}.",
                    f"The path between them runs through {path}.",
                    f"My reasoning links them by way of {path}.",
                ),
            )

        if mode == "plan":
            answer = self._humanize(result.answer)
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(query + result.answer + "::plan", supports, answer)
            return self._choose_novel(
                history_key,
                (
                    f"The next grounded step is {answer}.",
                    f"My planning chain points to {answer} first.",
                    f"The structure says {answer} should happen next.",
                ),
            )

        if mode == "story":
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_story_paragraph(query + result.answer + "::story", supports)
            answer = self._humanize(result.answer)
            return self._choose_novel(
                history_key,
                (
                    f"I can ground a story around {answer} from the graph, but I need more concrete events to tell it richly.",
                    f"The graph mentions {answer}, though I would need more events to narrate a fuller story.",
                ),
            )

        if mode == "revision":
            # Expected answer shape: "<relation>:<old>-><new>".
            relation, colon, transition = result.answer.partition(":")
            previous_value, arrow, new_value = transition.partition("->")
            if not (colon and arrow):
                # Malformed revision payload: fall back to the plain answer
                # instead of failing on tuple unpacking.
                return f"{self._humanize(result.answer)}."
            return (
                f"The stored {relation.replace('_', ' ')} changed from "
                f"{self._humanize(previous_value)} to {self._humanize(new_value)}."
            )

        if mode == "contradiction":
            return "Yes. The graph contains a revision or contradiction for that subject."
        if mode == "abstract":
            return self._compose_abstract_answer(result)
        # "ontology" and any unrecognised mode share the plain rendering.
        return f"{self._humanize(result.answer)}."

    def verbalize_edge(self, subject: str, relation: str, object_value: str) -> str:
        """Render one edge as a sentence; the subject ``"aethon"`` is voiced in first person."""
        if subject == "aethon":
            return self._compose_self_sentence(relation, object_value)
        return self._compose_relation_sentence(subject, relation, object_value)

    def verbalize_proof(self, result: QueryResult | None) -> str:
        """Return the raw proof lines as a bulleted trace, or a fixed notice when there are none."""
        if result is None or not result.proof:
            return "No proof trace is available."
        return "\n".join(["Proof trace:", *(f"- {step}" for step in result.proof)])

    def verbalize_reasoning(self, result: QueryResult | None) -> str:
        """Return the reasoning steps as a bulleted list under an intro chosen by ``response_index``."""
        if result is None or not result.reasoning:
            return "No reasoning steps are available."
        intros = (
            "I reason through the graph like this:",
            "My reasoning chain is:",
            "I connect the stored facts in this order:",
        )
        intro = intros[self.response_index % len(intros)]
        return "\n".join([intro, *(f"- {step}" for step in result.reasoning)])

    def explain_result(self, query: str, result: QueryResult | None) -> str:
        """Explain why ``result`` answers ``query``.

        Falls back to :meth:`verbalize_result` whenever a mode has no dedicated
        explanation or the proof is too thin to build one.
        """
        if result is None:
            return f"I do not have enough grounded structure yet to answer '{query}'."
        history_key = f"explain::{query}::{result.mode}::{result.answer}"
        mode = result.mode

        if mode in {"direct", "ontology"}:
            if result.proof and result.proof[0].startswith("emoji:"):
                return f"I identified {self._humanize(result.answer)} directly from the symbols in your message."
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(
                    query + result.answer + "::explain", supports, self._humanize(result.answer)
                )
            return self.verbalize_result(query, result)

        if mode == "derived":
            if not result.proof:
                return self.verbalize_result(query, result)
            proof = result.proof[0]
            if proof.startswith("math:"):
                answer = self._humanize(result.answer)
                return self._choose_novel(
                    history_key,
                    (
                        f"I reached {answer} by evaluating the arithmetic structure directly instead of guessing.",
                        f"The result {answer} comes from exact computation over the expression.",
                        f"I solved the expression structurally, which yields {answer}.",
                    ),
                )
            if proof.startswith("derived:"):
                supports = self._derived_support_lines(proof)
                if supports:
                    return self._compose_support_paragraph(
                        query + result.answer + proof,
                        supports,
                        self._humanize(result.answer),
                    )
                # No parsable supporting edge: quote whatever follows the head.
                detail = proof.split("|", 1)[-1]
                return (
                    f"The answer is {self._humanize(result.answer)} because Aethon derived a new fact from "
                    f"earlier stored relations: {self._humanize(detail)}."
                )
            return self.verbalize_result(query, result)

        if mode == "composed":
            supports = self._edge_sentences(result.proof)
            if supports:
                return self._compose_support_paragraph(
                    query + result.answer + "".join(result.proof),
                    supports,
                    self._humanize(result.answer),
                )
            return f"The answer is {self._humanize(result.answer)} because multiple linked facts point to it."

        if mode == "path":
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(
                    query + result.answer + "::path",
                    supports,
                    self._humanize(self._path_text(result.answer)),
                )
            if not result.proof:
                return self.verbalize_result(query, result)
            relation_chain = " then ".join(
                self._humanize(self._extract_relation(step) or step) for step in result.proof
            )
            return f"I connect the concepts by following {relation_chain}."

        if mode == "plan":
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_support_paragraph(
                    query + result.answer + "::plan_explain", supports, self._humanize(result.answer)
                )
            return f"The planning structure points to {self._humanize(result.answer)} as the next grounded step."

        if mode == "story":
            supports = self._proof_sentences(result)
            if supports:
                return self._compose_story_paragraph(query + result.answer + "::story_explain", supports)
            return f"I need more concrete events about {self._humanize(result.answer)} before I can extend the story further."

        if mode == "abstract":
            return self._compose_abstract_explanation(result)
        # "revision", "contradiction" and unknown modes reuse the answer text.
        return self.verbalize_result(query, result)

    @staticmethod
    def _plain_humanize(text: str) -> str:
        """Replace underscores with spaces."""
        return text.replace("_", " ")

    def _humanize(self, text: str) -> str:
        """Return a readable form of ``text``.

        When a graph is attached and ``text`` consists only of lowercase
        letters, digits and underscores, the graph's display name is used;
        otherwise underscores are simply replaced with spaces.
        """
        looks_like_id = bool(text) and all(
            char.islower() or char.isdigit() or char == "_" for char in text
        )
        if self.graph is not None and looks_like_id:
            return self.graph.get_display_name(text)
        return self._plain_humanize(text)

    def _sentence(self, text: str) -> str:
        """Capitalise the first character and ensure terminal punctuation; empty input stays empty."""
        cleaned = text.strip()
        if not cleaned:
            return ""
        cleaned = cleaned[0].upper() + cleaned[1:]
        if cleaned[-1] not in ".!?":
            cleaned += "."
        return cleaned

    @staticmethod
    def _lower_sentence(text: str) -> str:
        """Strip one trailing punctuation mark and lowercase the first character.

        A standalone ``i`` is restored to ``I``; runs of whitespace collapse
        to single spaces because the text is re-joined word by word.
        """
        cleaned = text.strip()
        if not cleaned:
            return ""
        if cleaned[-1] in ".!?":
            cleaned = cleaned[:-1]
        lowered = cleaned[0].lower() + cleaned[1:] if cleaned else ""
        return " ".join("I" if word == "i" else word for word in lowered.split())

    def _first_edge(self, result: QueryResult | None) -> tuple[str, str, str] | None:
        """Return the first parsable edge among the proof lines, if any."""
        if result is None:
            return None
        for step in result.proof:
            parsed = self._parse_edge(step)
            if parsed is not None:
                return parsed
        return None

    def _parse_edge(self, proof_line: str) -> tuple[str, str, str] | None:
        """Parse ``<tag>:<subject>-[<relation>]-><object>`` into its three parts.

        Only the text before the first ``|`` is considered.  Returns ``None``
        for anything that does not match, including lines whose edge markers
        appear only after a ``|`` and lines without a ``<tag>:`` prefix, which
        would otherwise fail on tuple unpacking.
        """
        payload = proof_line.split("|", 1)[0]
        _, colon, edge = payload.partition(":")
        if not colon:
            return None
        subject, opener, rest = edge.partition("-[")
        if not opener:
            return None
        relation, closer, object_value = rest.partition("]->")
        if not closer:
            return None
        return subject, relation, object_value

    def _proof_line_to_sentence(self, proof_line: str) -> str | None:
        """Turn a single proof line into a sentence, or ``None`` when it has no rendering."""
        parsed = self._parse_edge(proof_line)
        if parsed is not None:
            subject, relation, object_value = parsed
            return self.verbalize_edge(subject, relation, object_value)
        if proof_line.startswith("compose:"):
            return "Aethon combined linked facts to reach the final answer"
        if proof_line.startswith("ontology:"):
            payload = proof_line.split(":", 1)[1]
            subject, _, object_value = payload.partition("->is_a->")
            return f"{self._humanize(subject)} belongs to the class {self._humanize(object_value)}"
        return None

    def _edge_sentences(self, proof: list[str]) -> list[str]:
        """Render every proof line once, keeping order and duplicates, dropping unrenderable lines."""
        rendered = (self._proof_line_to_sentence(step) for step in proof)
        return [sentence for sentence in rendered if sentence]

    def _derived_support_lines(self, proof_line: str) -> list[str]:
        """Render the ``|``-separated supporting edges of a ``derived:`` line, de-duplicated in order."""
        supports: list[str] = []
        seen: set[str] = set()
        for part in proof_line.split("|")[1:]:
            # Each part is a bare edge; add a tag so the edge parser accepts it.
            parsed = self._parse_edge(f"derived:{part}")
            if parsed is None:
                continue
            sentence = self.verbalize_edge(*parsed)
            if sentence not in seen:
                supports.append(sentence)
                seen.add(sentence)
        return supports

    def _proof_sentences(self, result: QueryResult) -> list[str]:
        """Render all proof lines as unique sentences, expanding ``derived:`` lines into their supports."""
        supports: list[str] = []
        seen: set[str] = set()
        for step in result.proof:
            if step.startswith("derived:"):
                batch = self._derived_support_lines(step)
            else:
                sentence = self._proof_line_to_sentence(step)
                batch = [sentence] if sentence else []
            for line in batch:
                if line not in seen:
                    supports.append(line)
                    seen.add(line)
        return supports

    def _extract_relation(self, proof_line: str) -> str | None:
        """Return the relation name of an edge proof line, or ``None``."""
        parsed = self._parse_edge(proof_line)
        return None if parsed is None else parsed[1]

    def _path_text(self, relation_chain: str) -> str:
        """Humanize each ``->``-separated hop and join the hops with ``", then "``."""
        return ", then ".join(self._humanize(piece.strip()) for piece in relation_chain.split("->"))

    def _subject_from_query(self, query: str) -> str | None:
        """Extract the subject of a recognised question shape, or ``None``.

        The question words are matched case-insensitively; the returned
        subject keeps the casing it had in ``query``.
        """
        original_words = query.strip().rstrip(" ?").split()
        words = [word.lower() for word in original_words]
        lead = words[:2]
        if len(words) >= 3 and lead == ["where", "is"]:
            return " ".join(original_words[2:])
        if len(words) >= 4 and lead in (["where", "does"], ["what", "does"], ["what", "did"]):
            return original_words[2]
        if len(words) >= 4 and lead == ["what", "is"] and words[-1] == "carrying":
            return " ".join(original_words[2:-1])
        if len(words) >= 5 and lead == ["which", "animal"] and "watch" in words:
            watch_index = words.index("watch")
            return " ".join(original_words[3:watch_index]) if watch_index > 3 else None
        return None

    def _compose_support_paragraph(self, seed: str, supports: list[str], answer: str) -> str:
        """Build a paragraph that walks through ``supports`` and concludes with ``answer``.

        Several opening/connector/reflection combinations are generated and
        :meth:`_choose_novel` picks one not yet used for this seed and answer.
        Summary sentences and connectors come from the surface lexicon, with
        built-in fallbacks when the lexicon provides none.
        """
        history_key = f"support::{seed}::{answer}"
        fallback_summary = f"So I answer {answer}."
        summaries = tuple(
            variant.format(answer=answer) for variant in self.lexicon.support_summary_variants
        )
        cleaned = [self._sentence(line) for line in supports if line.strip()]
        if not cleaned:
            if summaries:
                return self._choose_novel(history_key, summaries)
            return fallback_summary

        connectors = self.lexicon.support_connectors or ("Then",)
        first_sentence = cleaned[0]
        lowered_first = self._lower_sentence(first_sentence)
        # Later support lines are identical for every opening; lowercase them once.
        followups = [self._lower_sentence(line) for line in cleaned[1:]]
        openings = (
            first_sentence,
            f"I start from this fact: {lowered_first}.",
            f"The graph first gives me this: {lowered_first}.",
            f"One grounded fact is that {lowered_first}.",
            f"I begin with {lowered_first}.",
            f"My first support fact is that {lowered_first}.",
        )
        reflections = (
            f"Across that proof, the same answer keeps surfacing: {answer}.",
            f"The linked structure stays consistent all the way to {answer}.",
            f"Nothing in that chain breaks the answer {answer}.",
            f"Those connected facts keep reinforcing {answer}.",
        )
        candidates: list[str] = []
        for offset in range(min(len(connectors), 6)):
            for opening in openings:
                pieces = [opening]
                for index, line in enumerate(followups):
                    connector = connectors[(offset + index) % len(connectors)]
                    pieces.append(f"{connector}, {line}.")
                # The opening's length varies the reflection between openings.
                pieces.append(reflections[(offset + len(opening)) % len(reflections)])
                if summaries:
                    pieces.append(summaries[(offset + len(candidates)) % len(summaries)])
                else:
                    pieces.append(fallback_summary)
                candidates.append(" ".join(pieces))
        return self._choose_novel(history_key, tuple(candidates))

    def _compose_story_paragraph(self, seed: str, supports: list[str]) -> str:
        """Wrap the support sentences between a narrative intro and closer; empty when nothing to tell."""
        cleaned = [self._sentence(line) for line in supports if line.strip()]
        if not cleaned:
            return ""
        intros = (
            "Here is the grounded story I can tell.",
            "The graph lets me tell the story this way.",
            "I can narrate the stored story like this.",
        )
        closers = (
            "That is the grounded thread I can recover from what Aethon remembers.",
            "Those events stay connected in memory, so the story holds together.",
            "That sequence is how the stored events unfold in Aethon's graph.",
        )
        intro = self._choose_novel(f"story-intro::{seed}", intros)
        closer = self._choose_novel(f"story-close::{seed}", closers)
        body = " ".join(cleaned)
        return f"{intro} {body} {closer}"

    def _compose_abstract_answer(self, result: QueryResult) -> str:
        """Answer an ``abstract`` result, reusing its explanation when one can be built."""
        explanation = self._compose_abstract_explanation(result)
        if explanation:
            return explanation
        return f"I generalize the answer as {self._humanize(result.answer)}."

    def _compose_abstract_explanation(self, result: QueryResult) -> str:
        """Explain a generalised answer from its proof lines, rotating sentence order per candidate."""
        answer = self._humanize(result.answer)
        generic = f"I generalize the answer as {answer}."
        if not result.proof:
            return generic
        pieces: list[str] = []
        seen: set[str] = set()
        for step in result.proof:
            sentence = self._abstract_proof_to_sentence(step)
            if sentence and sentence not in seen:
                pieces.append(sentence)
                seen.add(sentence)
        if not pieces:
            return generic
        closers = (
            f"So I answer {answer}.",
            f"That is why I generalize the answer as {answer}.",
            f"So my generalized answer is {answer}.",
            f"That chain lets me answer with {answer}.",
        )
        candidates = []
        for offset, closer in enumerate(closers):
            shift = offset % len(pieces)
            ordered = pieces[shift:] + pieces[:shift]
            rendered = " ".join(self._sentence(piece) for piece in ordered)
            candidates.append(f"{rendered} {closer}")
        return self._choose_novel(f"abstract::{result.answer}::{''.join(result.proof)}", tuple(candidates))

    def _abstract_proof_to_sentence(self, proof_line: str) -> str | None:
        """Render one proof line of an ``abstract`` result, or ``None`` if it has no rendering."""
        if proof_line.startswith("abstract:"):
            parsed = self._parse_edge(proof_line)
            if parsed is None:
                return None
            subject, relation, object_value = parsed
            return f"I know that {self._humanize(subject)} {relation.replace('_', ' ')} {self._humanize(object_value)}"
        if proof_line.startswith(("subject:", "object:", "ontology:")):
            payload = proof_line.split(":", 1)[1]
            concept, _, parent = payload.partition("->is_a->")
            if concept and parent:
                return f"{self._humanize(concept)} belongs to {self._humanize(parent)}"
        return None

    @staticmethod
    def _choose(seed: str, options: tuple[str, ...]) -> str:
        """Pick an option deterministically from the sum of the seed's code points."""
        if not options:
            return ""
        return options[sum(ord(char) for char in seed) % len(options)]

    def _choose_novel(self, key: str, candidates: tuple[str, ...]) -> str:
        """Return the first candidate not yet produced for ``key``.

        Once every candidate has been used, ``response_index`` selects one.
        The choice is recorded and the per-key history is capped at
        ``_HISTORY_LIMIT`` entries.
        """
        if not candidates:
            return ""
        history = self.response_history.setdefault(key, [])
        choice = next((candidate for candidate in candidates if candidate not in history), None)
        if choice is None:
            choice = candidates[self.response_index % len(candidates)]
        history.append(choice)
        if len(history) > self._HISTORY_LIMIT:
            del history[: -self._HISTORY_LIMIT]
        return choice

    def _compose_relation_sentence(self, subject: str, relation: str, object_value: str) -> str:
        """Render a third-person edge sentence (returned without terminal punctuation)."""
        subject_text = self._humanize(subject)
        object_text = self._humanize(object_value)
        if relation in {"is_a", "be"}:
            return f"{subject_text} is {self._article(object_text)} {object_text}"

        tokens = relation.split("_")
        if relation.endswith("_in"):
            # Keep every verb token before the trailing "in" so that
            # "was_born_in" reads "was born in" rather than "was in".
            verb_tokens = tokens[:-1]
            verb_tokens[0] = self._IN_VERB_FORMS.get(verb_tokens[0], verb_tokens[0])
            return f"{subject_text} {' '.join(verb_tokens)} in {object_text}"

        template = self._FIXED_RELATION_TEMPLATES.get(relation)
        if template is not None:
            return template.format(subject=subject_text, object=object_text)

        head = tokens[0]
        tail = " ".join(tokens[1:])
        if tail:
            return f"{subject_text} {head} {tail} {object_text}"
        if head.endswith("s"):
            return f"{subject_text} {head} {object_text}"
        # Naive third-person form: append "s" to a bare verb.
        return f"{subject_text} {head}s {object_text}"

    def _compose_self_sentence(self, relation: str, object_value: str) -> str:
        """Render a first-person edge sentence for the ``aethon`` subject."""
        object_text = self._humanize(object_value)
        if relation in {"is_a", "be"}:
            return f"I am {self._article(object_text)} {object_text}"
        if relation == "equals":
            return f"My value is {object_text}"
        tokens = relation.split("_")
        if relation.endswith("_in"):
            # Keep every verb token before the trailing "in".
            return f"I {' '.join(tokens[:-1])} in {object_text}"
        # Covers "use", "report", "prefer", "like", "depend_on" and any other
        # relation: the underscore-separated tokens are read as the verb phrase.
        return f"I {' '.join(tokens)} {object_text}"

    @staticmethod
    def _article(text: str) -> str:
        """Return ``"an"`` before a leading vowel letter, otherwise ``"a"``."""
        if not text:
            return "a"
        return "an" if text[0].lower() in {"a", "e", "i", "o", "u"} else "a"
runtime/aethon/rfi_surface_lexicon.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+
7
class NativeSurfaceLexicon:
    """Loads Aethon surface templates and phrasing variants from native data.

    The data is a mapping taken either from an explicit ``payload`` or from a
    JSON file.  Each known key is exposed as an attribute: the two template
    tables as dicts, every ``*_variants`` / ``support_connectors`` list as a
    tuple.  Keys missing from the data yield an empty dict or tuple.
    """

    DEFAULT_PATH = Path(__file__).resolve().parents[1].joinpath(
        "data", "native", "surface", "aethon_surface_lexicon_v1.json"
    )

    def __init__(self, path: str | Path | None = None, payload: dict | None = None) -> None:
        """Resolve the data source and populate the lexicon attributes.

        Args:
            path: JSON file to read; ``DEFAULT_PATH`` is used when omitted.
            payload: Already-loaded data; when given, the file is not read.
        """
        self.path = self.DEFAULT_PATH if path is None else Path(path)
        self.payload = self._load_payload(payload)
        data = self.payload

        # Template tables are exposed as stored (no copy is made).
        self.relation_templates: dict[str, str] = data.get("relation_templates", {})
        self.self_relation_templates: dict[str, str] = data.get("self_relation_templates", {})

        # Phrasing lists are frozen into tuples.
        self.direct_fact_variants: tuple[str, ...] = self._variants("direct_fact_variants")
        self.direct_self_variants: tuple[str, ...] = self._variants("direct_self_variants")
        self.direct_emoji_variants: tuple[str, ...] = self._variants("direct_emoji_variants")
        self.direct_unknown_variants: tuple[str, ...] = self._variants("direct_unknown_variants")
        self.derived_math_variants: tuple[str, ...] = self._variants("derived_math_variants")
        self.derived_fact_variants: tuple[str, ...] = self._variants("derived_fact_variants")
        self.derived_fallback_variants: tuple[str, ...] = self._variants("derived_fallback_variants")
        self.composed_variants: tuple[str, ...] = self._variants("composed_variants")
        self.path_variants: tuple[str, ...] = self._variants("path_variants")
        self.abstract_variants: tuple[str, ...] = self._variants("abstract_variants")
        self.explain_unknown_variants: tuple[str, ...] = self._variants("explain_unknown_variants")
        self.explain_emoji_variants: tuple[str, ...] = self._variants("explain_emoji_variants")
        self.explain_math_variants: tuple[str, ...] = self._variants("explain_math_variants")
        self.support_connectors: tuple[str, ...] = self._variants("support_connectors")
        self.support_summary_variants: tuple[str, ...] = self._variants("support_summary_variants")
        self.path_explain_variants: tuple[str, ...] = self._variants("path_explain_variants")
        self.reasoning_intro_variants: tuple[str, ...] = self._variants("reasoning_intro_variants")

    def _variants(self, key: str) -> tuple[str, ...]:
        """Return the sequence stored under ``key`` as a tuple (empty when absent)."""
        return tuple(self.payload.get(key, ()))

    def _load_payload(self, payload: dict | None) -> dict:
        """Return ``payload`` if given, else the parsed JSON at ``self.path`` or ``{}`` if the file is missing."""
        if payload is None:
            if self.path.exists():
                return json.loads(self.path.read_text(encoding="utf-8"))
            return {}
        return payload

    def to_payload(self) -> dict:
        """Return a shallow copy of the loaded data."""
        return {**self.payload}