FerrellSyntheticIntelligence committed on
Commit ·
3746aeb
1
Parent(s): 28ae43b
feat: hard-sync reasoning modules
Browse files- concept_graph.py +31 -0
- high_precision.py +21 -0
- science_reasoner.py +16 -0
concept_graph.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json, os, numpy as np, faiss
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Dict, List, Tuple, Any
|
| 4 |
+
|
| 5 |
+
class ConceptNode:
    """One concept: id, label, embedding vector, confidence and edge list."""

    def __init__(self, cid, label, embedding, confidence, edges=None):
        """Store the given fields; a missing or empty ``edges`` becomes ``[]``."""
        self.cid = cid
        self.label = label
        self.embedding = embedding
        self.confidence = confidence
        self.edges = edges if edges else []

    def to_dict(self):
        """Return a plain-dict view of the node.

        The embedding is converted with ``.tolist()``; the edge list is
        handed back as-is (same object, not a copy).
        """
        payload = {
            "cid": self.cid,
            "label": self.label,
            "embedding": self.embedding.tolist(),
            "confidence": self.confidence,
            "edges": self.edges,
        }
        return payload

    @staticmethod
    def from_dict(d):
        """Rebuild a node from a mapping shaped like the output of ``to_dict``.

        Values are coerced: ``cid`` to int, ``label`` to str, ``embedding``
        to a float32 array, ``confidence`` to float, and every edge to a
        tuple. A missing ``"edges"`` key yields an empty edge list.
        """
        # Keys are read in this order so a missing key fails on the same
        # field as before.
        node_id = int(d["cid"])
        name = str(d["label"])
        vector = np.array(d["embedding"], dtype=np.float32)
        score = float(d["confidence"])
        links = list(map(tuple, d.get("edges", [])))
        return ConceptNode(node_id, name, vector, score, links)
|
| 13 |
+
|
| 14 |
+
class ConceptGraph:
    """Concept store that pairs a flat L2 FAISS index with a dict of nodes.

    A node's id is the row its vector occupies in the index
    (``index.ntotal - 1`` at insertion time).
    """

    def __init__(self, dim=768, persist_dir="data/concept_graph"):
        """Create an empty graph.

        Args:
            dim: Length every embedding vector must have.
            persist_dir: Directory used by ``persist``; created if missing.
        """
        self.dim, self.persist_dir = dim, Path(persist_dir)
        self.persist_dir.mkdir(parents=True, exist_ok=True)
        self.index = faiss.IndexFlatL2(dim)
        self._nodes: Dict[int, ConceptNode] = {}

    def add_node(self, label, embedding, confidence, edges=None):
        """Normalise ``embedding`` to unit length, index it, and store a node.

        Args:
            label: Text label stored on the node.
            embedding: 1-D array-like of length ``dim``; it is copied, so
                the caller's array is never modified.
            confidence: Confidence value stored on the node.
            edges: Optional edge list forwarded to ``ConceptNode``.

        Returns:
            The id assigned to the new node.

        Raises:
            ValueError: If the vector has the wrong shape, or its norm is
                zero or non-finite (normalising it would put NaN/inf
                values into the index).
        """
        # np.array always copies, so the in-place division below cannot
        # leak into the caller's data.
        vec = np.array(embedding, dtype=np.float32)
        if vec.shape != (self.dim,):
            raise ValueError(
                f"embedding must have shape ({self.dim},), got {vec.shape}"
            )
        norm = np.linalg.norm(vec)
        if not np.isfinite(norm) or norm == 0:
            raise ValueError("embedding must have a finite, non-zero norm")
        vec /= norm
        self.index.add(np.expand_dims(vec, 0))
        cid = self.index.ntotal - 1
        self._nodes[cid] = ConceptNode(cid, label, vec, confidence, edges)
        return cid

    def get_node(self, cid):
        """Return the node stored under ``cid``.

        Raises:
            KeyError: If no node with that id exists.
        """
        return self._nodes[cid]

    def persist(self):
        """Write ``concepts.json`` and ``faiss.index`` into ``persist_dir``."""
        concepts_path = self.persist_dir / "concepts.json"
        with concepts_path.open("w", encoding="utf-8") as f:
            json.dump([n.to_dict() for n in self._nodes.values()], f, indent=2)
        faiss.write_index(self.index, str(self.persist_dir / "faiss.index"))
|
high_precision.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import math
|
| 3 |
+
from decimal import Decimal, getcontext, localcontext
|
| 4 |
+
from typing import Union
|
| 5 |
+
# Scalar inputs accepted by the helpers in this module.
Number = Union[float, Decimal]

# Import-time side effect: the active decimal context is switched to
# 60 significant digits.
getcontext().prec = 60
|
| 7 |
+
def to_decimal(x: Number) -> Decimal:
    """Return *x* as a ``Decimal``.

    A ``Decimal`` argument is returned unchanged (same object). Anything
    else is converted through its ``str()`` form, so a float maps to the
    decimal spelled by its string representation, not to its exact
    binary expansion.
    """
    if isinstance(x, Decimal):
        return x
    return Decimal(str(x))
|
| 9 |
+
def sqrt(x: Number) -> Decimal:
    """Return the square root of *x* as a ``Decimal``.

    The work runs inside a temporary copy of the active decimal context.
    """
    with localcontext() as scratch:
        # Inside this block getcontext() is the local copy itself, so this
        # keeps the precision inherited from the caller's context.
        scratch.prec = getcontext().prec
        operand = to_decimal(x)
        return operand.sqrt()
|
| 13 |
+
def exp(x: Number) -> Decimal:
    """Return ``e`` raised to the power *x* as a ``Decimal``.

    The work runs inside a temporary copy of the active decimal context.
    """
    with localcontext() as scratch:
        # Inside this block getcontext() is the local copy itself, so this
        # keeps the precision inherited from the caller's context.
        scratch.prec = getcontext().prec
        exponent = to_decimal(x)
        return exponent.exp()
|
| 17 |
+
def log(x: Number, base: Number = math.e) -> Decimal:
    """Return the logarithm of *x* in the given *base* as a ``Decimal``.

    Args:
        x: Value whose logarithm is taken.
        base: Logarithm base. The default, the float ``math.e``, selects
            the natural logarithm computed directly with ``Decimal.ln``.

    Returns:
        ``ln(x)`` for the default base, ``log10(x)`` for base 10, and
        ``ln(x) / ln(base)`` for any other base.

    Raises:
        decimal.InvalidOperation / decimal.DivisionByZero: Propagated from
            the decimal module for inputs it rejects under the active
            context (for example a base of 1 makes the divisor zero).
    """
    with localcontext() as ctx:
        # Inside this block getcontext() is ctx itself, so the precision
        # inherited from the caller's context is kept.
        ctx.prec = getcontext().prec
        value = to_decimal(x)
        if base == math.e:
            return value.ln()
        base_dec = to_decimal(base)
        if base_dec == 10:
            # log10 is correctly rounded; dividing two rounded logarithms
            # can leave exact powers of ten (e.g. 1000) one ulp off.
            return value.log10()
        return value.ln() / base_dec.ln()
|
science_reasoner.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np, sympy as sp
|
| 2 |
+
class ScienceReasoner:
    """Builds chained "A AND B" conjunction nodes inside a concept graph.

    The graph object must provide ``add_node(label, embedding, confidence)``
    returning a node id, and ``get_node(cid)`` returning an object with
    ``label``, ``embedding`` and ``confidence`` attributes.
    """

    def __init__(self, graph):
        self.graph = graph

    def infer(self, propositions, steps, max_depth=10):
        """Add the propositions to the graph and conjoin them repeatedly.

        Every proposition is stored as a premise node. Each of the
        ``max_depth`` iterations then combines the most recently created
        node with the one before it (initially the first two premises, or
        the first premise with itself when only one is given): the
        confidences are multiplied, the labels joined as ``"(A AND B)"``
        and the embeddings averaged.

        Args:
            propositions: Iterable of objects exposing ``text``,
                ``embedding`` and ``confidence``.
            steps: Accepted for interface compatibility; not used here.
            max_depth: Number of conjunction steps to perform.

        Returns:
            The node produced by the last step, or the first premise node
            when ``max_depth`` is zero or negative.

        Raises:
            ValueError: If ``propositions`` is empty.
        """
        premise_cids = [
            self.graph.add_node(p.text, p.embedding, p.confidence)
            for p in propositions
        ]
        if not premise_cids:
            raise ValueError("infer() requires at least one proposition")

        # Only the two newest ids are ever read, so track just those
        # instead of rebuilding a growing list on every step.
        head_cid = premise_cids[0]
        prev_cid = premise_cids[1] if len(premise_cids) > 1 else head_cid

        for _ in range(max_depth):
            node_a = self.graph.get_node(head_cid)
            node_b = self.graph.get_node(prev_cid)
            new_conf = node_a.confidence * node_b.confidence
            label = f"({node_a.label} AND {node_b.label})"
            embed = (node_a.embedding + node_b.embedding) / 2.0
            norm = np.linalg.norm(embed)
            if norm > 0:
                # Skip normalisation for a zero vector (opposite
                # embeddings) so no NaNs are produced here.
                embed = embed / norm
            new_cid = self.graph.add_node(label, embed, new_conf)
            head_cid, prev_cid = new_cid, head_cid

        return self.graph.get_node(head_cid)
|