Spaces:
Build error
Build error
File size: 2,256 Bytes
"""
Convert a raw triples JSON cache (e.g. alex_rivera_raw.json) to a
NetworkX MultiDiGraph and save it as a .gpickle file.

Usage:
    python scripts/convert_raw_triples.py --persona alex_rivera

No LLM or langchain dependencies — only needs networkx.
"""
import argparse
import json
import pickle
from pathlib import Path
import networkx as nx
def build_graph(triples: list[dict]) -> nx.MultiDiGraph:
    """
    Build a MultiDiGraph from a list of triple dicts.

    Each element of ``triples`` must provide the keys ``subject``,
    ``relation``, ``object`` and ``confidence``. Exact duplicate
    (subject, relation, object) triplets are collapsed into a single edge,
    keeping the instance with the highest confidence; on a tie the earliest
    instance wins.

    Args:
        triples: Triple dicts, in the order they should be considered.

    Returns:
        A graph with one edge per distinct triplet, running from subject to
        object and carrying ``relation`` and ``confidence`` edge attributes.

    Raises:
        KeyError: If a triple is missing one of the four required keys.
    """
    best: dict[tuple, dict] = {}
    for t in triples:
        key = (t["subject"], t["relation"], t["object"])
        confidence = t["confidence"]
        incumbent = best.get(key)
        # Compare against the stored triple rather than a numeric sentinel:
        # a sentinel such as -1.0 would silently discard any triple whose
        # confidence is at or below it, so the first instance of a triplet
        # is always kept regardless of its confidence value.
        if incumbent is None or confidence > incumbent["confidence"]:
            best[key] = t

    graph = nx.MultiDiGraph()
    for (s, r, o), t in best.items():
        # add_edge creates missing endpoint nodes itself (subject first, then
        # object), so no separate add_node calls are needed.
        graph.add_edge(s, o, relation=r, confidence=t["confidence"])
    return graph
def main() -> None:
    """
    Command-line entry point: convert ``<persona>_raw.json`` to a pickled graph.

    Reads ``--persona`` and ``--kg-dir`` from the command line, loads the raw
    triples JSON from the knowledge-graph directory, builds the graph with
    ``build_graph``, prints the node/edge counts plus up to ten sample edges,
    and pickles the graph to ``<persona>.gpickle`` in the same directory.

    Raises:
        SystemExit: If the raw triples file does not exist. The error message
            goes to stderr and the process exits with status 1, so shell
            pipelines and CI jobs see the failure.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--persona", default="alex_rivera")
    parser.add_argument("--kg-dir", default="data/knowledge_graphs")
    args = parser.parse_args()

    kg_dir = Path(args.kg_dir)
    raw_path = kg_dir / f"{args.persona}_raw.json"
    out_path = kg_dir / f"{args.persona}.gpickle"

    if not raw_path.exists():
        # A string payload makes the interpreter print it to stderr and exit
        # with status 1, instead of reporting success after an error.
        raise SystemExit(
            f"ERROR: {raw_path} not found. Run build_kg.py first to generate it."
        )

    print(f"Loading {raw_path} ...")
    with raw_path.open(encoding="utf-8") as fh:
        triples = json.load(fh)
    print(f" {len(triples)} raw triples loaded")

    graph = build_graph(triples)
    total_edges = graph.number_of_edges()
    print(f" Nodes : {graph.number_of_nodes()}")
    print(f" Edges : {total_edges}")

    # Show a sample of edges. The "more" trailer is printed only once an edge
    # beyond the sample is actually seen, so a graph with exactly
    # `sample_size` edges no longer reports "(0 more)".
    sample_size = 10
    print("\nSample edges:")
    for i, (s, o, data) in enumerate(graph.edges(data=True)):
        if i >= sample_size:
            print(f" ... ({total_edges - sample_size} more)")
            break
        print(f" {s} --[{data['relation']}]--> {o} (conf={data['confidence']:.2f})")

    with out_path.open("wb") as fh:
        pickle.dump(graph, fh)
    print(f"\nSaved to {out_path}")
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|