Spaces:
Build error
Build error
"""
Convert a raw triples JSON cache (e.g. alex_rivera_raw.json) to a
NetworkX MultiDiGraph and save it as a .gpickle file.

Usage:
    python scripts/convert_raw_triples.py --persona alex_rivera

No LLM or langchain dependencies — only needs networkx.
"""
| import argparse | |
| import json | |
| import pickle | |
| from pathlib import Path | |
| import networkx as nx | |
def build_graph(triples: list[dict]) -> nx.MultiDiGraph:
    """
    Build a MultiDiGraph from a list of {subject, relation, object, confidence} dicts.

    Exact duplicate (subject, relation, object) triples are collapsed into a
    single edge that carries the highest confidence seen for that triple.
    When duplicates tie on confidence, the first one encountered is kept.

    Args:
        triples: Dicts that each provide the keys "subject", "relation",
            "object" and "confidence".

    Returns:
        A graph with one edge per distinct triple, going from subject to
        object and carrying ``relation`` and ``confidence`` edge attributes.

    Raises:
        KeyError: If a triple is missing one of the required keys.
    """
    best: dict[tuple, dict] = {}
    for triple in triples:
        key = (triple["subject"], triple["relation"], triple["object"])
        current = best.get(key)
        # Compare against the stored entry rather than a numeric sentinel so
        # that a triple is never dropped just because its confidence is low
        # (a -1.0 sentinel would silently discard confidences <= -1.0).
        if current is None or triple["confidence"] > current["confidence"]:
            best[key] = triple

    graph = nx.MultiDiGraph()
    for (subject, relation, obj), triple in best.items():
        # add_edge creates any missing endpoint nodes, so no explicit
        # add_node calls are required.
        graph.add_edge(
            subject,
            obj,
            relation=relation,
            confidence=triple["confidence"],
        )
    return graph
def main() -> None:
    """
    Command-line entry point: load a persona's raw triples and pickle the graph.

    Reads ``<kg-dir>/<persona>_raw.json``, builds the graph with
    ``build_graph``, prints node/edge counts plus a sample of up to ten
    edges, and writes the graph to ``<kg-dir>/<persona>.gpickle``.

    If the raw JSON file does not exist, an error message is printed and the
    function returns without writing anything.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--persona", default="alex_rivera")
    parser.add_argument("--kg-dir", default="data/knowledge_graphs")
    args = parser.parse_args()

    kg_dir = Path(args.kg_dir)
    raw_path = kg_dir / f"{args.persona}_raw.json"
    out_path = kg_dir / f"{args.persona}.gpickle"

    if not raw_path.exists():
        print(f"ERROR: {raw_path} not found. Run build_kg.py first to generate it.")
        return

    print(f"Loading {raw_path} ...")
    with raw_path.open(encoding="utf-8") as fh:
        triples = json.load(fh)
    print(f"  {len(triples)} raw triples loaded")

    graph = build_graph(triples)
    edge_total = graph.number_of_edges()
    print(f"  Nodes : {graph.number_of_nodes()}")
    print(f"  Edges : {edge_total}")

    # Show a sample of edges
    sample_size = 10
    print("\nSample edges:")
    for index, (s, o, data) in enumerate(graph.edges(data=True)):
        if index >= sample_size:
            break
        print(f"  {s} --[{data['relation']}]--> {o}  (conf={data['confidence']:.2f})")
    # Only mention a remainder when there actually is one; with exactly
    # `sample_size` edges a "... (0 more)" line would be misleading.
    if edge_total > sample_size:
        print(f"  ... ({edge_total - sample_size} more)")

    with out_path.open("wb") as fh:
        pickle.dump(graph, fh)
    print(f"\nSaved to {out_path}")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()