narada-env / scripts /export_neo4j.py
Krishna
Multi-step GRPO, milestone reward, benchmark, Neo4j export, graph viz
2a62ebe
Raw
History Blame Contribute Delete
3.37 kB
"""
Export the Narada knowledge graph to Neo4j Cypher format.
Generates two files:
neo4j_nodes.cypher — CREATE constraints + MERGE node statements
neo4j_rels.cypher — MERGE relationship statements
Import into Neo4j Aura (free tier, 50MB) or Desktop:
neo4j-admin database import ... OR paste into Neo4j Browser
Usage:
PYTHONPATH=src/envs python scripts/export_neo4j.py
PYTHONPATH=src/envs python scripts/export_neo4j.py --limit 5000
"""
from __future__ import annotations
import argparse
import os
import sys
# The graph package lives under <repo>/src/envs, one level above this script's
# directory. Put that directory at the front of sys.path (once) so the
# `narada` import below resolves even when PYTHONPATH is not set.
_src = os.path.join(os.path.dirname(__file__), os.pardir, "src", "envs")
if _src not in sys.path:
    sys.path.insert(0, _src)
from narada.graph import get_graph
# Character-level rewrite table used by escape(): backslash and single quote
# get a leading backslash, newline becomes a space, carriage return is dropped.
_CYPHER_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        "'": "\\'",
        "\n": " ",
        "\r": "",
    }
)


def escape(s: str) -> str:
    """Return *s* made safe for use inside a single-quoted Cypher string.

    Backslashes and single quotes are backslash-escaped, each newline is
    replaced by one space, and carriage returns are removed.
    """
    return s.translate(_CYPHER_ESCAPES)
def export(limit: int) -> None:
    """Export the graph returned by ``get_graph()`` as two Cypher scripts.

    Writes ``neo4j_nodes.cypher`` (a uniqueness constraint plus one MERGE per
    node) and ``neo4j_rels.cypher`` (one MERGE per exported node/neighbor
    pair) into the current working directory, then prints import hints and
    example queries to stdout.

    Args:
        limit: Maximum number of nodes to export. ``0`` (or any non-positive
            value) exports every node.
    """
    print("Loading graph...", flush=True)
    g = get_graph()
    nodes = list(g.nodes.items())
    # Only a positive limit truncates. A negative value would otherwise slice
    # from the end and silently drop the trailing nodes.
    if limit > 0:
        nodes = nodes[:limit]
    # Used below to skip relationships whose far end was not exported.
    node_ids = {nid for nid, _ in nodes}

    print(f"Exporting {len(nodes)} nodes...", flush=True)
    with open("neo4j_nodes.cypher", "w", encoding="utf-8") as f:
        f.write("// Narada knowledge graph — nodes\n")
        f.write("// Generated by scripts/export_neo4j.py\n\n")
        f.write("CREATE CONSTRAINT IF NOT EXISTS FOR (n:NaradaNode) REQUIRE n.id IS UNIQUE;\n\n")
        for nid, nd in nodes:
            # A missing or empty type falls back to "unknown" so the statement
            # never ends up with an empty label after the colon.
            label = (nd.get("type") or "unknown").capitalize()
            # Labels are emitted bare when they are plain identifiers; anything
            # else (spaces, hyphens, ...) is backtick-quoted, with embedded
            # backticks doubled, so the statement stays valid Cypher.
            if not label.isidentifier():
                label = "`" + label.replace("`", "``") + "`"
            name = escape(nd.get("name", nid))
            # Truncate BEFORE escaping: cutting the escaped text could split an
            # escape pair and leave a trailing backslash that swallows the
            # closing quote of the string literal.
            desc = escape(nd.get("description", "")[:200])
            f.write(
                f"MERGE (n:NaradaNode:{label} {{id: '{escape(nid)}'}}) "
                f"SET n.name = '{name}', n.description = '{desc}';\n"
            )

    print("Exporting relationships...", flush=True)
    rel_count = 0
    with open("neo4j_rels.cypher", "w", encoding="utf-8") as f:
        f.write("// Narada knowledge graph — relationships\n\n")
        for nid, _ in nodes:
            for neighbor in g.get_neighbors(nid):
                # Skip edges that point at a node outside the exported set;
                # the MATCH would find nothing for them.
                if neighbor not in node_ids:
                    continue
                f.write(
                    f"MATCH (a:NaradaNode {{id: '{escape(nid)}'}}), "
                    f"(b:NaradaNode {{id: '{escape(neighbor)}'}}) "
                    f"MERGE (a)-[:CONNECTS]->(b);\n"
                )
                rel_count += 1

    print(f"Done. {len(nodes)} nodes, {rel_count} relationships.")
    print("Files: neo4j_nodes.cypher, neo4j_rels.cypher")
    print()
    print("To import into Neo4j Browser (Aura / Desktop):")
    print(" 1. Open Neo4j Browser → run neo4j_nodes.cypher")
    print(" 2. Run neo4j_rels.cypher")
    print(" 3. MATCH (n:NaradaNode) RETURN n LIMIT 100 — to visualise")
    print()
    print("Useful Cypher queries:")
    print(" // Show 2-hop subgraph from a disease node")
    print(" MATCH p=(d:Disease)-[*1..2]-(x) WHERE d.name CONTAINS 'Marfan' RETURN p LIMIT 50")
    print()
    print(" // Find path from phenotype to variant")
    print(" MATCH p=shortestPath((ph:Phenotype {id:'HP:0001250'})-[*]-(v:Variant))")
    print(" RETURN p LIMIT 10")
if __name__ == "__main__":
    # Command-line entry point: read the optional node limit and run the export.
    cli = argparse.ArgumentParser()
    cli.add_argument("--limit", type=int, default=0, help="Node limit (0 = all)")
    export(cli.parse_args().limit)