Spaces:
Running
Running
"""
Export the Narada knowledge graph to Neo4j Cypher format.

Generates two files:
    neo4j_nodes.cypher — CREATE constraints + MERGE node statements
    neo4j_rels.cypher  — MERGE relationship statements

Import into Neo4j Aura (free tier, 50MB) or Desktop:
    neo4j-admin database import ... OR paste into Neo4j Browser

Usage:
    PYTHONPATH=src/envs python scripts/export_neo4j.py
    PYTHONPATH=src/envs python scripts/export_neo4j.py --limit 5000
"""
| from __future__ import annotations | |
| import argparse | |
| import os | |
| import sys | |
# Path bootstrap: prepend <this file's directory>/../src/envs to sys.path (once)
# so that the ``narada`` package resolves even when PYTHONPATH was not set.
# This must run before the project import below.
_src = os.path.join(os.path.dirname(__file__), "..", "src", "envs")
if _src not in sys.path:
    sys.path.insert(0, _src)

from narada.graph import get_graph
def escape(s: str) -> str:
    """Return *s* made safe for embedding in a single-quoted Cypher string.

    Backslashes are doubled, single quotes are backslash-escaped, each
    newline becomes one space and carriage returns are removed.
    """
    # One pass over the text; every character is mapped independently, which
    # matches doing the four substitutions one after another because none of
    # the replacement texts contains a character that is substituted later.
    substitutions = str.maketrans(
        {
            "\\": "\\\\",
            "'": "\\'",
            "\n": " ",
            "\r": "",
        }
    )
    return s.translate(substitutions)
def export(limit: int) -> None:
    """Write the Narada knowledge graph as two Cypher script files.

    Creates, in the current working directory:

    * ``neo4j_nodes.cypher`` - a uniqueness constraint on ``NaradaNode.id``
      followed by one ``MERGE ... SET`` statement per exported node.
    * ``neo4j_rels.cypher`` - one ``MATCH ... MERGE`` statement per
      (node, neighbor) pair where both endpoints were exported.

    Progress messages and import hints are printed to stdout.

    Args:
        limit: Maximum number of nodes to export. ``0`` (or any other
            non-positive value) exports every node.
    """
    print("Loading graph...", flush=True)
    g = get_graph()

    # NOTE(review): assumes g.nodes maps node id -> dict-like attributes
    # (it is consumed via .items() and .get()) - confirm against narada.graph.
    nodes = list(g.nodes.items())
    # Only a positive limit truncates; a negative value used to silently drop
    # nodes from the end of the list, which contradicts "0 = all".
    if limit and limit > 0:
        nodes = nodes[:limit]
    node_ids = {nid for nid, _ in nodes}

    print(f"Exporting {len(nodes)} nodes...", flush=True)
    with open("neo4j_nodes.cypher", "w", encoding="utf-8") as f:
        f.write("// Narada knowledge graph - nodes\n")
        f.write("// Generated by scripts/export_neo4j.py\n\n")
        f.write("CREATE CONSTRAINT IF NOT EXISTS FOR (n:NaradaNode) REQUIRE n.id IS UNIQUE;\n\n")
        for nid, nd in nodes:
            # Backtick-quote the label so types containing spaces, slashes,
            # etc. stay valid Cypher; a literal backtick is escaped by
            # doubling it. Missing/empty types fall back to "unknown".
            label = (nd.get("type") or "unknown").capitalize().replace("`", "``")
            name = escape(nd.get("name", nid))
            # Truncate BEFORE escaping: cutting the escaped text could split
            # an escape pair and leave a lone backslash that swallows the
            # closing quote of the string literal.
            desc = escape(nd.get("description", "")[:200])
            f.write(
                f"MERGE (n:NaradaNode:`{label}` {{id: '{escape(nid)}'}}) "
                f"SET n.name = '{name}', n.description = '{desc}';\n"
            )

    print("Exporting relationships...", flush=True)
    rel_count = 0
    with open("neo4j_rels.cypher", "w", encoding="utf-8") as f:
        f.write("// Narada knowledge graph - relationships\n\n")
        for nid, _ in nodes:
            source_id = escape(nid)  # invariant across this node's neighbors
            for neighbor in g.get_neighbors(nid):
                # Skip edges that point outside the exported subset; their
                # MATCH would find nothing once --limit is in effect.
                if neighbor not in node_ids:
                    continue
                f.write(
                    f"MATCH (a:NaradaNode {{id: '{source_id}'}}), "
                    f"(b:NaradaNode {{id: '{escape(neighbor)}'}}) "
                    f"MERGE (a)-[:CONNECTS]->(b);\n"
                )
                rel_count += 1

    print(f"Done. {len(nodes)} nodes, {rel_count} relationships.")
    print("Files: neo4j_nodes.cypher, neo4j_rels.cypher")
    print()
    print("To import into Neo4j Browser (Aura / Desktop):")
    print(" 1. Open Neo4j Browser -> run neo4j_nodes.cypher")
    print(" 2. Run neo4j_rels.cypher")
    print(" 3. MATCH (n:NaradaNode) RETURN n LIMIT 100 // to visualise")
    print()
    print("Useful Cypher queries:")
    print(" // Show 2-hop subgraph from a disease node")
    print(" MATCH p=(d:Disease)-[*1..2]-(x) WHERE d.name CONTAINS 'Marfan' RETURN p LIMIT 50")
    print()
    print(" // Find path from phenotype to variant")
    print(" MATCH p=shortestPath((ph:Phenotype {id:'HP:0001250'})-[*]-(v:Variant))")
    print(" RETURN p LIMIT 10")
if __name__ == "__main__":
    # Command-line entry point: read the optional --limit flag and hand it
    # straight to export().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--limit",
        type=int,
        default=0,
        help="Node limit (0 = all)",
    )
    options = cli.parse_args()
    export(options.limit)