Rename mcp/graph_utils.py to mcp/graph_metrics.py
Browse files- mcp/graph_metrics.py +69 -0
- mcp/graph_utils.py +0 -110
mcp/graph_metrics.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
graph_metrics.py · Lightweight NetworkX helpers for MedGenesis
|
| 3 |
+
|
| 4 |
+
Key features
|
| 5 |
+
────────────
|
| 6 |
+
• Accepts edge dictionaries in either Streamlit-agraph or PyVis style:
|
| 7 |
+
{"source": "n1", "target": "n2"} ← agraph
|
| 8 |
+
{"from": "n1", "to": "n2"} ← PyVis
|
| 9 |
+
• Silently skips malformed edges (no KeyError).
|
| 10 |
+
• Provides three public helpers:
|
| 11 |
+
build_nx(nodes, edges) → networkx.Graph
|
| 12 |
+
get_top_hubs(G, k=5) → List[(node_id, degree_centrality)]
|
| 13 |
+
get_density(G) → float (0–1)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
from typing import List, Dict, Tuple
|
| 18 |
+
import networkx as nx
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ────────────────────────────────────────────────────────────────────
|
| 22 |
+
# Internal helpers
|
| 23 |
+
# ────────────────────────────────────────────────────────────────────
|
| 24 |
+
def _edge_ends(e: Dict) -> Tuple[str, str] | None:
|
| 25 |
+
"""Return (src, dst) tuple if both ends exist; else None."""
|
| 26 |
+
src = e.get("source") or e.get("from")
|
| 27 |
+
dst = e.get("target") or e.get("to")
|
| 28 |
+
if src and dst:
|
| 29 |
+
return src, dst
|
| 30 |
+
return None
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ────────────────────────────────────────────────────────────────────
|
| 34 |
+
# Public API
|
| 35 |
+
# ────────────────────────────────────────────────────────────────────
|
| 36 |
+
def build_nx(nodes: List[Dict], edges: List[Dict]) -> nx.Graph:
    """
    Build an undirected NetworkX graph from agraph / PyVis style dicts.

    Nodes: every dict must provide "id"; a missing "id" raises KeyError.
           The node is stored with a "label" attribute that falls back to
           the id when the dict has no "label".
    Edges: {"source":, "target":} or {"from":, "to":}; any edge for which
           `_edge_ends` yields nothing is skipped.
    """
    graph = nx.Graph()

    # Register nodes first so each one carries its label attribute
    # (the original comment notes the Metrics tab uses it).
    for node in nodes:
        node_id = node["id"]
        graph.add_node(node_id, label=node.get("label", node_id))

    # Connect endpoints, ignoring edges that could not be resolved.
    for edge in edges:
        pair = _edge_ends(edge)
        if not pair:
            continue
        src, dst = pair
        graph.add_edge(src, dst)

    return graph
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def get_top_hubs(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """
    Rank nodes by degree centrality and return the first k.

    The result is a list of (node_id, centrality) pairs in descending
    centrality order, e.g. [('TP53', 0.42), ('EGFR', 0.36), ...]
    """
    centrality = nx.degree_centrality(G)
    ranked = sorted(centrality.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:k]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def get_density(G: nx.Graph) -> float:
    """Return the density of G as computed by ``nx.density``.

    The value lies in [0, 1] for a simple graph; NetworkX documents that
    self-loops can push it above 1.
    """
    density = nx.density(G)
    return density
|
mcp/graph_utils.py
DELETED
|
@@ -1,110 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""MedGenesis – NetworkX helpers (robust version)
|
| 3 |
-
|
| 4 |
-
Key upgrades over the legacy helper:
|
| 5 |
-
|
| 6 |
-
1. **Edge‑key flexibility** – `build_nx` now recognises *four* common
|
| 7 |
-
schemas produced by Streamlit‑agraph, PyVis, Neo4j exports or OT graphs:
|
| 8 |
-
|
| 9 |
-
• `{"source": "n1", "target": "n2"}` (agraph)
|
| 10 |
-
• `{"from": "n1", "to": "n2"}` (PyVis)
|
| 11 |
-
• `{"src": "n1", "dst": "n2"}` (neo4j/json)
|
| 12 |
-
• `{"u": "n1", "v": "n2"}` (NetworkX native)
|
| 13 |
-
|
| 14 |
-
2. **Weight aware** – optional numeric `weight` (or `value`) field becomes
|
| 15 |
-
an edge attribute (defaults to 1).
|
| 16 |
-
|
| 17 |
-
3. **Self‑loop skip** – ignores self‑edges to keep density sensible.
|
| 18 |
-
|
| 19 |
-
4. **Utility metrics** – adds `betweenness` & `clustering` helpers in
|
| 20 |
-
addition to top‑hub degree ranking.
|
| 21 |
-
"""
|
| 22 |
-
|
| 23 |
-
from __future__ import annotations
|
| 24 |
-
|
| 25 |
-
from typing import Dict, List, Tuple
|
| 26 |
-
import networkx as nx
|
| 27 |
-
|
| 28 |
-
__all__ = [
|
| 29 |
-
"build_nx",
|
| 30 |
-
"get_top_hubs",
|
| 31 |
-
"get_density",
|
| 32 |
-
"get_betweenness",
|
| 33 |
-
"get_clustering_coeff",
|
| 34 |
-
]
|
| 35 |
-
|
| 36 |
-
# ---------------------------------------------------------------------
|
| 37 |
-
# Internal helpers
|
| 38 |
-
# ---------------------------------------------------------------------
|
| 39 |
-
|
| 40 |
-
def _edge_endpoints(e: Dict) -> Tuple[str, str] | None:
|
| 41 |
-
"""Return (src, dst) if both ends exist; else None."""
|
| 42 |
-
src = e.get("source") or e.get("from") or e.get("src") or e.get("u")
|
| 43 |
-
dst = e.get("target") or e.get("to") or e.get("dst") or e.get("v")
|
| 44 |
-
if src and dst and src != dst:
|
| 45 |
-
return str(src), str(dst)
|
| 46 |
-
return None
|
| 47 |
-
|
| 48 |
-
# ---------------------------------------------------------------------
|
| 49 |
-
# Public API
|
| 50 |
-
# ---------------------------------------------------------------------
|
| 51 |
-
|
| 52 |
-
def build_nx(nodes: List[Dict], edges: List[Dict]) -> nx.Graph:
    """Convert heterogeneous node/edge dicts into an undirected NetworkX graph.

    Parameters
    ----------
    nodes : list of node dicts – each must contain an `id` key (a missing
        `id` raises ``KeyError``); the id is converted to ``str`` and every
        other key is copied as a node attribute.
    edges : list of edge dicts – endpoints are resolved by
        `_edge_endpoints`; edges it rejects are skipped.  The edge weight is
        taken from `weight`, then `value`, and defaults to 1.  An explicit
        numeric zero is honoured rather than being replaced by the default.

    Returns
    -------
    nx.Graph – ready for downstream centrality / drawing.

    Raises
    ------
    ValueError / TypeError – propagated from ``float()`` when a supplied
        weight is not numeric.
    """
    G = nx.Graph()

    # Nodes ----------------------------------------------------------------
    for n in nodes:
        node_id = str(n["id"])
        attrs = {key: val for key, val in n.items() if key != "id"}
        G.add_node(node_id, **attrs)

    # Edges ----------------------------------------------------------------
    for e in edges:
        endpoints = _edge_endpoints(e)
        if not endpoints:
            continue
        u, v = endpoints

        # Take the first usable weight.  Truthy values pass as before; a
        # numeric zero is also accepted so it is not overwritten by 1.
        w = 1
        for key in ("weight", "value"):
            candidate = e.get(key)
            is_zero = isinstance(candidate, (int, float)) and not isinstance(candidate, bool)
            if candidate or is_zero:
                w = candidate
                break

        G.add_edge(u, v, weight=float(w))

    return G
|
| 83 |
-
|
| 84 |
-
# ---------------------------------------------------------------------
|
| 85 |
-
# Metrics helpers
|
| 86 |
-
# ---------------------------------------------------------------------
|
| 87 |
-
|
| 88 |
-
def get_top_hubs(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """Return the first *k* ``(node, score)`` pairs ranked by degree centrality, highest first."""
    pairs = list(nx.degree_centrality(G).items())
    pairs.sort(key=lambda item: item[1], reverse=True)
    return pairs[:k]
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
def get_betweenness(G: nx.Graph, k: int = 5) -> List[Tuple[str, float]]:
    """Return the first *k* ``(node, score)`` pairs ranked by betweenness centrality.

    For graphs with more than 500 nodes the centrality call is given
    ``k=200, seed=42`` (NetworkX's sampled approximation with a fixed seed);
    smaller graphs use the default exact computation.
    """
    # Note: the ``k`` passed to NetworkX is its sample size, unrelated to
    # this function's own ``k`` (the number of results to return).
    sampling = {"k": 200, "seed": 42} if G.number_of_nodes() > 500 else {}
    scores = nx.betweenness_centrality(G, **sampling)
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:k]
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
def get_clustering_coeff(G: nx.Graph) -> float:
    """Return average clustering coefficient (0‑1).

    An empty graph yields ``0.0``.  ``nx.average_clustering`` averages over
    the node set and would otherwise raise ``ZeroDivisionError`` when there
    are no nodes.
    """
    if G.number_of_nodes() == 0:
        return 0.0
    return nx.average_clustering(G)
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
def get_density(G: nx.Graph) -> float:
    """Return the density of *G* as reported by ``nx.density`` (0 to 1 for a graph without self-loops)."""
    result = nx.density(G)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|