Spaces:

mgbam
/

MCP_Res

Runtime error

App Files Community

mgbam committed on Jun 26, 2025

Commit

e202a39

verified ·

1 Parent(s): 7808af5

Update mcp/knowledge_graph.py

Browse files

Files changed (1) hide show

mcp/knowledge_graph.py +73 -121

mcp/knowledge_graph.py CHANGED Viewed

@@ -1,140 +1,92 @@
 # mcp/knowledge_graph.py
-"""
-Build agraph-compatible nodes + edges for the MedGenesis UI.
-Robustness notes
-----------------
-* Accepts *any* iterable for ``papers``, ``umls``, ``drug_safety``.
-* Silently skips items that are **not** dictionaries or have missing keys.
-* Normalises drug-safety payloads that may arrive as dict **or** list.
-* Always casts labels to string – avoids ``None.lower()`` errors.
-"""
-from __future__ import annotations
-import re
-from typing import List, Tuple
 from streamlit_agraph import Node, Edge, Config
-# ── helpers -----------------------------------------------------------------
-def _safe_str(x) -> str:
-    """Return UTF-8 string or empty string."""
-    return str(x) if x is not None else ""
-def _uniquify(nodes: List[Node]) -> List[Node]:
-    """Remove duplicate node-ids (keep first)."""
-    seen, out = set(), []
-    for n in nodes:
-        if n.id not in seen:
-            out.append(n)
-            seen.add(n.id)
-    return out
-# ── public builder ----------------------------------------------------------
-def build_agraph(
-    papers: list,
-    umls: list,
-    drug_safety: list,
-) -> Tuple[List[Node], List[Edge], Config]:
     """
-    Parameters
-    ----------
-    papers : List[dict]
-        Must contain keys ``title``, ``summary``.
-    umls : List[dict]
-        Dicts with at least ``name`` and ``cui``.
-    drug_safety : List[dict | list]
-        OpenFDA records – could be one dict or list of dicts.
-    Returns
-    -------
-    nodes, edges, cfg : tuple
-        Ready for ``streamlit_agraph.agraph``.
     """
-    nodes: List[Node] = []
-    edges: List[Edge] = []
-    # ── UMLS concepts -------------------------------------------------------
-    for c in umls:
-        if not isinstance(c, dict):
             continue
-        cui = _safe_str(c.get("cui")).strip()
-        name = _safe_str(c.get("name")).strip()
-        if not (cui and name):
             continue
-        nodes.append(
-            Node(id=f"concept_{cui}", label=name, size=28, color="#00b894")
-        )
-    # ── Drug safety --------------------------------------------------------
-    drug_nodes: List[Tuple[str, str]] = []
-    for idx, rec in enumerate(drug_safety):
-        if not rec:
-            continue
-        recs = rec if isinstance(rec, list) else [rec]
-        for j, r in enumerate(recs):
-            if not isinstance(r, dict):
-                continue
-            dn = (
-                r.get("drug_name")
-                or r.get("patient", {}).get("drug")
-                or r.get("medicinalproduct")
-            )
-            dn = _safe_str(dn).strip() or f"drug_{idx}_{j}"
-            did = f"drug_{idx}_{j}"
-            drug_nodes.append((did, dn))
-            nodes.append(Node(id=did, label=dn, size=25, color="#d35400"))
-    # ── Papers & edges ------------------------------------------------------
-    for p_idx, p in enumerate(papers):
-        if not isinstance(p, dict):
-            continue
-        pid = f"paper_{p_idx}"
-        title = _safe_str(p.get("title"))
-        summary = _safe_str(p.get("summary"))
-        nodes.append(
-            Node(
-                id=pid,
-                label=f"P{p_idx + 1}",
-                tooltip=title,
-                size=16,
-                color="#0984e3",
             )
-        )
-        text_blob = f"{title} {summary}".lower()
-        # → concept edges
-        for c in umls:
-            if not isinstance(c, dict):
-                continue
-            name = _safe_str(c.get("name")).lower()
-            cui = _safe_str(c.get("cui"))
-            if name and cui and name in text_blob:
-                edges.append(
-                    Edge(source=pid, target=f"concept_{cui}", label="mentions")
-                )
-        # → drug edges
-        for did, dn in drug_nodes:
-            if dn.lower() in text_blob:
-                edges.append(Edge(source=pid, target=did, label="mentions"))
-    # ── deduplicate & config ------------------------------------------------
-    nodes = _uniquify(nodes)
     cfg = Config(
         width="100%",
-        height="600px",
         directed=False,
         nodeHighlightBehavior=True,
-        highlightColor="#f1c40f",
         collapsible=True,
         node={"labelProperty": "label"},
     )
     return nodes, edges, cfg

 # mcp/knowledge_graph.py
 from streamlit_agraph import Node, Edge, Config
+import re
+# Colors for graph nodes
+EDGE_COLOR     = "#888"
+DRUG_COLOR     = "#f39c12"
+CONCEPT_COLOR  = "#00b894"
+PAPER_COLOR    = "#3498db"
+HL_COLOR       = "#f1c40f"
+DIM_COLOR      = "#d3d3d3"
def _as_text(value):
    """Return ``value`` as a stripped string; any falsy value becomes ``""``."""
    return str(value).strip() if value else ""


def _drug_label(record):
    """Return the first usable drug name in ``record``, or ``""`` if none.

    Candidates are tried in order: ``drug_name``, ``patient["drug"]`` (only
    consulted when ``patient`` is a dict), then ``medicinalproduct``.
    """
    patient = record.get("patient")
    # ``patient`` may be present but None / not a dict; never chain .get on it.
    nested = patient.get("drug") if isinstance(patient, dict) else None
    for candidate in (record.get("drug_name"), nested, record.get("medicinalproduct")):
        text = _as_text(candidate)
        if text:
            return text
    return ""


def build_agraph(papers, umls, drug_safety):
    """
    Build the nodes, edges and config for a streamlit-agraph network.

    Parameters
    ----------
    papers : iterable of dict, or None
        Paper records; the ``title`` and ``summary`` keys are read.
    umls : iterable of dict, or None
        Concept records; entries need a truthy ``cui`` and ``name``.
    drug_safety : iterable of (dict or list of dict), or None
        Drug-safety records; each entry is one record or a list of records.

    Returns
    -------
    (nodes, edges, cfg)
        ``nodes`` is a list of ``Node``, ``edges`` a list of ``Edge`` and
        ``cfg`` a ``Config`` instance.

    Notes
    -----
    * ``None`` inputs are treated as empty.
    * Items that are not dicts are skipped instead of raising.
    * Titles, summaries, names and labels are cast to ``str`` before use,
      so ``None`` or non-string values cannot break the text matching.
    * A concept node is emitted once per CUI (first occurrence wins), and a
      paper gets at most one edge to any given target node.
    * A paper is linked to a concept/drug when the lower-cased name occurs
      as a substring of the paper's lower-cased ``title`` + ``summary``.
      The ``"Paper N"`` tooltip fallback is not part of the searched text.
    """
    nodes, edges = [], []

    # --- Add UMLS concept nodes ---
    # (node_id, lower-cased name) pairs, parsed once and reused per paper.
    concepts = []
    seen_concept_ids = set()
    for c in (umls or []):
        if not isinstance(c, dict):
            continue
        cui = _as_text(c.get("cui"))
        name = _as_text(c.get("name"))
        if not cui or not name:
            continue
        node_id = f"concept_{cui}"
        # Keep every name for matching, but only one node per id.
        concepts.append((node_id, name.lower()))
        if node_id not in seen_concept_ids:
            seen_concept_ids.add(node_id)
            nodes.append(Node(id=node_id, label=name, size=22, color=CONCEPT_COLOR))

    # --- Add Drug nodes ---
    drugs = []
    for i, drug_blob in enumerate(drug_safety or []):
        # Support both list and dict style safety reports
        if not drug_blob:
            continue
        reports = drug_blob if isinstance(drug_blob, list) else [drug_blob]
        for j, rec in enumerate(reports):
            if not isinstance(rec, dict):
                continue
            drug_id = f"drug_{i}_{j}"
            # Fall back to the positional id when no name field is usable.
            label = _drug_label(rec) or drug_id
            drugs.append((drug_id, label.lower()))
            nodes.append(Node(id=drug_id, label=label, size=25, color=DRUG_COLOR))

    # Concepts are matched before drugs, so edge order is concepts-then-drugs.
    link_targets = concepts + drugs

    # --- Add Paper nodes and connect to concepts/drugs ---
    for k, p in enumerate(papers or []):
        if not isinstance(p, dict):
            continue
        pid = f"paper_{k}"
        title = _as_text(p.get("title"))
        summary = _as_text(p.get("summary"))
        nodes.append(Node(
            id=pid,
            label=f"P{k+1}",
            tooltip=title or f"Paper {k+1}",
            size=14,
            color=PAPER_COLOR,
        ))
        txt = f"{title} {summary}".lower()
        linked = set()  # targets already connected to this paper
        for target_id, needle in link_targets:
            if target_id not in linked and needle in txt:
                linked.add(target_id)
                edges.append(Edge(source=pid, target=target_id, color=EDGE_COLOR))

    # --- Graph config with physics enabled ---
    cfg = Config(
        width="100%",
        height="520",
        directed=False,
        physics=True,
        repulsion=True,
        nodeHighlightBehavior=True,
        highlightColor=HL_COLOR,
        collapsible=True,
        node={"labelProperty": "label"},
        edge={"color": EDGE_COLOR, "width": 1},
    )
    return nodes, edges, cfg