Spaces:

TheQuantEd
/

CTA

Running

File size: 24,418 Bytes

59abb4f

"""
MCP Server for Precision Clinical Trial Matching Agent.
Exposes 9 tools accessible via Prompt Opinion and other MCP-compatible clients.

Run: python mcp_server.py
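Or via SSE: uvicorn mcp_server:sse_app --port 8001
(NOTE: no `sse_app` object is defined in this file as shown; confirm the SSE app exists before relying on this command.)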
"""
import asyncio
import json
import os
import sys
import httpx
from dotenv import load_dotenv

load_dotenv()

from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp import types

from fhir_adapter import get_patient_profile, get_all_patient_ids
from clinicaltrials_api import search_trials_sync, get_trial_details_sync
from matching_engine import match_patient_to_trials, score_patient_for_trial
from llm_client import generate_outreach_message, summarize_trial, get_provider_status
from analytics import get_kpi_summary, get_enrollment_funnel
from neo4j_setup import neo4j_conn
from fhir_server import get_fhir_server_status, get_live_patient_profile, build_sharp_context


# Single MCP server instance for this module; the list_tools/call_tool handlers
# below register themselves on it through its decorators.
server = Server("clinical-trial-matching-agent")


# Lookup from two-letter US postal code to the spelled-out state name.
# CT.gov reports site states by full name, so abbreviations supplied by callers
# are expanded through this table before comparison.
_STATE_ABBR: dict[str, str] = {
    "AL": "Alabama", "AK": "Alaska", "AZ": "Arizona",
    "AR": "Arkansas", "CA": "California", "CO": "Colorado",
    "CT": "Connecticut", "DE": "Delaware", "FL": "Florida",
    "GA": "Georgia", "HI": "Hawaii", "ID": "Idaho",
    "IL": "Illinois", "IN": "Indiana", "IA": "Iowa",
    "KS": "Kansas", "KY": "Kentucky", "LA": "Louisiana",
    "ME": "Maine", "MD": "Maryland", "MA": "Massachusetts",
    "MI": "Michigan", "MN": "Minnesota", "MS": "Mississippi",
    "MO": "Missouri", "MT": "Montana", "NE": "Nebraska",
    "NV": "Nevada", "NH": "New Hampshire", "NJ": "New Jersey",
    "NM": "New Mexico", "NY": "New York", "NC": "North Carolina",
    "ND": "North Dakota", "OH": "Ohio", "OK": "Oklahoma",
    "OR": "Oregon", "PA": "Pennsylvania", "RI": "Rhode Island",
    "SC": "South Carolina", "SD": "South Dakota", "TN": "Tennessee",
    "TX": "Texas", "UT": "Utah", "VT": "Vermont",
    "VA": "Virginia", "WA": "Washington", "WV": "West Virginia",
    "WI": "Wisconsin", "WY": "Wyoming", "DC": "District of Columbia",
}


def _error(code: str, message: str, retry_after: int | None = None) -> list[types.TextContent]:
    """Build a one-item MCP text response whose body is a JSON error object.

    The JSON always carries ``error`` (the code) and ``message``; a
    ``retry_after`` field is appended only when a value is supplied.
    """
    retry_hint = {} if retry_after is None else {"retry_after": retry_after}
    body = json.dumps({"error": code, "message": message, **retry_hint})
    return [types.TextContent(type="text", text=body)]


@server.list_tools()
async def list_tools() -> list[types.Tool]:
    """Return the catalogue of tools this server advertises to MCP clients.

    Each entry pairs a tool name and description with a JSON Schema object
    describing the arguments that tool accepts.
    """

    def field(kind: str, description: str, **extra) -> dict:
        # One property of a tool's argument object; extras carry "enum"/"default".
        return {"type": kind, "description": description, **extra}

    def object_schema(properties: dict, required: list) -> dict:
        # Every tool takes a flat JSON object; only its fields and required keys vary.
        return {"type": "object", "properties": properties, "required": required}

    # (name, description, properties, required) for each advertised tool.
    catalogue = [
        (
            "ping",
            "Health check for the ClinicalMatch AI agent. Returns Neo4j graph status, CT.gov API reachability, seed status, and system readiness. Call this first to confirm the agent is ready before running any workflow.",
            {},
            [],
        ),
        (
            "get_patient_matches",
            "Get the top clinical trial matches for a specific patient with full eligibility score breakdown. Returns ranked trials with inclusion/exclusion criterion analysis, risk flags, and clinical reasoning. Ideal for a one-call eligibility summary before scheduling.",
            {
                "patient_id": field("string", "Patient ID (P001–P005 for FHIR mock patients)"),
                "top_n": field("integer", "Number of top matches to return (default 5, max 10)", default=5),
                "condition": field("string", "Override condition for trial search (optional — inferred from patient FHIR data if omitted)"),
            },
            ["patient_id"],
        ),
        (
            "list_recruiting_trials",
            "Search for actively recruiting clinical trials by condition with optional geographic filtering. Returns trials sorted by recency with site locations, enrollment targets, and phase details. Use for geographic-aware trial discovery.",
            {
                "condition": field("string", "Medical condition (e.g., 'breast cancer', 'NSCLC', 'prostate cancer')"),
                "city": field("string", "Filter to trials with sites near this city (optional)"),
                "state": field("string", "Filter to trials with sites in this US state abbreviation, e.g. 'CA' (optional)"),
                "phase": field("string", "Trial phase filter: '1', '2', '3', or '4'", enum=["1", "2", "3", "4"]),
                "max_results": field("integer", "Maximum results to return (default 10, max 20)", default=10),
            },
            ["condition"],
        ),
        (
            "find_trials",
            "Search ClinicalTrials.gov for recruiting clinical trials matching a medical condition. Returns ranked list of trials with eligibility criteria, locations, and enrollment info.",
            {
                "condition": field("string", "Medical condition (e.g., 'breast cancer', 'NSCLC', 'Alzheimer's disease')"),
                "phase": field("string", "Trial phase: '1', '2', '3', or '4'", enum=["1", "2", "3", "4"]),
                "page_size": field("integer", "Number of results (max 20)", default=10),
            },
            ["condition"],
        ),
        (
            "screen_patient",
            "Screen a patient against a specific clinical trial using AI-powered FHIR-based analysis. Accepts either a local patient ID or a live FHIR server patient ID with optional SMART bearer token. Returns eligibility score, inclusion/exclusion criterion assessment, clinical reasoning, and SHARP context envelope.",
            {
                "patient_id": field("string", "Local patient ID (e.g. P001) OR FHIR server patient ID"),
                "nct_id": field("string", "ClinicalTrials.gov NCT number (e.g. NCT04889131)"),
                "fhir_token": field("string", "SMART on FHIR bearer token for live FHIR server access (optional)"),
                "use_live_fhir": field("boolean", "If true, fetch patient data from the live FHIR server instead of local registry", default=False),
            },
            ["patient_id", "nct_id"],
        ),
        (
            "match_patient_to_trials",
            "Find the best-matching clinical trials for a patient using semantic AI matching. Accepts local or live FHIR patient ID. Returns ranked matches with SHARP context envelope for downstream agent consumption.",
            {
                "patient_id": field("string", "Patient ID (local: P001–P005, or live FHIR ID)"),
                "condition": field("string", "Override condition for search (optional — inferred from FHIR data if omitted)"),
                "top_n": field("integer", "Number of top matches to return", default=5),
                "fhir_token": field("string", "SMART on FHIR bearer token (optional)"),
                "use_live_fhir": field("boolean", "Fetch patient from live FHIR server", default=False),
            },
            ["patient_id"],
        ),
        (
            "generate_recruitment_outreach",
            "Generate personalized recruitment communication for a patient-trial pair. Supports PCP referral letters, patient emails, and social media posts.",
            {
                "patient_id": field("string", "Patient ID"),
                "nct_id": field("string", "Trial NCT ID"),
                "channel": field(
                    "string",
                    "Communication channel",
                    enum=["patient_email", "pcp_letter", "social_post"],
                    default="patient_email",
                ),
            },
            ["patient_id", "nct_id"],
        ),
        (
            "get_trial_analytics",
            "Get enrollment analytics and recruitment funnel data for a clinical trial or across all active trials.",
            {
                "trial_id": field("string", "NCT ID for trial-specific analytics (omit for aggregate)"),
            },
            [],
        ),
        (
            "summarize_trial_protocol",
            "Fetch a clinical trial from ClinicalTrials.gov and generate a plain-language AI summary for clinical coordinators.",
            {
                "nct_id": field("string", "ClinicalTrials.gov NCT number"),
            },
            ["nct_id"],
        ),
    ]

    return [
        types.Tool(name=tool_name, description=summary, inputSchema=object_schema(props, needed))
        for tool_name, summary, props, needed in catalogue
    ]


def _bounded_int(value, default: int, upper: int) -> int:
    """Coerce a caller-supplied count to an int clamped to ``1..upper``.

    ``None`` (argument omitted or sent as JSON null) falls back to *default*.
    Raises ``ValueError``/``TypeError`` if *value* cannot be converted by ``int``.
    """
    number = default if value is None else int(value)
    return max(1, min(number, upper))


def _format_sites(locations, limit: int) -> str:
    """Render up to *limit* sites as "City, State" entries joined by ", ".

    Missing or empty city/state values are skipped instead of raising KeyError
    or being printed as "None".
    """
    rendered = []
    for site in (locations or [])[:limit]:
        pieces = [str(site.get(key)) for key in ("city", "state") if site.get(key)]
        if pieces:
            rendered.append(", ".join(pieces))
    return ", ".join(rendered)


def _site_matches(site: dict, city: str, state_abbr: str, state_full: str) -> bool:
    """Tell whether one trial site satisfies the city and/or state filter.

    *city* is a lowercase substring to look for in the site's city. The state is
    compared for equality against both the abbreviation and the full name;
    a substring test would let "kansas" match "arkansas".
    """
    site_city = (site.get("city") or "").lower()
    site_state = (site.get("state") or "").strip()
    if city and city in site_city:
        return True
    if state_abbr:
        return site_state.upper() == state_abbr or site_state.lower() == state_full
    return False


def _text(body: str) -> list[types.TextContent]:
    """Wrap *body* as the single-item text response MCP tool calls return."""
    return [types.TextContent(type="text", text=body)]


@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
    """Run the MCP tool called *name* and return its result as text content.

    Args:
        name: Tool name; the names handled here are the ones advertised by
            ``list_tools``.
        arguments: The caller's JSON arguments for that tool.

    Returns:
        A single-item list of text content. On success the text is a
        Markdown-style report (JSON for ``ping``). Not-found conditions, unknown
        tool names and unexpected failures are reported as the JSON error
        object built by ``_error``.

    Any exception raised while handling a tool is caught and turned into a
    ``TOOL_ERROR`` payload rather than propagated.
    """
    try:
        if name == "ping":
            # Neo4j check: a failing query only downgrades the reported status.
            neo4j_ok = False
            node_counts = {}
            try:
                rows = neo4j_conn.run_query(
                    "MATCH (n) RETURN labels(n)[0] AS label, count(n) AS cnt"
                )
                node_counts = {r["label"]: r["cnt"] for r in rows if r.get("label")}
                neo4j_ok = True
            except Exception:
                # Deliberate best-effort: health check must still answer.
                neo4j_ok = False

            # CT.gov reachability
            ctgov_ok = False
            try:
                r = httpx.get(
                    "https://clinicaltrials.gov/api/v2/studies",
                    params={"query.term": "cancer", "pageSize": 1},
                    timeout=5,
                )
                ctgov_ok = r.status_code == 200
            except Exception:
                ctgov_ok = False

            # The graph counts as seeded once it holds at least 100 Patient nodes.
            seeded = node_counts.get("Patient", 0) >= 100

            fhir_status = get_fhir_server_status()
            llm_status = get_provider_status()

            status = {
                "status": "ready" if (neo4j_ok and ctgov_ok and seeded) else "degraded",
                "neo4j": "connected" if neo4j_ok else "unavailable",
                "ctgov_api": "reachable" if ctgov_ok else "unreachable",
                "fhir_server": "reachable" if fhir_status.get("reachable") else "unreachable",
                "fhir_base_url": fhir_status.get("base_url"),
                "smart_auth": fhir_status.get("auth_method"),
                "graph_seeded": seeded,
                "node_counts": node_counts,
                "llm_provider": llm_status.get("provider"),
                "llm_model": llm_status.get("model"),
                "llm_hipaa_eligible": llm_status.get("hipaa_eligible"),
                "standards": ["FHIR R4", "MCP", "A2A", "SHARP"],
                "agent": "ClinicalMatch AI v2.0 — FHIR R4 · MCP · A2A · SHARP",
            }
            return _text(json.dumps(status, indent=2))

        elif name == "get_patient_matches":
            patient_id = arguments["patient_id"]
            top_n = _bounded_int(arguments.get("top_n"), default=5, upper=10)
            condition = arguments.get("condition")

            profile = get_patient_profile(patient_id)
            if not profile:
                return _error("PATIENT_NOT_FOUND", f"Patient '{patient_id}' not found. Available: P001–P005.")

            matches = match_patient_to_trials(patient_id, condition, top_n)
            if not matches:
                return _error("NO_TRIALS_FOUND", f"No trials found for patient {patient_id}.", retry_after=30)

            parts = [
                f"## Top {len(matches)} Trial Matches — {patient_id}\n",
                f"Patient: {profile['age']}y {profile['gender']} | Dx: {', '.join(profile['diagnosis_names'])}\n\n",
            ]
            for i, m in enumerate(matches, 1):
                parts.append(f"### {i}. {m['title']} ({m['nct_id']})\n")
                parts.append(f"**Score:** {m['match_score']:.0%} | **Eligible:** {'✓ YES' if m['eligible'] else '✗ NO'} | **Phase:** {m.get('phase', 'N/A')}\n")
                if m.get("match_summary"):
                    parts.append(f"**Reasoning:** {m['match_summary'][:200]}\n")
                if m.get("risk_flags"):
                    parts.append(f"**Risk Flags:** {'; '.join(m['risk_flags'][:3])}\n")
                sites = _format_sites(m.get("locations"), 2)
                if sites:
                    parts.append(f"**Sites:** {sites}\n")
                parts.append("\n")
            return _text("".join(parts))

        elif name == "list_recruiting_trials":
            condition = arguments["condition"]
            # `or ""` tolerates an explicit JSON null as well as an omitted key.
            city_label = (arguments.get("city") or "").strip()
            city = city_label.lower()
            state = (arguments.get("state") or "").strip().upper()
            phase = arguments.get("phase")
            max_results = _bounded_int(arguments.get("max_results"), default=10, upper=20)

            trials = search_trials_sync(condition, phase, page_size=max_results)
            if not trials:
                return _error("NO_TRIALS_FOUND", f"No recruiting trials found for '{condition}'.", retry_after=10)

            geo_note = ""
            if city or state:
                # CT.gov returns full state names, so expand the abbreviation;
                # an unrecognised value is compared as given.
                state_full = _STATE_ABBR.get(state, state).lower() if state else ""
                filtered = []
                for t in trials:
                    sites = t.get("locations") or []
                    # Trials with no listed sites are kept rather than excluded.
                    if not sites or any(_site_matches(s, city, state, state_full) for s in sites):
                        filtered.append(t)
                place = ", ".join(piece for piece in (city_label, state) if piece)
                if filtered:
                    trials = filtered
                    geo_note = f" near {place}"
                else:
                    # Filter too narrow: fall back to every result, and say so
                    # instead of labelling unfiltered trials as nearby.
                    geo_note = f" (no sites matched near {place}; showing all locations)"

            parts = [
                f"## Recruiting Trials: {condition}{geo_note}\n",
                f"Found {len(trials)} trials (sorted by most recently updated)\n\n",
            ]
            for i, t in enumerate(trials, 1):
                sites = _format_sites(t.get("locations"), 3)
                parts.append(f"{i}. **{t['title']}** ({t['nct_id']})\n")
                parts.append(f"   Phase: {t.get('phase','N/A')} | Sites: {t.get('location_count',0)} | Enrollment: {t.get('enrollment','N/A')}\n")
                parts.append(f"   Sponsor: {t.get('sponsor','N/A')} | Updated: {t.get('last_updated','N/A')}\n")
                if sites:
                    parts.append(f"   Locations: {sites}\n")
                parts.append(f"   URL: {t.get('ctgov_url','')}\n\n")
            return _text("".join(parts))

        elif name == "find_trials":
            condition = arguments["condition"]
            phase = arguments.get("phase")
            page_size = _bounded_int(arguments.get("page_size"), default=10, upper=20)
            trials = search_trials_sync(condition, phase, page_size=page_size)
            parts = [f"Found {len(trials)} recruiting trials for '{condition}':\n\n"]
            for i, trial in enumerate(trials, 1):
                sites = _format_sites(trial.get("locations"), 2)
                parts.append(f"{i}. **{trial['title']}** ({trial['nct_id']})\n")
                parts.append(f"   Phase: {trial['phase']} | Status: {trial['status']} | Sites: {trial['location_count']}\n")
                parts.append(f"   Enrollment: {trial['enrollment']} | Sponsor: {trial['sponsor']}\n")
                if sites:
                    parts.append(f"   Locations: {sites}\n")
                parts.append("\n")
            return _text("".join(parts))

        elif name == "screen_patient":
            patient_id = arguments["patient_id"]
            nct_id = arguments["nct_id"]
            use_live_fhir = arguments.get("use_live_fhir", False)
            fhir_token = arguments.get("fhir_token")

            # Build SHARP context envelope
            sharp_ctx = build_sharp_context(
                patient_id=patient_id,
                fhir_ref=f"Patient/{patient_id}",
            )
            if fhir_token:
                sharp_ctx["fhir_token"] = fhir_token

            # Optionally confirm the patient exists on the live FHIR server.
            # NOTE(review): the fetched profile is only used as an existence
            # check; scoring below still goes by patient_id — confirm intended.
            if use_live_fhir:
                live_profile = get_live_patient_profile(patient_id, sharp_context=sharp_ctx)
                if not live_profile:
                    return _error("FHIR_PATIENT_NOT_FOUND",
                                  f"Patient '{patient_id}' not found on FHIR server {sharp_ctx['patient_context']['fhir_base']}")

            trial = get_trial_details_sync(nct_id)
            if not trial:
                return _error("TRIAL_NOT_FOUND", f"Trial {nct_id} not found in ClinicalTrials.gov")
            result = score_patient_for_trial(patient_id, trial)
            if "error" in result:
                return _error("SCREENING_ERROR", result["error"])
            result["sharp_context"] = sharp_ctx

            score = result.get("overall_score", 0)
            eligible = result.get("eligible", False)
            parts = [
                f"## Eligibility Assessment: {patient_id} → {nct_id}\n\n",
                f"**Overall Score:** {score:.0%} | **Eligible:** {'YES' if eligible else 'NO'}\n\n",
                f"**Clinical Reasoning:** {result.get('summary', '')}\n\n",
            ]

            incl = result.get("inclusion_results", [])
            if incl:
                parts.append("**Inclusion Criteria:**\n")
                for c in incl:
                    icon = "✓" if c.get("met") else "✗"
                    parts.append(f"  {icon} {c.get('criterion', '')} [{c.get('confidence', '')}]\n")
            excl = result.get("exclusion_results", [])
            if excl:
                parts.append("\n**Exclusion Criteria:**\n")
                for c in excl:
                    icon = "⚠" if c.get("triggered") else "✓"
                    parts.append(f"  {icon} {c.get('criterion', '')} [{c.get('confidence', '')}]\n")
            flags = result.get("risk_flags", [])
            if flags:
                parts.append(f"\n**Risk Flags:** {'; '.join(flags)}")
            return _text("".join(parts))

        elif name == "match_patient_to_trials":
            patient_id = arguments["patient_id"]
            condition = arguments.get("condition")
            # Same 1..10 bound as get_patient_matches, so one call cannot
            # request an arbitrarily large number of matches.
            top_n = _bounded_int(arguments.get("top_n"), default=5, upper=10)
            use_live_fhir = arguments.get("use_live_fhir", False)
            fhir_token = arguments.get("fhir_token")

            sharp_ctx = build_sharp_context(patient_id=patient_id, fhir_ref=f"Patient/{patient_id}")
            if fhir_token:
                sharp_ctx["fhir_token"] = fhir_token

            if use_live_fhir:
                profile = get_live_patient_profile(patient_id, sharp_context=sharp_ctx)
                if not profile:
                    return _error("FHIR_PATIENT_NOT_FOUND", f"Patient '{patient_id}' not found on FHIR server")
                # With no explicit condition, search on the first live diagnosis.
                if not condition and profile.get("diagnosis_names"):
                    condition = profile["diagnosis_names"][0]
            else:
                profile = get_patient_profile(patient_id)

            matches = match_patient_to_trials(patient_id, condition, top_n)
            parts = [
                f"## Top {len(matches)} Trial Matches for {patient_id}\n",
                f"SHARP: fhir_ref={sharp_ctx['patient_context']['fhir_ref']} session={sharp_ctx['patient_context']['session_id'][:8]}...\n",
            ]
            if profile:
                parts.append(f"Patient: {profile['age']}y {profile['gender']} | Diagnoses: {', '.join(profile.get('diagnosis_names', []))}\n\n")
            for i, m in enumerate(matches, 1):
                parts.append(f"{i}. **{m['title']}** ({m['nct_id']})\n")
                parts.append(f"   Match Score: {m['match_score']:.0%} | Eligible: {'YES' if m['eligible'] else 'NO'} | Phase: {m.get('phase','N/A')}\n")
                if m.get("match_summary"):
                    parts.append(f"   {m['match_summary'][:150]}...\n")
                parts.append("\n")
            return _text("".join(parts))

        elif name == "generate_recruitment_outreach":
            patient_id = arguments["patient_id"]
            nct_id = arguments["nct_id"]
            channel = arguments.get("channel", "patient_email")
            # Check the patient first so an unknown ID costs no trial lookup.
            patient_profile = get_patient_profile(patient_id)
            if not patient_profile:
                return _error("PATIENT_NOT_FOUND", f"Patient {patient_id} not found")
            # An unknown trial falls back to a placeholder record so outreach
            # text can still be produced.
            trial = get_trial_details_sync(nct_id) or {"nct_id": nct_id, "title": "Clinical Trial", "brief_summary": "", "phase": "N/A", "sponsor": "N/A", "locations": []}
            message = generate_outreach_message(patient_profile, trial, channel)
            parts = [
                f"## Recruitment Outreach ({channel.replace('_', ' ').title()})\n",
                f"Patient: {patient_id} | Trial: {nct_id}\n\n",
                "---\n\n" + message,
            ]
            return _text("".join(parts))

        elif name == "get_trial_analytics":
            trial_id = arguments.get("trial_id")
            kpis = get_kpi_summary()
            funnel = get_enrollment_funnel(trial_id)
            parts = [
                "## Clinical Trial Analytics\n\n",
                f"**Active Trials:** {kpis['active_trials']}\n",
                f"**Patients Identified:** {kpis['patients_identified']}\n",
                f"**Enrollment Rate:** {kpis['enrollment_rate']:.0%}\n",
                f"**Avg Days to Match:** {kpis['avg_days_to_match']}\n",
                f"**Cost Savings:** ${kpis['cost_saved_usd']:,}\n\n",
                "**Enrollment Funnel:**\n",
            ]
            parts.extend(f"  {stage['stage']}: {stage['count']}\n" for stage in funnel)
            return _text("".join(parts))

        elif name == "summarize_trial_protocol":
            nct_id = arguments["nct_id"]
            trial = get_trial_details_sync(nct_id)
            if not trial:
                return _error("TRIAL_NOT_FOUND", f"Trial {nct_id} not found")
            summary = summarize_trial(trial)
            parts = [
                f"## {trial['title']} ({nct_id})\n\n",
                f"**Phase:** {trial['phase']} | **Status:** {trial['status']} | **Enrollment:** {trial['enrollment']}\n",
                f"**Sponsor:** {trial['sponsor']}\n\n",
                summary,
            ]
            return _text("".join(parts))

        else:
            return _error("UNKNOWN_TOOL", f"Unknown tool: {name}")

    except Exception as e:
        # Top-level boundary: report the failure to the caller as structured
        # JSON instead of letting it escape the handler.
        return _error("TOOL_ERROR", f"Tool '{name}' failed: {str(e)}")


async def main():
    """Open the stdio transport and run the MCP server on its two streams."""
    async with stdio_server() as streams:
        incoming, outgoing = streams
        init_options = server.create_initialization_options()
        await server.run(incoming, outgoing, init_options)


# Script entry point: when executed directly, run the stdio server coroutine
# to completion on a fresh asyncio event loop.
if __name__ == "__main__":
    asyncio.run(main())