Yogeshwarirj committed on
Commit
376d861
·
verified ·
1 Parent(s): 149dc8b

Update medpanel.py

Browse files
Files changed (1) hide show
  1. medpanel.py +102 -131
medpanel.py CHANGED
@@ -1,7 +1,7 @@
1
  # medpanel.py
2
- # This is the brain of MedPanel — all 4 agents, the orchestrator, and the RAG pipeline live here.
3
- # app.py just calls run_medpanel() at the bottom and handles the UI.
4
- # If something's broken, it's probably in this file.
5
 
6
  import os
7
  import json
@@ -16,59 +16,45 @@ from Bio import Entrez
16
  from PIL import Image
17
 
18
 
19
- # ── Config ───────────────────────────────────────────────────────────
20
-
21
  MODEL_ID = "google/medgemma-4b-it"
22
 
23
- # NCBI needs an email to use their API — doesn't have to be real, just has to be there
24
  Entrez.email = "medpanel@example.com"
25
 
26
- # ── Device Setup ─────────────────────────────────────────────────────
27
- # force everything onto one device — avoids the tensor shape mismatch error
28
- # that happens when accelerate tries to split layers across CPU and GPU
29
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
30
- print(f"🖥️ Using device: {DEVICE}")
31
-
32
 
33
- # ── Model Loading ─────────────────────────────────────────────────────
34
 
35
  def load_models():
36
- # we load everything once at startup and keep it in memory
37
- # reloading per request would take 2+ minutes each time — not an option
 
 
 
38
 
39
  print("Loading MedGemma model...")
40
 
41
- # processor handles both text tokenization and image preprocessing
42
- # it's what turns "65yo male with cough" into tokens the model understands
43
  processor = AutoProcessor.from_pretrained(
44
  MODEL_ID,
45
  token=os.environ.get("HF_TOKEN")
 
46
  )
47
 
48
- # float16 instead of bfloat16 — fits better on the T4 GPU HF Spaces gives us
49
- # device_map={"": DEVICE} forces all layers to one device
50
- # without this, accelerate splits layers across CPU/GPU and things break badly
51
  model = AutoModelForImageTextToText.from_pretrained(
52
  MODEL_ID,
53
- torch_dtype=torch.float16,
54
- device_map={"": DEVICE},
55
  token=os.environ.get("HF_TOKEN"),
56
- low_cpu_mem_usage=True,
57
- attn_implementation="eager"
58
  )
59
  model.eval()
60
-
61
- # if pad_token isn't set, the model hits EOS immediately and generates 0 tokens
62
- # this was the cause of the empty response bug — one line fix
63
- if processor.tokenizer.pad_token is None:
64
- processor.tokenizer.pad_token = processor.tokenizer.eos_token
65
- processor.tokenizer.pad_token_id = processor.tokenizer.eos_token_id
66
- print("✅ pad_token set to eos_token")
67
-
68
  print("✅ MedGemma loaded!")
69
 
70
- # PubMedBERT for semantic search — regular sentence transformers don't know
71
- # medical terminology well enough, this one was trained on PubMed abstracts
72
  print("Loading PubMed embedding model...")
73
  embed_model = SentenceTransformer("pritamdeka/S-PubMedBert-MS-MARCO")
74
  print("✅ Embedding model loaded!")
@@ -76,119 +62,97 @@ def load_models():
76
  return processor, model, embed_model
77
 
78
 
79
- # load once at import time — all agents share the same model instance
80
  processor, model, embed_model = load_models()
81
 
82
 
83
- # ── Core Model Caller ─────────────────────────────────────────────────
84
 
85
  def call_medgemma(prompt, image=None, max_tokens=400):
86
- # every agent goes through this function — it's the single point of contact with MedGemma
87
- # keeps things consistent and makes it easy to add logging or retry logic later
 
 
88
 
 
89
  messages = [
90
  {
91
  "role": "user",
92
  "content": [
93
  {"type": "text", "text": prompt},
94
- # only include the image block if there's actually an image
95
  *([{"type": "image", "image": image}] if image else [])
96
  ]
97
  }
98
  ]
99
 
 
100
  inputs = processor.apply_chat_template(
101
  messages,
102
  add_generation_prompt=True,
103
  tokenize=True,
104
  return_dict=True,
105
  return_tensors="pt"
106
- ).to(DEVICE)
107
-
108
- # track input length so we can slice it off later
109
- # much more reliable than trying to split on "model\n" which breaks constantly
110
- input_len = inputs["input_ids"].shape[-1]
111
- print(f"[MedGemma] Input tokens: {input_len}, max_new_tokens: {max_tokens}")
112
 
 
113
  with torch.no_grad():
114
  output_tokens = model.generate(
115
  **inputs,
116
  max_new_tokens=max_tokens,
117
- do_sample=False,
118
- # without these, the model sometimes stops after 0 tokens
119
- pad_token_id=processor.tokenizer.eos_token_id,
120
- eos_token_id=processor.tokenizer.eos_token_id,
121
- # slight repetition penalty — stops the model looping on the same phrase
122
- repetition_penalty=1.1,
123
  )
124
 
125
- # slice off the input tokens — we only want what the model actually generated
126
- new_tokens = output_tokens[0][input_len:]
127
- print(f"[MedGemma] Generated {len(new_tokens)} new tokens")
128
-
129
- response = processor.decode(new_tokens, skip_special_tokens=True).strip()
130
- print(f"[MedGemma] Response ({len(response)} chars): {response[:120]}")
131
-
132
- return response
133
 
134
 
135
  def safe_json(text):
136
- # the model doesn't always return clean JSON
137
- # sometimes it wraps it in markdown, sometimes it truncates mid-object,
138
- # sometimes it just writes prose — this function handles all of that
139
- # and always returns a dict, never raises an exception
 
140
 
141
- if not text or not text.strip():
142
- return {"raw_response": ""}
143
-
144
- # strip markdown code fences if present — ```json ... ``` or ``` ... ```
145
  for fence_start, fence_end in [("```json", "```"), ("```", "```")]:
146
  if fence_start in text:
147
  text = text.split(fence_start)[1].split(fence_end)[0].strip()
148
  break
149
 
150
- # try clean JSON first
151
  try:
152
  return json.loads(text)
153
  except json.JSONDecodeError:
154
  pass
155
 
156
- # if the model got cut off mid-JSON, try to close the open brackets
157
- # this saves a lot of orchestrator outputs that were just one } short
158
- try:
159
- open_count = text.count('{')
160
- close_count = text.count('}')
161
- if open_count > close_count:
162
- recovered = text + ('}' * (open_count - close_count))
163
- return json.loads(recovered)
164
- except json.JSONDecodeError:
165
- pass
166
-
167
- # last resort — find any {...} block in the response and try to parse that
168
  json_match = re.search(r'\{.*\}', text, re.DOTALL)
169
  try:
170
  return json.loads(json_match.group()) if json_match else {"raw_response": text}
171
  except json.JSONDecodeError:
172
- # give up and return the raw text so at least something shows up in the UI
173
  return {"raw_response": text}
174
 
175
 
176
  # ── PubMed RAG ───────────────────────────────────────────────────────
177
 
178
  def fetch_and_retrieve(query, top_k=3):
179
- # searches PubMed and returns the most semantically relevant abstracts
180
- # using keyword search here would miss too much — medical terminology is inconsistent
181
- # so we embed the abstracts and do vector similarity instead
 
 
 
182
 
183
  try:
184
- # get paper IDs from PubMed's search API
185
  handle = Entrez.esearch(db="pubmed", term=query, retmax=8)
186
  ids = Entrez.read(handle)["IdList"]
187
 
188
  if not ids:
189
  return []
190
 
191
- # fetch the actual abstract text for those IDs
192
  handle = Entrez.efetch(
193
  db="pubmed",
194
  id=ids,
@@ -196,10 +160,8 @@ def fetch_and_retrieve(query, top_k=3):
196
  retmode="text"
197
  )
198
 
 
199
  raw_text = handle.read()
200
-
201
- # PubMed returns everything as one big blob of text
202
- # split on double newlines and filter out the short header/footer chunks
203
  abstracts = [
204
  chunk.strip()
205
  for chunk in raw_text.split("\n\n")
@@ -209,13 +171,12 @@ def fetch_and_retrieve(query, top_k=3):
209
  if not abstracts:
210
  return []
211
 
212
- # embed all abstracts and build a FAISS index on the fly
213
- # yes, we rebuild the index every time — it's fast enough and keeps things simple
214
  embeddings = embed_model.encode(abstracts)
215
  index = faiss.IndexFlatL2(embeddings.shape[1])
216
  index.add(np.array(embeddings))
217
 
218
- # find the top_k abstracts closest to our query in embedding space
219
  query_embedding = embed_model.encode([query])
220
  _, best_indices = index.search(
221
  np.array(query_embedding),
@@ -225,26 +186,26 @@ def fetch_and_retrieve(query, top_k=3):
225
  return [abstracts[i] for i in best_indices[0]]
226
 
227
  except Exception as e:
228
- # PubMed goes down sometimes, internet is flaky on HF Spaces
229
- # just return empty and let the pipeline continue without evidence
230
  print(f"PubMed fetch failed for '{query}': {e}")
231
  return []
232
 
233
 
234
- # ── Agent 1: Radiologist ──────────────────────────────────────────────
235
 
236
  def radiologist_agent(image, notes):
237
- # looks at the image and returns what it sees
238
- # deliberately kept separate from the internist — we don't want them anchoring each other
 
 
239
 
240
  if not image:
241
- # no image is fine — the internist and devil's advocate can still run
242
  return {
243
  "suspected_conditions": [],
244
  "note": "No image provided — skipping radiology analysis"
245
  }
246
 
247
- # MedGemma needs RGB — grayscale X-rays need to be converted
248
  if image.mode != "RGB":
249
  image = image.convert("RGB")
250
 
@@ -261,11 +222,14 @@ Return only the JSON object, no extra explanation."""
261
  return safe_json(call_medgemma(prompt, image))
262
 
263
 
264
- # ── Agent 2: Internist ────────────────────────────────────────────────
265
 
266
  def internist_agent(notes):
267
- # works from text only — never sees the image
268
- # this is intentional: we want independent reasoning, not anchoring off the radiologist
 
 
 
269
 
270
  prompt = f"""You are an experienced internal medicine physician.
271
  Patient clinical notes: {notes}
@@ -279,37 +243,41 @@ Return only the JSON object, no extra explanation."""
279
  return safe_json(call_medgemma(prompt))
280
 
281
 
282
- # ── Agent 3: Evidence Reviewer ────────────────────────────────────────
283
 
284
  def evidence_agent(r1, r2):
285
- # doesn't do any diagnosing — just fetches literature relevant to what the other agents found
286
- # the goal is to ground the Devil's Advocate and Orchestrator in actual published research,
287
- # not just what's in MedGemma's training data
 
 
288
 
289
- # combine top suspects from both agents into search queries
290
  queries = (
291
  r1.get("suspected_conditions", [])[:2] +
292
  r2.get("differential_diagnoses", [])[:2]
293
  )
294
 
 
295
  evidence = []
296
  for query in queries:
297
  results = fetch_and_retrieve(str(query), top_k=2)
298
  evidence.extend(results)
299
 
300
- # cap at 4 — more than that starts overflowing the prompt context window
301
  return evidence[:4]
302
 
303
 
304
- # ── Agent 4: Devil's Advocate ─────────────────────────────────────────
305
 
306
  def devils_advocate_agent(image, notes, r1, r2, evidence):
307
- # this is the one that matters most
308
- # its only job is to look at what everyone else concluded and find what they missed
309
- # dangerous diagnoses, rare conditions, overlooked red flags
310
- # it sees all the other agents' outputs — that's the point, it needs context to challenge them
 
311
 
312
- # truncate evidence so we don't blow up the prompt
313
  evidence_snippet = "\n".join(evidence[:2]) if evidence else "None available"
314
 
315
  prompt = f"""You are a critical care specialist and patient safety advocate.
@@ -328,20 +296,20 @@ Return a JSON object with:
328
  - requires_human_review: true or false
329
  Return only the JSON object, no extra explanation."""
330
 
331
- # give it the image too — it might catch something the radiologist missed
332
  if image and image.mode != "RGB":
333
  image = image.convert("RGB")
334
 
335
  return safe_json(call_medgemma(prompt, image))
336
 
337
 
338
- # ── Orchestrator ──────────────────────────────────────────────────────
339
 
340
  def orchestrator_agent(notes, r1, r2, evidence, devil):
341
- # reads everything the other agents produced and makes the final call
342
- # primary diagnosis, escalation decision, next steps, patient summary — all here
343
- # gets 1000 tokens because it has the longest prompt and we learned the hard way
344
- # that 400 tokens cuts off mid-JSON and produces a blank report
345
 
346
  prompt = f"""You are the lead physician synthesizing a multi-specialist panel review.
347
  RADIOLOGIST findings:
@@ -362,38 +330,41 @@ Synthesize everything into a final clinical report as a JSON object with:
362
  - patient_summary: 2-sentence plain English summary for the patient
363
  Return only the JSON object, no extra explanation."""
364
 
365
- return safe_json(call_medgemma(prompt, max_tokens=1000))
366
 
367
 
368
- # ── Master Pipeline ───────────────────────────────────────────────────
369
 
370
  def run_medpanel(image, notes):
371
- # runs all 5 agents in sequence and returns the full trace + final report
372
- # this is the only function app.py needs to call
 
 
 
373
 
374
  trace = []
375
 
376
- # radiologist first — image analysis, independent of everything else
377
  print("🩻 Running Radiologist agent...")
378
  r1 = radiologist_agent(image, notes)
379
  trace.append({"agent": "Radiologist", "output": r1})
380
 
381
- # internist next — clinical notes only, never sees the image
382
  print("🩺 Running Internist agent...")
383
  r2 = internist_agent(notes)
384
  trace.append({"agent": "Internist", "output": r2})
385
 
386
- # evidence reviewer — fetches PubMed literature based on what the first two found
388
  print("📚 Fetching PubMed evidence...")
388
  evidence = evidence_agent(r1, r2)
389
  trace.append({"agent": "Evidence Reviewer", "abstracts_retrieved": len(evidence)})
390
 
391
- # devil's advocate — sees everything and tries to find what was missed
392
  print("😈 Running Devil's Advocate agent...")
393
  devil = devils_advocate_agent(image, notes, r1, r2, evidence)
394
  trace.append({"agent": "Devil's Advocate", "output": devil})
395
 
396
- # orchestrator — synthesizes all 4 outputs into the final report
397
  print("🏥 Synthesizing final report...")
398
  final_report = orchestrator_agent(notes, r1, r2, evidence, devil)
399
  trace.append({"agent": "Orchestrator", "output": final_report})
@@ -401,6 +372,6 @@ def run_medpanel(image, notes):
401
  print("✅ MedPanel analysis complete!")
402
 
403
  return {
404
- "panel_trace": trace, # full agent-by-agent breakdown for the trace tab
405
- "final_report": final_report # what actually shows up in the report tab
406
  }
 
1
  # medpanel.py
2
+ # Core logic for the MedPanel multi-agent diagnostic system.
3
+ # This file contains all 4 agents + orchestrator + RAG pipeline.
4
+ # Imported by app.py which runs the Gradio interface on HuggingFace Spaces.
5
 
6
  import os
7
  import json
 
16
  from PIL import Image
17
 
18
 
19
+ # ── Model Configuration ──────────────────────────────────────────────
20
+ # We load these once at startup so they're ready for every request
21
  MODEL_ID = "google/medgemma-4b-it"
22
 
23
+ # NCBI requires an email for PubMed access — just for identification purposes
24
  Entrez.email = "medpanel@example.com"
25
 
 
 
 
 
 
 
26
 
27
+ # ── Load Models ──────────────────────────────────────────────────────
28
 
29
  def load_models():
30
+ """
31
+ Loads MedGemma and the PubMed embedding model into memory.
32
+ Called once when the app starts up on HuggingFace Spaces.
33
+ Returns processor, model, and embed_model.
34
+ """
35
 
36
  print("Loading MedGemma model...")
37
 
38
+ # Load the processor — handles both text tokenization and image preprocessing
 
39
  processor = AutoProcessor.from_pretrained(
40
  MODEL_ID,
41
  token=os.environ.get("HF_TOKEN")
42
+
43
  )
44
 
45
+ # Load MedGemma in bfloat16 to fit within GPU memory limits
 
 
46
  model = AutoModelForImageTextToText.from_pretrained(
47
  MODEL_ID,
48
+ torch_dtype=torch.bfloat16,
49
+ device_map="auto",
50
  token=os.environ.get("HF_TOKEN"),
51
+ low_cpu_mem_usage=True,
52
+ attn_implementation="eager"
53
  )
54
  model.eval()
 
 
 
 
 
 
 
 
55
  print("✅ MedGemma loaded!")
56
 
57
+ # Load the PubMed-specific embedding model for semantic search
 
58
  print("Loading PubMed embedding model...")
59
  embed_model = SentenceTransformer("pritamdeka/S-PubMedBert-MS-MARCO")
60
  print("✅ Embedding model loaded!")
 
62
  return processor, model, embed_model
63
 
64
 
65
+ # Initialize all models at module load time
66
  processor, model, embed_model = load_models()
67
 
68
 
69
+ # ── Base Caller ──────────────────────────────────────────────────────
70
 
71
  def call_medgemma(prompt, image=None, max_tokens=400):
72
+ """
73
+ Sends a prompt (and optional image) to MedGemma and returns the response.
74
+ This is the single point of contact with the model for all agents.
75
+ """
76
 
77
+ # Build message in MedGemma's expected chat format
78
  messages = [
79
  {
80
  "role": "user",
81
  "content": [
82
  {"type": "text", "text": prompt},
 
83
  *([{"type": "image", "image": image}] if image else [])
84
  ]
85
  }
86
  ]
87
 
88
+ # Tokenize and move to the same device as the model
89
  inputs = processor.apply_chat_template(
90
  messages,
91
  add_generation_prompt=True,
92
  tokenize=True,
93
  return_dict=True,
94
  return_tensors="pt"
95
+ ).to(model.device)
 
 
 
 
 
96
 
97
+ # Generate response — no_grad saves memory, do_sample=False is deterministic
98
  with torch.no_grad():
99
  output_tokens = model.generate(
100
  **inputs,
101
  max_new_tokens=max_tokens,
102
+ do_sample=False
 
 
 
 
 
103
  )
104
 
105
+ # Decode and strip the echoed prompt — we only want the model's reply
106
+ full_response = processor.decode(output_tokens[0], skip_special_tokens=True)
107
+ return full_response.split("model\n")[-1].strip()
 
 
 
 
 
108
 
109
 
110
  def safe_json(text):
111
+ """
112
+ Safely extracts a JSON object from the model's response.
113
+ Handles markdown code fences, extra text, and malformed JSON.
114
+ Always returns a dict — never crashes.
115
+ """
116
 
117
+ # Strip markdown fences like ```json ... ``` if present
 
 
 
118
  for fence_start, fence_end in [("```json", "```"), ("```", "```")]:
119
  if fence_start in text:
120
  text = text.split(fence_start)[1].split(fence_end)[0].strip()
121
  break
122
 
123
+ # Try standard JSON parsing first
124
  try:
125
  return json.loads(text)
126
  except json.JSONDecodeError:
127
  pass
128
 
129
+ # Fall back to regex — find any { ... } block in the response
 
 
 
 
 
 
 
 
 
 
 
130
  json_match = re.search(r'\{.*\}', text, re.DOTALL)
131
  try:
132
  return json.loads(json_match.group()) if json_match else {"raw_response": text}
133
  except json.JSONDecodeError:
 
134
  return {"raw_response": text}
135
 
136
 
137
  # ── PubMed RAG ───────────────────────────────────────────────────────
138
 
139
  def fetch_and_retrieve(query, top_k=3):
140
+ """
141
+ Searches PubMed for relevant abstracts using the given query.
142
+ Uses FAISS + PubMedBERT embeddings to find the most semantically
143
+ similar abstracts rather than just keyword matching.
144
+ Returns a list of abstract strings.
145
+ """
146
 
147
  try:
148
+ # Search PubMed for matching paper IDs
149
  handle = Entrez.esearch(db="pubmed", term=query, retmax=8)
150
  ids = Entrez.read(handle)["IdList"]
151
 
152
  if not ids:
153
  return []
154
 
155
+ # Fetch the actual abstract text for those papers
156
  handle = Entrez.efetch(
157
  db="pubmed",
158
  id=ids,
 
160
  retmode="text"
161
  )
162
 
163
+ # Split the bulk text into individual abstracts, filter out short chunks
164
  raw_text = handle.read()
 
 
 
165
  abstracts = [
166
  chunk.strip()
167
  for chunk in raw_text.split("\n\n")
 
171
  if not abstracts:
172
  return []
173
 
174
+ # Build FAISS index from abstract embeddings
 
175
  embeddings = embed_model.encode(abstracts)
176
  index = faiss.IndexFlatL2(embeddings.shape[1])
177
  index.add(np.array(embeddings))
178
 
179
+ # Find the top_k most relevant abstracts for our query
180
  query_embedding = embed_model.encode([query])
181
  _, best_indices = index.search(
182
  np.array(query_embedding),
 
186
  return [abstracts[i] for i in best_indices[0]]
187
 
188
  except Exception as e:
189
+ # If PubMed is unavailable, return empty rather than crashing
 
190
  print(f"PubMed fetch failed for '{query}': {e}")
191
  return []
192
 
193
 
194
+ # ── Agent 1: Radiologist ─────────────────────────────────────────────
195
 
196
  def radiologist_agent(image, notes):
197
+ """
198
+ Analyzes the medical image and returns structured radiology findings.
199
+ If no image is provided, returns a safe empty result.
200
+ """
201
 
202
  if not image:
 
203
  return {
204
  "suspected_conditions": [],
205
  "note": "No image provided — skipping radiology analysis"
206
  }
207
 
208
+ # Convert to RGB if the image is grayscale — MedGemma requires RGB
209
  if image.mode != "RGB":
210
  image = image.convert("RGB")
211
 
 
222
  return safe_json(call_medgemma(prompt, image))
223
 
224
 
225
+ # ── Agent 2: Internist ───────────────────────────────────────────────
226
 
227
  def internist_agent(notes):
228
+ """
229
+ Analyzes clinical notes as an internal medicine physician.
230
+ Returns differential diagnoses, risk factors, and urgency level.
231
+ Works from text only — no image.
232
+ """
233
 
234
  prompt = f"""You are an experienced internal medicine physician.
235
  Patient clinical notes: {notes}
 
243
  return safe_json(call_medgemma(prompt))
244
 
245
 
246
+ # ── Agent 3: Evidence Reviewer ───────────────────────────────────────
247
 
248
  def evidence_agent(r1, r2):
249
+ """
250
+ Fetches supporting medical literature from PubMed based on what
251
+ the Radiologist and Internist suspected.
252
+ Returns up to 4 relevant abstracts.
253
+ """
254
 
255
+ # Combine top conditions from both agents into search queries
256
  queries = (
257
  r1.get("suspected_conditions", [])[:2] +
258
  r2.get("differential_diagnoses", [])[:2]
259
  )
260
 
261
+ # Search PubMed for each condition and collect abstracts
262
  evidence = []
263
  for query in queries:
264
  results = fetch_and_retrieve(str(query), top_k=2)
265
  evidence.extend(results)
266
 
267
+ # Cap at 4 to avoid overflowing the model's context window
268
  return evidence[:4]
269
 
270
 
271
+ # ── Agent 4: Devil's Advocate ────────────────────────────────────────
272
 
273
  def devils_advocate_agent(image, notes, r1, r2, evidence):
274
+ """
275
+ Adversarial agent that challenges the other agents' conclusions.
276
+ Specifically looks for dangerous diagnoses that were missed.
277
+ This is the agent that catches TB when base MedGemma misses it.
278
+ """
279
 
280
+ # Short evidence snippet so we don't overflow the prompt
281
  evidence_snippet = "\n".join(evidence[:2]) if evidence else "None available"
282
 
283
  prompt = f"""You are a critical care specialist and patient safety advocate.
 
296
  - requires_human_review: true or false
297
  Return only the JSON object, no extra explanation."""
298
 
299
+ # Pass image if available so the devil's advocate can see it too
300
  if image and image.mode != "RGB":
301
  image = image.convert("RGB")
302
 
303
  return safe_json(call_medgemma(prompt, image))
304
 
305
 
306
+ # ── Orchestrator ─────────────────────────────────────────────────────
307
 
308
  def orchestrator_agent(notes, r1, r2, evidence, devil):
309
+ """
310
+ Synthesizes all four agents' outputs into a single final report.
311
+ Decides on the primary diagnosis, confidence, escalation, and next steps.
312
+ """
313
 
314
  prompt = f"""You are the lead physician synthesizing a multi-specialist panel review.
315
  RADIOLOGIST findings:
 
330
  - patient_summary: 2-sentence plain English summary for the patient
331
  Return only the JSON object, no extra explanation."""
332
 
333
+ return safe_json(call_medgemma(prompt))
334
 
335
 
336
+ # ── Master Pipeline ──────────────────────────────────────────────────
337
 
338
  def run_medpanel(image, notes):
339
+ """
340
+ Runs the full MedPanel multi-agent pipeline.
341
+ Accepts a PIL image (or None) and a string of clinical notes.
342
+ Returns a dict with panel_trace (each agent's output) and final_report.
343
+ """
344
 
345
  trace = []
346
 
347
+ # Step 1: Radiologist — analyze the image
348
  print("🩻 Running Radiologist agent...")
349
  r1 = radiologist_agent(image, notes)
350
  trace.append({"agent": "Radiologist", "output": r1})
351
 
352
+ # Step 2: Internist — analyze the clinical notes
353
  print("🩺 Running Internist agent...")
354
  r2 = internist_agent(notes)
355
  trace.append({"agent": "Internist", "output": r2})
356
 
357
+ # Step 3: Evidence Reviewer — fetch PubMed literature
358
  print("📚 Fetching PubMed evidence...")
359
  evidence = evidence_agent(r1, r2)
360
  trace.append({"agent": "Evidence Reviewer", "abstracts_retrieved": len(evidence)})
361
 
362
+ # Step 4: Devil's Advocate — challenge the findings
363
  print("😈 Running Devil's Advocate agent...")
364
  devil = devils_advocate_agent(image, notes, r1, r2, evidence)
365
  trace.append({"agent": "Devil's Advocate", "output": devil})
366
 
367
+ # Step 5: Orchestrator — synthesize the final report
368
  print("🏥 Synthesizing final report...")
369
  final_report = orchestrator_agent(notes, r1, r2, evidence, devil)
370
  trace.append({"agent": "Orchestrator", "output": final_report})
 
372
  print("✅ MedPanel analysis complete!")
373
 
374
  return {
375
+ "panel_trace": trace,
376
+ "final_report": final_report
377
  }