Spaces:

saad003
/

rad-retrieval-api

Sleeping

App Files Community

saad003 committed 12 days ago

Commit

2abe25a

verified ·

1 Parent(s): e3ac39c

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -270

app.py CHANGED Viewed

@@ -1,9 +1,6 @@
 # app.py
 import io
 import os
-import random
-import re
-from typing import Dict
 import faiss
 import torch
@@ -15,36 +12,32 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from huggingface_hub import hf_hub_download
-from transformers import (
-    CLIPProcessor,
-    CLIPModel,
-    AutoTokenizer,
-    AutoModelForSeq2SeqLM,
-)
-# ---------- FastAPI app ----------
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # later restrict to your frontend domain
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
-# ---------- Config ----------
-EMBED_REPO_ID = "saad003/Red01"      # FAISS + metadata
-IMAGE_REPO_ID = "saad003/images04"   # images04 with test/valid/train01–train07
 BASE_IMAGE_URL = f"https://huggingface.co/datasets/{IMAGE_REPO_ID}/resolve/main"
 HF_TOKEN = os.environ.get("HF_TOKEN")
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print("Using device:", device)
-# ---------- Download index + metadata ----------
 print("Downloading FAISS index & metadata from Hugging Face...")
 INDEX_PATH = hf_hub_download(
@@ -66,173 +59,147 @@ index = faiss.read_index(INDEX_PATH)
 print("Loading metadata CSV...")
 metadata = pd.read_csv(META_PATH)
 assert index.ntotal == len(metadata), "Index size and metadata rows mismatch!"
-# ---------- Load CLIP (retrieval) ----------
 print("Loading PubMedCLIP model for retrieval...")
 CLIP_MODEL_NAME = "flaviagiammarino/pubmed-clip-vit-base-patch32"
 clip_model = CLIPModel.from_pretrained(CLIP_MODEL_NAME).to(device)
 clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_NAME)
 clip_model.eval()
-# ---------- Load FLAN-T5 (caption synthesis) ----------
-print("Loading FLAN-T5 for diagnosis synthesis from similar captions...")
-REFINER_MODEL_ID = "google/flan-t5-base"
-refiner_tokenizer = AutoTokenizer.from_pretrained(REFINER_MODEL_ID)
-refiner_model = AutoModelForSeq2SeqLM.from_pretrained(
-    REFINER_MODEL_ID
-).to(device)
-refiner_model.eval()
-print("Backend ready ✅")
-# ---------- Helper: image path mapping ----------
-def id_to_image_url(image_id: str) -> str:
-    """
-    Map ROCO image IDs to folders in saad003/images04.
-    test  -> test/
-    valid -> valid/
-    train -> train01 ... train07 based on numeric ID
-    """
-    image_id = image_id.strip()
-    base = BASE_IMAGE_URL
-    if "_test_" in image_id:
         folder = "test"
-    elif "_valid_" in image_id:
         folder = "valid"
-    elif "_train_" in image_id:
-        num_str = image_id.split("_")[-1]
         try:
-            n = int(num_str)
-        except ValueError:
-            n = 0
-        if 1 <= n <= 9000:
             folder = "train01"
-        elif 9001 <= n <= 18000:
-            folder = "train02"
-        elif 18001 <= n <= 27000:
-            folder = "train03"
-        elif 27001 <= n <= 36000:
-            folder = "train04"
-        elif 36001 <= n <= 45000:
-            folder = "train05"
-        elif 45001 <= n <= 54000:
-            folder = "train06"
         else:
-            folder = "train07"
-    else:
-        folder = ""
-    if folder:
-        return f"{base}/{folder}/{image_id}.jpg"
-    else:
-        return f"{base}/{image_id}.jpg"
-# ---------- Helper: modality detection ----------
-MODALITY_KEYWORDS = {
-    "CT": [
-        "ct ",
-        "ctscan",
-        "ct scan",
-        "computed tomography",
-        "tomography",
-        "non-contrast ct",
-        "contrast-enhanced ct",
-    ],
-    "MRI": [
-        "mri ",
-        "magnetic resonance",
-        "t1-weighted",
-        "t2-weighted",
-        "flair sequence",
-        "diffusion-weighted",
-        "dwi",
-    ],
-    "X-ray": [
-        "x-ray",
-        "x ray",
-        "radiograph",
-        "plain film",
-        "chest film",
-        "postoperative x",
-        "post-operative x",
-        "cxr",
-    ],
-    "Ultrasound": [
-        "ultrasound",
-        "sonogram",
-        "sonography",
-        "usg",
-        "doppler",
-        "echocardiogram",
-        "echocardiography",
-    ],
-    "PET/CT": [
-        "pet-ct",
-        "pet/ct",
-        "pet scan",
-        "positron emission tomography",
-    ],
-    "Fluoroscopy": [
-        "fluoroscopy",
-        "fluoroscopic",
-        "angiogram",
-        "angiography",
-        "barium swallow",
-        "barium enema",
-    ],
-}
-def detect_modality(caption: str) -> str:
-    if not caption:
-        return "Unknown"
-    text = caption.lower()
-    for modality, keywords in MODALITY_KEYWORDS.items():
-        for kw in keywords:
-            if kw in text:
-                return modality
-    if "mra" in text:
-        return "MRI"
-    if "cta " in text or "ct angiography" in text:
         return "CT"
     return "Unknown"
-# ---------- Helper: random scoring ----------
-def generate_random_scores() -> Dict[str, float]:
-    rng = random.Random()
-    modality_score = rng.uniform(85.0, 93.0)   # percent
-    cui_at_k = rng.uniform(0.30, 0.61)
-    bert = rng.uniform(0.20, 0.40)
-    medbert = rng.uniform(0.20, 0.35)
-    return {
-        "modality_score": round(modality_score, 1),
-        "cui_at_k": round(cui_at_k, 3),
-        "bertscore": round(bert, 3),
-        "medbertscore": round(medbert, 3),
-    }
-# ---------- Helper: FAISS search ----------
 def search_similar_by_image(image: Image.Image, k: int = 5) -> pd.DataFrame:
     """
-    Encode query image with CLIP, search FAISS,
-    filter out self-match, and return top-k results.
     """
     inputs = clip_processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
@@ -241,167 +208,118 @@ def search_similar_by_image(image: Image.Image, k: int = 5) -> pd.DataFrame:
     feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
     feats = feats.cpu().numpy().astype("float32")
-    search_k = min(index.ntotal, k + 5)
-    D, I = index.search(feats, search_k)
     rows = metadata.iloc[I[0]].copy()
     rows["score"] = D[0]
-    # drop exact self-match
-    rows = rows[rows["score"] < 0.999].copy()
-    rows["image_url"] = rows["ID"].apply(id_to_image_url)
-    rows = rows.sort_values("score", ascending=False).head(k)
-    if "concepts_manual" not in rows.columns:
-        rows["concepts_manual"] = ""
-    return rows[["ID", "caption", "concepts_manual", "score", "image_url"]]
-# ---------- Helper: caption cleaning & synthesis ----------
-def clean_caption(text: str) -> str:
-    """
-    Clean generated caption:
-    - strip
-    - remove obvious prompt leftovers
-    - ensure single sentence, nice punctuation
-    """
-    if not text:
-        return ""
-    text = text.strip()
-    # Drop any leading instruction-like fragments
-    text = re.sub(
-        r"^(you are an expert radiologist[:,]?\s*)",
-        "",
-        text,
-        flags=re.IGNORECASE,
     )
-    text = re.sub(
-        r"(findings? from similar radiology cases[:,]?\s*)",
-        "",
-        text,
-        flags=re.IGNORECASE,
-    )
-    # Replace multiple separators
-    text = text.replace(" ;", ";")
-    text = re.sub(r"\s+[,;]\s*", ", ", text)
-    # Collapse spaces
-    text = " ".join(text.split())
-    # If there are multiple sentences, keep only the first one
-    parts = re.split(r"(?<=[.!?])\s+", text)
-    if parts:
-        text = parts[0]
-    # Ensure period
-    if text and not text.endswith((".", "!", "?")):
-        text += "."
-    # Capitalize first letter
-    if text:
-        text = text[0].upper() + text[1:]
-    return text
-def synthesize_caption_from_similar_captions(captions: list[str]) -> str:
     """
-    Use FLAN-T5 to create a diagnosis sentence from captions of similar images.
     """
-    captions = [c.strip() for c in captions if c and isinstance(c, str)]
-    if not captions:
-        return ""
-    # Use at most 5-6 captions to keep prompt short
-    caps = captions[:6]
-    numbered = "\n".join(
-        f"{i+1}) {c}" for i, c in enumerate(caps)
-    )
     prompt = (
-        "Radiology findings from similar cases:\n"
-        f"{numbered}\n\n"
-        "Based on these, write ONE concise radiology impression sentence "
-        "describing the most likely diagnosis and key findings for the "
-        "current image. Do not mention numbers or 'similar cases'."
     )
-    inputs = refiner_tokenizer(
-        prompt,
         return_tensors="pt",
-        truncation=True,
-        max_length=512,
-    ).to(device)
     with torch.no_grad():
-        out_ids = refiner_model.generate(
             **inputs,
-            max_new_tokens=48,
-            num_beams=4,
-            length_penalty=0.9,
-            no_repeat_ngram_size=4,
         )
-    raw = refiner_tokenizer.decode(out_ids[0], skip_special_tokens=True)
-    return clean_caption(raw)
-# ---------- Routes ----------
 @app.get("/")
 def root():
-    return {
-        "status": "ok",
-        "message": "Radiology retrieval + FLAN-T5 synthesis from similar captions",
-    }
 @app.post("/search_by_image")
 async def search_by_image(file: UploadFile = File(...), k: int = 5):
     """
-    Upload a radiology image.
-    Returns:
-      - query_caption: synthesized diagnosis from captions of similar images
-      - modality: detected imaging modality
-      - scores: random quality metrics
-      - results: similar images (similarity + concepts + image_url)
     """
     content = await file.read()
     image = Image.open(io.BytesIO(content)).convert("RGB")
-    k = int(k)
     # 1) Retrieval
     results_df = search_similar_by_image(image, k=k)
     results = results_df.to_dict(orient="records")
-    # 2) Synthesize caption only from similar image captions
-    similar_caps_list = results_df["caption"].astype(str).tolist()
     try:
-        final_caption = synthesize_caption_from_similar_captions(
-            similar_caps_list
-        )
     except Exception as e:
-        print("Error synthesizing caption:", e)
-        final_caption = ""
-    # 3) Modality & scores
-    modality = detect_modality(final_caption or "")
-    scores = generate_random_scores()
     return JSONResponse(
         {
-            "query_caption": final_caption,
             "modality": modality,
-            "scores": scores,
             "results": results,
         }
     )

 # app.py
 import io
 import os
 import faiss
 import torch
 from fastapi.responses import JSONResponse
 from huggingface_hub import hf_hub_download
+from transformers import CLIPProcessor, CLIPModel
+from transformers import AutoProcessor, AutoModelForVision2Seq
+from peft import PeftConfig, PeftModel
+# ---------------- FastAPI app ----------------
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
+# ---------------- Config ----------------
+# FAISS index + metadata
+EMBED_REPO_ID = "saad003/Red01"
+# All radiology images (with test / valid / train01..07 folders)
+IMAGE_REPO_ID = "saad003/images04"
 BASE_IMAGE_URL = f"https://huggingface.co/datasets/{IMAGE_REPO_ID}/resolve/main"
 HF_TOKEN = os.environ.get("HF_TOKEN")
+# ---------------- Download index + metadata ----------------
 print("Downloading FAISS index & metadata from Hugging Face...")
 INDEX_PATH = hf_hub_download(
 print("Loading metadata CSV...")
 metadata = pd.read_csv(META_PATH)
+# Sanity-check sizes
 assert index.ntotal == len(metadata), "Index size and metadata rows mismatch!"
+# ---------------- CLIP retrieval model ----------------
 print("Loading PubMedCLIP model for retrieval...")
 CLIP_MODEL_NAME = "flaviagiammarino/pubmed-clip-vit-base-patch32"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print("Using device:", device)
 clip_model = CLIPModel.from_pretrained(CLIP_MODEL_NAME).to(device)
 clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_NAME)
 clip_model.eval()
+# ---------------- Med-BLIP-2 captioning model ----------------
+# This is a BLIP-2 model fine-tuned on ROCO via QLoRA
+print("Loading Med-BLIP-2 captioning model...")
+CAPTION_ADAPTER_ID = "NouRed/Med-BLIP-2-QLoRA-ROCO"
+peft_config = PeftConfig.from_pretrained(CAPTION_ADAPTER_ID)
+BASE_CAPTION_MODEL = peft_config.base_model_name_or_path  # should be Salesforce/blip2-opt-2.7b
+caption_processor = AutoProcessor.from_pretrained(BASE_CAPTION_MODEL)
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+base_caption_model = AutoModelForVision2Seq.from_pretrained(
+    BASE_CAPTION_MODEL,
+    torch_dtype=dtype,
+)
+caption_model = PeftModel.from_pretrained(
+    base_caption_model,
+    CAPTION_ADAPTER_ID,
+)
+caption_model.to(device)
+caption_model.eval()
+print("Backend ready ✅")
+# ---------------- Helper: build image URL ----------------
+def id_to_image_url(image_id: str, split: str) -> str:
+    """
+    Map ROCO ID + split to the correct folder in saad003/images04.
+    Folders:
+      - test/...
+      - valid/...
+      - train01/ .. train07/  (train images split by numeric range)
+    """
+    if split == "test":
         folder = "test"
+    elif split == "valid":
         folder = "valid"
+    else:
+        # train split, we route to train01..train07 based on ID number
+        # Example ID: ROCOv2_2023_train_036004  -> num = 36004
         try:
+            num_str = image_id.split("_")[-1]
+            num = int(num_str)
+        except Exception:
+            # fallback, just put in train01
             folder = "train01"
         else:
+            # Roughly 9k images per shard, based on how you uploaded them
+            if num <= 9000:
+                folder = "train01"
+            elif num <= 18000:
+                folder = "train02"
+            elif num <= 27000:
+                folder = "train03"
+            elif num <= 36000:
+                folder = "train04"
+            elif num <= 45000:
+                folder = "train05"
+            elif num <= 54000:
+                folder = "train06"
+            else:
+                folder = "train07"
+    return f"{BASE_IMAGE_URL}/{folder}/{image_id}.jpg"
+# ---------------- Helper: modality detection ----------------
+def infer_modality_from_text(text: str) -> str:
+    """
+    Simple keyword-based modality detection from the generated caption.
+    Tries to be generous with synonyms.
+    """
+    t = text.lower()
+    ct_keywords = [
+        "ct scan", "computed tomography", "ct of the", "ct angiography",
+        "cta", "contrast-enhanced ct", "non-contrast ct", "non contrast ct",
+    ]
+    mri_keywords = [
+        "mri", "mr imaging", "magnetic resonance",
+        "t1-weighted", "t2-weighted", "flair sequence", "diffusion-weighted imaging",
+    ]
+    xray_keywords = [
+        "x-ray", "x ray", "radiograph", "plain film",
+        "chest film", "chest xray", "chest x-ray", "anteroposterior", "posteroanterior",
+    ]
+    ultrasound_keywords = [
+        "ultrasound", "sonography", "sonogram", "echogenic", "doppler",
+    ]
+    nuclear_keywords = [
+        "pet-ct", "pet ct", "pet/ct", "spect", "nuclear medicine", "scintigraphy",
+    ]
+    mammo_keywords = [
+        "mammogram", "mammography", "craniocaudal", "mediolateral oblique",
+    ]
+    def has_any(keys):
+        return any(k in t for k in keys)
+    if has_any(ct_keywords):
         return "CT"
+    if has_any(mri_keywords):
+        return "MRI"
+    if has_any(xray_keywords):
+        return "X-ray"
+    if has_any(ultrasound_keywords):
+        return "Ultrasound"
+    if has_any(nuclear_keywords):
+        return "Nuclear medicine / PET"
+    if has_any(mammo_keywords):
+        return "Mammography"
     return "Unknown"
+# ---------------- Helper: FAISS retrieval ----------------
 def search_similar_by_image(image: Image.Image, k: int = 5) -> pd.DataFrame:
     """
+    Encode query image with PubMedCLIP, search FAISS, return DataFrame with:
+      ID, split, caption, concepts_manual, score, image_url
+    Also removes the *exact* self-match (score very close to 1.0)
+    so the query image is not shown again in the similar-images list.
     """
     inputs = clip_processor(images=image, return_tensors="pt").to(device)
     with torch.no_grad():
     feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
     feats = feats.cpu().numpy().astype("float32")
+    D, I = index.search(feats, k + 1)  # search a bit more so we can drop the self-match
     rows = metadata.iloc[I[0]].copy()
     rows["score"] = D[0]
+    # Drop suspected identical match (usually score == 1.0)
+    rows = rows[rows["score"] < 0.9999]
+    # Limit to requested top-k after filtering
+    rows = rows.head(k)
+    # Add image URLs
+    rows["image_url"] = rows.apply(
+        lambda r: id_to_image_url(str(r["ID"]), str(r["split"])), axis=1
     )
+    # Keep only what we actually need
+    return rows[["ID", "split", "caption", "concepts_manual", "score", "image_url"]]
+# ---------------- Helper: BLIP-2 caption using similar captions ----------------
+def generate_query_caption(image: Image.Image, similar_captions=None) -> str:
     """
+    Use Med-BLIP-2 to generate a diagnosis-style caption.
+    We condition the text prompt on captions from up to three of the top-k similar images.
     """
+    similar_captions = similar_captions or []
+    # Take at most 3 similar captions and truncate each a bit so the prompt doesn't explode
+    cleaned_similar = []
+    for cap in similar_captions[:3]:
+        cap = str(cap).strip()
+        if len(cap) > 260:
+            cap = cap[:260] + "..."
+        cleaned_similar.append(cap)
+    similar_block = ""
+    if cleaned_similar:
+        joined = " || ".join(cleaned_similar)
+        similar_block = f" Findings from similar radiology cases: {joined}"
     prompt = (
+        "You are an expert radiologist. Based only on the image and the findings below, "
+        "write a concise diagnostic summary in 2–3 short sentences. "
+        "Use precise medical terminology and avoid repeating words or phrases."
+        + similar_block
     )
+    inputs = caption_processor(
+        images=image,
+        text=prompt,
         return_tensors="pt",
+    ).to(device, dtype)
     with torch.no_grad():
+        generated_ids = caption_model.generate(
             **inputs,
+            max_new_tokens=96,
+            num_beams=3,
+            do_sample=False,
+            repetition_penalty=1.25,
+            no_repeat_ngram_size=3,
         )
+    caption = caption_processor.batch_decode(
+        generated_ids, skip_special_tokens=True
+    )[0]
+    return caption.strip()
+# ---------------- Routes ----------------
 @app.get("/")
 def root():
+    return {"status": "ok", "message": "Radiology retrieval + Med-BLIP-2 captioning API"}
 @app.post("/search_by_image")
 async def search_by_image(file: UploadFile = File(...), k: int = 5):
     """
+    Request:
+      - file: uploaded radiology image
+      - k: number of similar images
+    Response:
+      - query_caption: Med-BLIP-2 diagnosis summary for the query
+      - modality: inferred imaging modality
+      - results: list of similar images with their captions, concepts, score, image_url
     """
     content = await file.read()
     image = Image.open(io.BytesIO(content)).convert("RGB")
     # 1) Retrieval
     results_df = search_similar_by_image(image, k=k)
     results = results_df.to_dict(orient="records")
+    # 2) Use captions of similar images as extra context
+    similar_caps_for_prompt = results_df["caption"].tolist()
+    # 3) Captioning for the query image
     try:
+        query_caption = generate_query_caption(image, similar_caps_for_prompt)
     except Exception as e:
+        print("Error generating caption:", e)
+        query_caption = ""
+    # 4) Modality inference from the generated caption
+    modality = infer_modality_from_text(query_caption)
     return JSONResponse(
         {
+            "query_caption": query_caption,
             "modality": modality,
             "results": results,
         }
     )