Update app.py

app.py CHANGED
@@ -20,6 +20,8 @@ from transformers import (
     CLIPModel,
     BlipForConditionalGeneration,
     AutoProcessor,
+    AutoTokenizer,
+    AutoModelForSeq2SeqLM,
 )
 
 # ---------- FastAPI app ----------
@@ -35,21 +37,16 @@ app.add_middleware(
 
 # ---------- Config ----------
 
-
-
-
-# Dataset with all radiology images (test, valid, train01–train07)
-IMAGE_REPO_ID = "saad003/images04"
+EMBED_REPO_ID = "saad003/Red01"     # FAISS + metadata
+IMAGE_REPO_ID = "saad003/images04"  # images04 with test/valid/train01–train07
 BASE_IMAGE_URL = f"https://huggingface.co/datasets/{IMAGE_REPO_ID}/resolve/main"
 
-# Optional: token if Red01 is private
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Using device:", device)
 
-
-caption_dtype = torch.float16 if device == "cuda" else torch.float32
+cap_dtype = torch.float16 if device == "cuda" else torch.float32
 
 # ---------- Download index + metadata ----------
 print("Downloading FAISS index & metadata from Hugging Face...")
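The download step itself sits between these hunks and is unchanged by this commit. For context, a minimal sketch of how INDEX_PATH and META_PATH are presumably produced with hf_hub_download before the read_index step below; the filenames and repo_type here are assumptions, only EMBED_REPO_ID and HF_TOKEN come from the diff:

    # Sketch only, not part of the diff: filenames and repo_type are assumed.
    from huggingface_hub import hf_hub_download

    INDEX_PATH = hf_hub_download(
        repo_id=EMBED_REPO_ID,    # "saad003/Red01", per this commit
        filename="index.faiss",   # assumed filename
        repo_type="dataset",      # assumed
        token=HF_TOKEN,           # needed only if the repo is private
    )
    META_PATH = hf_hub_download(
        repo_id=EMBED_REPO_ID,
        filename="metadata.csv",  # assumed filename
        repo_type="dataset",
        token=HF_TOKEN,
    )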
@@ -73,7 +70,6 @@ index = faiss.read_index(INDEX_PATH)
 
 print("Loading metadata CSV...")
 metadata = pd.read_csv(META_PATH)
-
 assert index.ntotal == len(metadata), "Index size and metadata rows mismatch!"
 
 # ---------- Load CLIP (retrieval) ----------
@@ -84,17 +80,27 @@ clip_model = CLIPModel.from_pretrained(CLIP_MODEL_NAME).to(device)
 clip_processor = CLIPProcessor.from_pretrained(CLIP_MODEL_NAME)
 clip_model.eval()
 
-# ---------- Load BLIP (
+# ---------- Load BLIP (image -> draft caption) ----------
 print("Loading BLIP ROCO radiology captioning model...")
 CAPTION_MODEL_ID = "WafaaFraih/blip-roco-radiology-captioning"
 
 caption_processor = AutoProcessor.from_pretrained(CAPTION_MODEL_ID)
 caption_model = BlipForConditionalGeneration.from_pretrained(
     CAPTION_MODEL_ID,
-    torch_dtype=caption_dtype,
+    torch_dtype=cap_dtype,
 ).to(device)
 caption_model.eval()
 
+# ---------- Load FLAN-T5 (text refinement using similar captions) ----------
+print("Loading FLAN-T5 for caption refinement...")
+REFINER_MODEL_ID = "google/flan-t5-base"
+
+refiner_tokenizer = AutoTokenizer.from_pretrained(REFINER_MODEL_ID)
+refiner_model = AutoModelForSeq2SeqLM.from_pretrained(
+    REFINER_MODEL_ID
+).to(device)
+refiner_model.eval()
+
 print("Backend ready ✅")
 
 
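Since the refiner is new in this commit, a one-off smoke test like the following (a sketch, not part of app.py) confirms the FLAN-T5 weights load and generate before the server takes traffic:

    test_inputs = refiner_tokenizer(
        "Summarize: small left pleural effusion.", return_tensors="pt"
    ).to(device)
    with torch.no_grad():
        test_ids = refiner_model.generate(**test_inputs, max_new_tokens=32)
    print(refiner_tokenizer.decode(test_ids[0], skip_special_tokens=True))

Note the refiner is loaded in full precision, unlike BLIP, which uses cap_dtype (float16 on CUDA).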
@@ -142,7 +148,6 @@ def id_to_image_url(image_id: str) -> str:
     if folder:
         return f"{base}/{folder}/{image_id}.jpg"
     else:
-        # fallback – should not happen, but safe
         return f"{base}/{image_id}.jpg"
 
 
@@ -152,9 +157,9 @@ MODALITY_KEYWORDS = {
     "CT": [
         "ct ",
         "ctscan",
+        "ct scan",
         "computed tomography",
         "tomography",
-        "ct scan",
         "non-contrast ct",
         "contrast-enhanced ct",
     ],
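detect_modality itself is not part of this diff; a minimal sketch of keyword matching consistent with MODALITY_KEYWORDS (first-match-wins ordering and the "Unknown" fallback are assumptions):

    def detect_modality(caption: str) -> str:
        # pad with spaces so keywords like "ct " can also match at the end
        text = " " + caption.lower() + " "
        for modality, keywords in MODALITY_KEYWORDS.items():
            if any(kw in text for kw in keywords):
                return modality
        return "Unknown"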
@@ -222,9 +227,6 @@ def detect_modality(caption: str) -> str:
 # ---------- Helper: random scoring ----------
 
 def generate_random_scores() -> Dict[str, float]:
-    """
-    Return random scores in the ranges you specified.
-    """
     rng = random.Random()
 
     modality_score = rng.uniform(85.0, 93.0)  # percent
@@ -272,11 +274,11 @@ def search_similar_by_image(image: Image.Image, k: int = 5) -> pd.DataFrame:
     return rows[["ID", "caption", "concepts_manual", "score", "image_url"]]
 
 
-# ----------
+# ---------- Caption cleaning & generation ----------
 
 def clean_caption(text: str) -> str:
     """
-    Clean
+    Clean captions:
     - strip
     - split into clauses and remove duplicates
     - normalize spacing and punctuation
@@ -286,7 +288,6 @@ def clean_caption(text: str) -> str:
 
     text = text.strip()
 
-    # break into clauses
     parts = re.split(r"[,.]", text)
     parts = [p.strip() for p in parts if p.strip()]
 
@@ -298,12 +299,11 @@ def clean_caption(text: str) -> str:
             seen.add(key)
             unique_parts.append(p)
 
-    if not unique_parts:
-        cleaned = text
-    else:
+    if unique_parts:
         cleaned = ", ".join(unique_parts)
+    else:
+        cleaned = text
 
-    # remove repeated 'respectively'
     cleaned = re.sub(
         r"(respectively,?\s+)+", "respectively ", cleaned, flags=re.IGNORECASE
     )
@@ -311,19 +311,18 @@ def clean_caption(text: str) -> str:
     cleaned = " ".join(cleaned.split())
     if cleaned and not cleaned.endswith("."):
         cleaned += "."
-
+    if cleaned:
+        cleaned = cleaned[0].upper() + cleaned[1:]
     return cleaned
 
 
-def generate_query_caption(image: Image.Image) -> str:
+def generate_draft_caption(image: Image.Image) -> str:
     """
-
-    Tuned decoding to reduce repetition and keep it concise.
+    Draft caption directly from image using BLIP.
     """
     inputs = caption_processor(images=image, return_tensors="pt").to(
-        device, dtype=caption_dtype
+        device, dtype=cap_dtype
     )
-
     with torch.no_grad():
         out_ids = caption_model.generate(
             **inputs,
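A worked example of clean_caption after this hunk (duplicate clause dropped, period appended, and the new capitalization step applied):

    print(clean_caption(
        "ct scan of the chest, ct scan of the chest, no acute findings"
    ))
    # -> "Ct scan of the chest, no acute findings."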
@@ -334,11 +333,53 @@ def generate_query_caption(image: Image.Image) -> str:
             length_penalty=0.9,
             early_stopping=True,
         )
+    raw = caption_processor.batch_decode(out_ids, skip_special_tokens=True)[0]
+    return clean_caption(raw)
+
 
-
-
-
-
+def refine_caption_with_similar_cases(
+    draft_caption: str,
+    similar_captions: str,
+) -> str:
+    """
+    Use FLAN-T5 to rewrite a final diagnosis sentence based on:
+    - draft caption from BLIP (current image)
+    - captions from similar images
+    """
+    if not draft_caption:
+        draft_caption = "No draft description available."
+    if not similar_captions:
+        # nothing to refine with; just return draft
+        return draft_caption
+
+    prompt = (
+        "You are an expert radiologist.\n\n"
+        "Draft findings from the current image:\n"
+        f"{draft_caption}\n\n"
+        "Findings from similar radiology cases:\n"
+        f"{similar_captions}\n\n"
+        "Based on all of this, write ONE concise radiology impression "
+        "sentence describing the most probable diagnosis and key findings "
+        "for the current image. Do not mention 'similar cases' or 'draft'."
+    )
+
+    inputs = refiner_tokenizer(
+        prompt,
+        return_tensors="pt",
+        truncation=True,
+        max_length=512,
+    ).to(device)
+
+    with torch.no_grad():
+        out_ids = refiner_model.generate(
+            **inputs,
+            max_new_tokens=64,
+            num_beams=4,
+            length_penalty=0.9,
+            no_repeat_ngram_size=4,
+        )
+    refined = refiner_tokenizer.decode(out_ids[0], skip_special_tokens=True)
+    return clean_caption(refined)
 
 
 # ---------- Routes ----------
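The two functions above form the new two-stage captioning pipeline. An illustrative call sequence, run with the app module's globals (image path and neighbor captions are placeholder values):

    img = Image.open("example.jpg").convert("RGB")   # placeholder image
    draft = generate_draft_caption(img)              # stage 1: BLIP draft
    neighbors = "; ".join([                          # stage 2 context
        "Axial CT shows a hypodense lesion.",
        "Contrast-enhanced CT of the abdomen.",
    ])
    final = refine_caption_with_similar_cases(draft, neighbors)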
@@ -347,7 +388,7 @@ def generate_query_caption(image: Image.Image) -> str:
 def root():
     return {
         "status": "ok",
-        "message": "Radiology retrieval + BLIP
+        "message": "Radiology retrieval + BLIP + FLAN-T5 refinement API",
     }
 
 
@@ -356,33 +397,47 @@ async def search_by_image(file: UploadFile = File(...), k: int = 5):
     """
     Upload a radiology image.
    Returns:
-    - query_caption:
-    - modality: detected imaging modality
+    - query_caption: refined caption using draft + similar cases
+    - modality: detected imaging modality
     - scores: random quality metrics
     - results: similar images (similarity + concepts + image_url)
     """
     content = await file.read()
     image = Image.open(io.BytesIO(content)).convert("RGB")
 
-
-
+    k = int(k)
+
+    # 1) Retrieval
+    results_df = search_similar_by_image(image, k=k)
     results = results_df.to_dict(orient="records")
 
-    #
+    # similar captions context (take up to 5)
+    similar_caps_list = results_df["caption"].astype(str).tolist()
+    similar_caps_short = "; ".join(similar_caps_list[:5])
+
+    # 2) Draft caption from BLIP
     try:
-
+        draft_caption = generate_draft_caption(image)
     except Exception as e:
-        print("Error generating caption:", e)
-
+        print("Error generating draft caption:", e)
+        draft_caption = ""
 
-
+    # 3) Refine caption with similar case captions
+    try:
+        final_caption = refine_caption_with_similar_cases(
+            draft_caption, similar_caps_short
+        )
+    except Exception as e:
+        print("Error refining caption:", e)
+        final_caption = draft_caption or None
 
-    #
+    # 4) Modality & scores
+    modality = detect_modality(final_caption or "")
     scores = generate_random_scores()
 
     return JSONResponse(
         {
-            "query_caption":
+            "query_caption": final_caption,
             "modality": modality,
             "scores": scores,
             "results": results,
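End-to-end, a client would exercise the updated endpoint roughly like this (a sketch: the route path is an assumption, since the @app.post decorator is outside the hunk):

    import requests

    with open("chest_ct.jpg", "rb") as f:           # placeholder file
        resp = requests.post(
            "https://<space-url>/search_by_image",  # assumed route path
            files={"file": f},
            params={"k": 5},
        )
    data = resp.json()
    print(data["query_caption"], data["modality"], data["scores"])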