Spaces:

saad003
/

rad-retrieval-api

Running

App Files Files Community

saad003 committed on 12 days ago

Commit

aaf4ae5

verified ·

1 Parent(s): 928975f

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -15

app.py CHANGED Viewed

@@ -18,10 +18,10 @@ from transformers import BlipForConditionalGeneration, AutoProcessor
 # ---------- FastAPI app ----------
 app = FastAPI()
-# CORS so your React app can call this API
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],      # later you can restrict to your frontend domain
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
@@ -31,11 +31,11 @@ app.add_middleware(
 # Dataset with FAISS index + radiology_metadata.csv
 EMBED_REPO_ID = "saad003/Red01"
-# Dataset with all radiology images you uploaded
 IMAGE_REPO_ID = "saad003/images02"
 BASE_IMAGE_URL = f"https://huggingface.co/datasets/{IMAGE_REPO_ID}/resolve/main"
-# Optional: token if Red01 is private
 HF_TOKEN = os.environ.get("HF_TOKEN")
 # ---------- Download index + metadata ----------
@@ -61,10 +61,11 @@ index = faiss.read_index(INDEX_PATH)
 print("Loading metadata CSV...")
 metadata = pd.read_csv(META_PATH)
-# Make sure the index and metadata have same length
 assert index.ntotal == len(metadata), "Index size and metadata rows mismatch!"
 # ---------- Load CLIP (retrieval) ----------
 print("Loading PubMedCLIP model for retrieval...")
 CLIP_MODEL_NAME = "flaviagiammarino/pubmed-clip-vit-base-patch32"
@@ -86,13 +87,33 @@ caption_model.eval()
 print("Backend ready ✅")
-# ---------- Helper: build image URL ----------
-def id_to_image_url(image_id: str) -> str:
     """
-    Build a public URL to the image in saad003/images02.
-    Assumes filenames are exactly f\"{image_id}.jpg\".
     """
-    return f"{BASE_IMAGE_URL}/{image_id}.jpg"
 # ---------- Helper: search by image ----------
@@ -106,19 +127,19 @@ def search_similar_by_image(image: Image.Image, k: int = 5) -> pd.DataFrame:
     with torch.no_grad():
         feats = clip_model.get_image_features(**inputs)
-    # Normalize (very important, matches how you created the index)
     feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
     feats = feats.cpu().numpy().astype("float32")
-    # Search in FAISS
     D, I = index.search(feats, k)  # D: distances/similarity, I: indices
     # Get metadata rows for top-k indices
     rows = metadata.iloc[I[0]].copy()
     rows["score"] = D[0]
-    # Add image_url for each result
-    rows["image_url"] = rows["ID"].apply(id_to_image_url)
     return rows[["ID", "split", "caption", "concepts_manual", "score", "image_url"]]
@@ -136,6 +157,39 @@ def generate_query_caption(image: Image.Image) -> str:
     return caption.strip()
 # ---------- Routes ----------
 @app.get("/")
@@ -147,8 +201,10 @@ def root():
 async def search_by_image(file: UploadFile = File(...), k: int = 5):
     """
     Upload a radiology image.
     Returns:
-      - query_caption: generated caption for the query image
       - results: list of similar images with their captions, concepts, score, image_url
     """
     content = await file.read()
@@ -165,9 +221,13 @@ async def search_by_image(file: UploadFile = File(...), k: int = 5):
         print("Error generating caption:", e)
         query_caption = None
     return JSONResponse(
         {
             "query_caption": query_caption,
             "results": results,
         }
     )

 # ---------- FastAPI app ----------
 app = FastAPI()
+# Allow your React app to call this API
 app.add_middleware(
     CORSMiddleware,
+    allow_origins=["*"],      # You can later restrict to your domain
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 # Dataset with FAISS index + radiology_metadata.csv
 EMBED_REPO_ID = "saad003/Red01"
+# Dataset with all radiology images (you uploaded here)
 IMAGE_REPO_ID = "saad003/images02"
 BASE_IMAGE_URL = f"https://huggingface.co/datasets/{IMAGE_REPO_ID}/resolve/main"
+# Optional: token if Red01 is private (set HF_TOKEN secret in Space)
 HF_TOKEN = os.environ.get("HF_TOKEN")
 # ---------- Download index + metadata ----------
 print("Loading metadata CSV...")
 metadata = pd.read_csv(META_PATH)
+# Sanity check
 assert index.ntotal == len(metadata), "Index size and metadata rows mismatch!"
 # ---------- Load CLIP (retrieval) ----------
+# IMPORTANT: must match the model you used to build the index.
 print("Loading PubMedCLIP model for retrieval...")
 CLIP_MODEL_NAME = "flaviagiammarino/pubmed-clip-vit-base-patch32"
 print("Backend ready ✅")
+# ---------- Helper: build image URL from img_path ----------
def img_path_to_image_url(img_path: str, base_url: "str | None" = None) -> "str | None":
    """
    Map an original Kaggle ``img_path`` to a URL in the hosted image dataset.

    Example img_path in CSV:
      /kaggle/input/radiology/8333645/train_images/train/ROCOv2_2023_train_000001.jpg
    Everything after the Kaggle dataset root marker ``8333645/`` is kept as the
    relative path, so the URL becomes:
      <base_url>/train_images/train/ROCOv2_2023_train_000001.jpg
    When the marker is absent, only the file name is used as the relative path.

    Args:
        img_path: Path string taken from the metadata CSV. Non-string values
            (e.g. NaN produced by pandas for a missing cell) are tolerated.
        base_url: Prefix the relative path is appended to. Defaults to the
            module-level ``BASE_IMAGE_URL`` when omitted.

    Returns:
        The full image URL, or ``None`` when ``img_path`` is not a string or
        contains no usable relative path (empty, whitespace-only, or ending
        in a slash with no file name).
    """
    if not isinstance(img_path, str):
        return None

    # Try to cut everything up to (and including) the Kaggle dataset root.
    marker = "8333645/"
    _, found, tail = img_path.strip().partition(marker)
    # Fallback: just take the file name when the marker is missing.
    rel = tail if found else os.path.basename(img_path.strip())
    rel = rel.lstrip("/")  # avoid a double slash when joining below

    # An empty relative path would yield a link to the dataset root, which is
    # not an image; report "no URL" instead.
    if not rel:
        return None

    if base_url is None:
        base_url = BASE_IMAGE_URL
    return f"{base_url.rstrip('/')}/{rel}"
 # ---------- Helper: search by image ----------
     with torch.no_grad():
         feats = clip_model.get_image_features(**inputs)
+    # Normalize (very important, must match index construction)
     feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
     feats = feats.cpu().numpy().astype("float32")
+    # Search FAISS
     D, I = index.search(feats, k)  # D: distances/similarity, I: indices
     # Get metadata rows for top-k indices
     rows = metadata.iloc[I[0]].copy()
     rows["score"] = D[0]
+    # Add image_url using original img_path column
+    rows["image_url"] = rows["img_path"].apply(img_path_to_image_url)
     return rows[["ID", "split", "caption", "concepts_manual", "score", "image_url"]]
     return caption.strip()
+# ---------- Helper: infer modality from caption ----------
def infer_modality_from_caption(caption: str) -> str:
    """
    Simple heuristic to map a caption to an imaging modality.

    The caption is lower-cased and tested against an ordered list of keyword
    patterns; the label of the first matching pattern is returned. Short
    keywords are anchored at a word boundary so that ordinary words such as
    "effect of" or "abstract image" are not mistaken for "ct of" / "ct image".
    PET is tested before CT so that "PET-CT scan" is not reported as plain CT.

    Args:
        caption: Free-text caption. ``None``, an empty string, or any
            non-string value yields "Unknown".

    Returns:
        One of "PET/CT", "CT", "MRI", "X-ray", "Ultrasound" or "Unknown".
    """
    import re  # local import: keeps this helper self-contained

    if not caption or not isinstance(caption, str):
        return "Unknown"

    text = caption.lower()

    # Order matters: the first matching rule wins, most specific label first.
    rules = (
        # "pet-ct", "pet ct", "pet/ct", "pet scan", spelled-out form
        ("PET/CT", r"\bpet[- /]?ct\b|\bpet scan|positron emission tomography"),
        # "ct scan", "ct of", "ct image" as a standalone token
        ("CT", r"\bct (?:scan|of|image)|computed tomography"),
        # "mri", "mris", "fmri"
        ("MRI", r"\bf?mri|magnetic resonance"),
        # "x-ray", "x ray", "xray", "radiograph(s/y/ic)"
        ("X-ray", r"\bx[- ]?ray|radiograph"),
        ("Ultrasound", r"ultrasound|sonography|sonogram"),
    )
    for label, pattern in rules:
        if re.search(pattern, text):
            return label
    return "Unknown"
 # ---------- Routes ----------
 @app.get("/")
 async def search_by_image(file: UploadFile = File(...), k: int = 5):
     """
     Upload a radiology image.
     Returns:
+      - query_caption: generated caption for the query image (BLIP)
+      - modality: inferred imaging modality from the caption
       - results: list of similar images with their captions, concepts, score, image_url
     """
     content = await file.read()
         print("Error generating caption:", e)
         query_caption = None
+    # 3) Infer modality
+    modality = infer_modality_from_caption(query_caption or "")
     return JSONResponse(
         {
             "query_caption": query_caption,
+            "modality": modality,
             "results": results,
         }
     )