Spaces:

bombshelll
/

brain-hierarchical-captioning

Sleeping

App Files Community

bombshelll committed on Jun 21, 2025

Commit

f2ba684

1 Parent(s): 227593e

Add hierarchical classification and captioning app

Browse files

Files changed (1) hide show

app.py +53 -24

app.py CHANGED Viewed

@@ -2,10 +2,11 @@ import gradio as gr
 from PIL import Image
 import torch
 from transformers import VisionEncoderDecoderModel, AutoTokenizer, ViTFeatureExtractor, AutoImageProcessor, AutoModelForImageClassification
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load image captioning model
 caption_model = VisionEncoderDecoderModel.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO").to(device)
 tokenizer = AutoTokenizer.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
 feature_extractor = ViTFeatureExtractor.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
@@ -13,7 +14,7 @@ feature_extractor = ViTFeatureExtractor.from_pretrained("bombshelll/ViT_BioMedBe
 # Load classification models
 def load_classifier(model_id):
     processor = AutoImageProcessor.from_pretrained(model_id)
-    model = AutoModelForImageClassification.from_pretrained(model_id)
     return processor, model
 classifiers = {
@@ -23,7 +24,7 @@ classifiers = {
     "tumor_type": load_classifier("bombshelll/swin-brain-tumor-type-classification")
 }
-# Inference functions
 def classify_image(image):
     results = {}
     for name, (processor, model) in classifiers.items():
@@ -35,22 +36,23 @@ def classify_image(image):
             results[name] = label
     return results
 def generate_captions(image, keywords):
     pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
-    # Without keywords
     caption_model.eval()
     with torch.no_grad():
         output_ids = caption_model.generate(pixel_values, max_length=80)
     caption1 = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # With keywords
     prompt = " ".join(keywords)
     prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         output_ids = caption_model.generate(
             pixel_values,
-            decoder_input_ids=prompt_ids,
             max_length=80,
             num_beams=4,
             no_repeat_ngram_size=3,
@@ -60,24 +62,51 @@ def generate_captions(image, keywords):
     return caption1, caption2
-# Main app logic
-def run_pipeline(image):
     classification = classify_image(image)
     keywords = list(classification.values())
     caption1, caption2 = generate_captions(image, keywords)
-    return classification, caption1, caption2
-# Gradio Interface
-interface = gr.Interface(
-    fn=run_pipeline,
-    inputs=gr.Image(type="pil"),
-    outputs=[
-        gr.JSON(label="Classification Result"),
-        gr.Textbox(label="Caption without Keywords"),
-        gr.Textbox(label="Caption with Keywords")
-    ],
-    title="🧠 Brain Hierarchical Classification + Captioning",
-    description="Upload an MRI/CT brain image. The system will classify (plane, modality, abnormality, tumor) and generate two captions: one plain and one guided by the classification keywords."
-)
-interface.launch()

 from PIL import Image
 import torch
 from transformers import VisionEncoderDecoderModel, AutoTokenizer, ViTFeatureExtractor, AutoImageProcessor, AutoModelForImageClassification
+from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load captioning model
 caption_model = VisionEncoderDecoderModel.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO").to(device)
 tokenizer = AutoTokenizer.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
 feature_extractor = ViTFeatureExtractor.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
 # Load classification models
 def load_classifier(model_id):
     processor = AutoImageProcessor.from_pretrained(model_id)
+    model = AutoModelForImageClassification.from_pretrained(model_id).to(device)
     return processor, model
 classifiers = {
     "tumor_type": load_classifier("bombshelll/swin-brain-tumor-type-classification")
 }
+# Classification function
 def classify_image(image):
     results = {}
     for name, (processor, model) in classifiers.items():
             results[name] = label
     return results
+# Caption generation
 def generate_captions(image, keywords):
     pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
+    # Caption without keywords
     caption_model.eval()
     with torch.no_grad():
         output_ids = caption_model.generate(pixel_values, max_length=80)
     caption1 = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # Caption with keywords
     prompt = " ".join(keywords)
     prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         output_ids = caption_model.generate(
             pixel_values,
+            decoder_input_ids=prompt_ids[:, :-1],
             max_length=80,
             num_beams=4,
             no_repeat_ngram_size=3,
     return caption1, caption2
+# Main pipeline
+def run_pipeline(image, actual_caption):
     classification = classify_image(image)
     keywords = list(classification.values())
     caption1, caption2 = generate_captions(image, keywords)
+    # Format classification result as string
+    classification_str = (
+        f"🧭 Plane: {classification.get('plane')}\n"
+        f"🖼️ Modality: {classification.get('modality')}\n"
+        f"🧬 Abnormality: {classification.get('abnormality')}\n"
+    )
+    if "tumor_type" in classification:
+        classification_str += f"🔬 Tumor Type: {classification.get('tumor_type')}\n"
+    # BLEU Score calculation
+    if actual_caption.strip():
+        ref = [actual_caption.lower().split()]
+        hyp = caption2.lower().split()
+        score = sentence_bleu(ref, hyp, smoothing_function=SmoothingFunction().method1)
+        bleu = f"📊 BLEU Score: {score:.2f}"
+    else:
+        bleu = "📊 BLEU Score: -"
+    # Output
+    result_text = f"{classification_str}\n\n✏️ Caption without Keywords:\n{caption1}\n\n✨ Caption with Keywords:\n{caption2}\n\n{bleu}"
+    return result_text
+# Gradio UI
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink")) as demo:
+    gr.Markdown(
+        """
+        <h1 style='text-align: center;'>🧠 Brain Hierarchical Classification + Captioning</h1>
+        <p style='text-align: center;'>Upload an MRI/CT brain image. The system will classify the image (plane, modality, abnormality, tumor) and generate two captions, along with a BLEU score if ground truth is given.</p>
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="🖼️ Upload Brain MRI/CT")
+            actual_caption = gr.Textbox(label="🧠 Ground Truth Caption (optional)")
+            btn = gr.Button("🚀 Submit")
+        with gr.Column():
+            output_box = gr.Textbox(label="📝 Result", lines=20)
+    btn.click(fn=run_pipeline, inputs=[image_input, actual_caption], outputs=output_box)
+demo.launch()