Commit 6d6d9b8
Parent(s): 6453d14
Refine Gradio UI
app.py CHANGED
@@ -11,12 +11,10 @@ warnings.filterwarnings("ignore", category=UserWarning)

device = "cuda" if torch.cuda.is_available() else "cpu"

-# Load captioning model
caption_model = VisionEncoderDecoderModel.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO").to(device)
tokenizer = AutoTokenizer.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
feature_extractor = ViTFeatureExtractor.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")

-# Load classification models
def load_classifier(model_id):
    processor = AutoImageProcessor.from_pretrained(model_id)
    model = AutoModelForImageClassification.from_pretrained(model_id).to(device)
@@ -29,7 +27,6 @@ classifiers = {
    "tumor_type": load_classifier("bombshelll/swin-brain-tumor-type-classification")
}

-# Classification function
def classify_image(image):
    results = {}
    for name, (processor, model) in classifiers.items():
@@ -41,7 +38,6 @@ def classify_image(image):
        results[name] = label
    return results

-# Preprocessing caption
def preprocess_caption(text):
    text = str(text).lower()
    text = text.replace("magnetic resonance imaging", "mri")
@@ -59,17 +55,14 @@ def preprocess_caption(text):
    text = text.replace("-", " ")
    return text.split()

-# Caption generation
def generate_captions(image, keywords):
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
-
-    # Caption without keywords
+
    caption_model.eval()
    with torch.no_grad():
        output_ids = caption_model.generate(pixel_values, max_length=80)
    caption1 = tokenizer.decode(output_ids[0], skip_special_tokens=True)

-    # Caption with keywords
    prompt = " ".join(keywords)
    prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
@@ -85,22 +78,23 @@ def generate_captions(image, keywords):

    return caption1, caption2

-# Main pipeline
def run_pipeline(image, actual_caption):
    classification = classify_image(image)
    keywords = list(classification.values())
    caption1, caption2 = generate_captions(image, keywords)

-    # BLEU Score
    if actual_caption.strip():
        ref = [preprocess_caption(actual_caption)]
-
-
-
+        hyp1 = preprocess_caption(caption1)
+        hyp2 = preprocess_caption(caption2)
+        score1 = sentence_bleu(ref, hyp1, smoothing_function=nltk.translate.bleu_score.SmoothingFunction().method1)
+        score2 = sentence_bleu(ref, hyp2, smoothing_function=nltk.translate.bleu_score.SmoothingFunction().method1)
+        bleu1 = f"{score1:.2f}"
+        bleu2 = f"{score2:.2f}"
    else:
-
+        bleu1 = "-"
+        bleu2 = "-"

-    # Format outputs
    result_sections = {
        "classification": (
            f"Plane: {classification.get('plane')}\n"
@@ -110,31 +104,43 @@ def run_pipeline(image, actual_caption):
        ),
        "caption1": caption1,
        "caption2": caption2,
-        "
+        "bleu1": bleu1,
+        "bleu2": bleu2
    }

-    return
+    return (
+        result_sections["classification"],
+        result_sections["caption1"],
+        result_sections["bleu1"],
+        result_sections["caption2"],
+        result_sections["bleu2"]
+    )

-# Gradio UI
with gr.Blocks(theme=gr.themes.Soft(primary_hue="pink"), css="*{font-family:'Poppins', sans-serif;}") as demo:
    gr.Markdown(
        """
+        <link href="https://fonts.googleapis.com/css2?family=Poppins&display=swap" rel="stylesheet">
        <h1 style='text-align: center;'>π§ Brain Hierarchical Classification + Captioning</h1>
-        <p style='text-align: center;'>Upload an MRI/CT brain image. The system will classify the image (plane, modality, abnormality, tumor type) and generate two captions. Optionally, provide a ground truth caption to get BLEU
+        <p style='text-align: center;'>Upload an MRI/CT brain image. The system will classify the image (plane, modality, abnormality, tumor type) and generate two captions. Optionally, provide a ground truth caption to get BLEU scores.</p>
        """,
        elem_id="title"
    )
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="πΌοΈ Upload Brain MRI/CT")
-            actual_caption = gr.Textbox(label="
            btn = gr.Button("π Submit")
        with gr.Column():
-            cls_box = gr.Textbox(label="
-            cap1_box = gr.Textbox(label="
-
-
-
-
+            actual_caption = gr.Textbox(label="π¬ Ground Truth Caption (optional)")
+            cls_box = gr.Textbox(label="π Classification Result", lines=4)
+            cap1_box = gr.Textbox(label="π Caption without Keyword Integration", lines=4)
+            bleu1_box = gr.Textbox(label="π BLEU Score (No Keyword)", lines=1)
+            cap2_box = gr.Textbox(label="π§ Caption with Keyword Integration", lines=4)
+            bleu2_box = gr.Textbox(label="π BLEU Score (With Keyword)", lines=1)
+
+    btn.click(
+        fn=run_pipeline,
+        inputs=[image_input, actual_caption],
+        outputs=[cls_box, cap1_box, bleu1_box, cap2_box, bleu2_box]
+    )

demo.launch()
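Note on the new BLEU branch in run_pipeline: it calls sentence_bleu with method1 smoothing and reaches into nltk.translate.bleu_score directly; the corresponding imports sit above line 11 of app.py and are not part of this diff, so they are assumed here. Below is a minimal standalone sketch of that scoring path; the helper name and the two captions are invented for illustration and are not taken from app.py.

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def bleu_against_reference(reference_caption: str, generated_caption: str) -> float:
    # Simplified stand-in for preprocess_caption: lowercase + whitespace split.
    ref_tokens = reference_caption.lower().split()
    hyp_tokens = generated_caption.lower().split()
    # method1 smoothing keeps short captions from scoring 0 when a
    # higher-order n-gram has no overlap with the reference.
    return sentence_bleu([ref_tokens], hyp_tokens,
                         smoothing_function=SmoothingFunction().method1)

# Hypothetical captions, for illustration only.
print(f"{bleu_against_reference('axial mri of the brain', 'axial mri showing the brain'):.2f}")

As in the diff, the reference is wrapped in a list because sentence_bleu accepts multiple references per hypothesis.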
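The btn.click wiring added by this commit relies on Gradio's positional mapping: run_pipeline returns a 5-tuple, and the i-th element lands in the i-th component listed in outputs. A pared-down sketch of that pattern follows; every name in it is a placeholder rather than one from app.py.

import gradio as gr

def fake_pipeline(image, reference_text):
    # Stand-in for run_pipeline: one return value per output component, in order.
    return "classification", "caption A", "0.42", "caption B", "0.57"

with gr.Blocks() as demo:
    img = gr.Image(type="pil", label="Input image")
    ref = gr.Textbox(label="Reference caption (optional)")
    go = gr.Button("Run")
    outs = [gr.Textbox(label=name) for name in
            ("Classification", "Caption 1", "BLEU 1", "Caption 2", "BLEU 2")]
    # The i-th returned value fills the i-th component in `outputs`.
    go.click(fn=fake_pipeline, inputs=[img, ref], outputs=outs)

demo.launch()

This positional contract is presumably why the commit flattens result_sections into an ordered tuple at the end of run_pipeline.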