bombshelll committed on
Commit
227593e
·
1 Parent(s): 6f25734

Add hierarchical classification and captioning app

Browse files
Files changed (2) hide show
  1. app.py +83 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from PIL import Image
import torch
from transformers import VisionEncoderDecoderModel, AutoTokenizer, ViTFeatureExtractor, AutoImageProcessor, AutoModelForImageClassification

# Run everything on GPU when one is available; all tensors/models below
# are expected to live on this device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load image captioning model
# ViT encoder + BioMedBERT decoder checkpoint (presumably fine-tuned on the
# ROCO radiology dataset, judging by the model id — TODO confirm).
# Downloaded from the Hub at import time, so first startup is slow.
caption_model = VisionEncoderDecoderModel.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO").to(device)
tokenizer = AutoTokenizer.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
feature_extractor = ViTFeatureExtractor.from_pretrained("bombshelll/ViT_BioMedBert_Captioning_ROCO")
# Load classification models
def load_classifier(model_id):
    """Load an image-classification checkpoint and its preprocessor.

    Args:
        model_id: Hugging Face Hub model identifier.

    Returns:
        ``(processor, model)`` tuple; the model is moved to the global
        ``device`` and put into eval mode, ready for inference.
    """
    processor = AutoImageProcessor.from_pretrained(model_id)
    # BUG FIX: classify_image() moves its inputs to `device`, but the model
    # was left on CPU — on a CUDA machine every forward pass would crash
    # with a device-mismatch error. Keep model and inputs on the same device.
    model = AutoModelForImageClassification.from_pretrained(model_id).to(device)
    model.eval()  # disable dropout/batch-norm updates for inference
    return processor, model
# Hierarchical classifier bank, keyed by task name. Insertion order matters:
# classify_image() gates "tumor_type" on the earlier "abnormality" result,
# so "abnormality" must precede "tumor_type" in this dict.
classifiers = {
    "plane": load_classifier("bombshelll/swin-brain-plane-classification"),
    "modality": load_classifier("bombshelll/swin-brain-modality-classification"),
    "abnormality": load_classifier("bombshelll/swin-brain-abnormalities-classification"),
    "tumor_type": load_classifier("bombshelll/swin-brain-tumor-type-classification")
}
# Inference functions
def classify_image(image):
    """Run the hierarchical classifier bank on a PIL image.

    Args:
        image: input image (PIL.Image as supplied by Gradio).

    Returns:
        dict mapping classifier name -> predicted label string. The
        "tumor_type" key is present only when the "abnormality" classifier
        predicted "tumor" (hierarchical gating).
    """
    results = {}
    for name, (processor, model) in classifiers.items():
        # Hierarchical gate: the original code ran the tumor-type model on
        # every image and then discarded the prediction; skipping the
        # forward pass gives the identical result dict and saves compute.
        # Relies on "abnormality" preceding "tumor_type" in `classifiers`.
        if name == "tumor_type" and results.get("abnormality") != "tumor":
            continue
        inputs = processor(image, return_tensors="pt").to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # argmax over the class dimension -> human-readable label
        results[name] = model.config.id2label[logits.argmax(-1).item()]
    return results
def generate_captions(image, keywords):
    """Produce two captions for *image*: one plain, one keyword-guided.

    Args:
        image: input image (PIL.Image).
        keywords: list of label strings used to seed the decoder for the
            second caption.

    Returns:
        ``(plain_caption, guided_caption)`` tuple of decoded strings.
    """
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)

    caption_model.eval()

    # Caption 1: unconditional generation (default greedy decoding).
    with torch.no_grad():
        plain_ids = caption_model.generate(pixel_values, max_length=80)
    plain_caption = tokenizer.decode(plain_ids[0], skip_special_tokens=True)

    # Caption 2: seed the decoder with the classification keywords so the
    # generated text stays anchored to the predicted findings.
    keyword_prompt = " ".join(keywords)
    seed_ids = tokenizer(keyword_prompt, return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        guided_ids = caption_model.generate(
            pixel_values,
            decoder_input_ids=seed_ids,
            max_length=80,
            num_beams=4,
            no_repeat_ngram_size=3,
            length_penalty=2.0,
        )
    guided_caption = tokenizer.decode(guided_ids[0], skip_special_tokens=True)

    return plain_caption, guided_caption
# Main app logic
def run_pipeline(image):
    """End-to-end pipeline: classify the image, then caption it twice.

    Returns the classification dict, the plain caption, and the
    keyword-guided caption, in that order (matches the Gradio outputs).
    """
    labels = classify_image(image)
    plain, guided = generate_captions(image, list(labels.values()))
    return labels, plain, guided
# Gradio Interface
# Single-image input; three outputs wired 1:1 to run_pipeline's return tuple.
interface = gr.Interface(
    fn=run_pipeline,
    inputs=gr.Image(type="pil"),  # "pil" so classifiers/extractor get a PIL.Image
    outputs=[
        gr.JSON(label="Classification Result"),
        gr.Textbox(label="Caption without Keywords"),
        gr.Textbox(label="Caption with Keywords")
    ],
    title="🧠 Brain Hierarchical Classification + Captioning",
    description="Upload an MRI/CT brain image. The system will classify (plane, modality, abnormality, tumor) and generate two captions: one plain and one guided by the classification keywords."
)

# Launch the app (blocking; default host/port, suitable for HF Spaces).
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers
3
+ Pillow
4
+ gradio