Spaces:

adilsiraju
/

Medical-Case-Classifier

Sleeping

App Files Community

adilsiraju committed on Sep 25, 2025

Commit

09d0e11

1 Parent(s): fa63877

New Model

Browse files

Files changed (9) hide show

app.py +66 -40
appcopy.md +61 -0
medical_classifier_model/config.json +55 -0
medical_classifier_model/label_encoder.pkl +3 -0
medical_classifier_model/model.safetensors +3 -0
medical_classifier_model/special_tokens_map.json +7 -0
medical_classifier_model/tokenizer.json +0 -0
medical_classifier_model/tokenizer_config.json +58 -0
medical_classifier_model/vocab.txt +0 -0

app.py CHANGED Viewed

@@ -1,59 +1,85 @@
 import gradio as gr
-from transformers import pipeline
-# Define the candidate labels for classification
-medical_specialties = [
-    "Cardiovascular Pulmonary",
-    "Orthopedic",
-    "Nephrology",
-    "ENT Otolaryngology",
-    "Obstetrics Gynecology",
-    "Ophthalmology",
-    "Gastroenterology",
-    "Neurology",
-    "Radiology",
-    "Psychiatry Psychology",
-    "Pediatrics Neonatal",
-    "Hematology Oncology",
-    "Neurosurgery"
-]
-# Initialize the zero-shot classification pipeline
-# A better-performing, fine-tuned model could be used here.
-classifier = pipeline(
-    "zero-shot-classification",
-    model="facebook/bart-large-mnli",
-    device=-1  # Use -1 for CPU, or 0 for GPU if available
-)
-def classify_medical_text(text):
     """
-    Classifies a medical text into one of the predefined medical specialties.
     """
-    if not text:
-        return {"Error": "Please provide some text to classify."}
-    # Perform zero-shot classification
-    result = classifier(text, medical_specialties)
-    # Format the output for better display
-    labels = result['labels']
-    scores = result['scores']
-    # Return the results as a dictionary for Gradio to display
-    return {label: score for label, score in zip(labels, scores)}
 # Create the Gradio interface
 iface = gr.Interface(
-    fn=classify_medical_text,
     inputs=gr.Textbox(
         lines=10,
         placeholder="Paste a medical document or text here...",
         label="Medical Text"
     ),
-    outputs=gr.Label(num_top_classes=len(medical_specialties)),
     title="Medical Document Classifier",
-    description="This application uses a zero-shot classification model to predict the medical specialty of a given text."
 )
 # Launch the interface

 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import pickle
+# Load the saved model, tokenizer, and label encoder
+try:
+    # Use the correct path where you saved your model
+    model_path = "./medical_classifier_model"
+    # Check for GPU availability
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Load the model and move it to the correct device
+    model = AutoModelForSequenceClassification.from_pretrained(model_path)
+    model.to(device)
+    # Load the tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
+    # Load the label encoder
+    with open(f'{model_path}/label_encoder.pkl', 'rb') as f:
+        label_encoder = pickle.load(f)
+    # Get the class names from the label encoder
+    class_names = list(label_encoder.classes_)
+    print("Model, tokenizer, and label encoder loaded successfully!")
+except Exception as e:
+    print(f"Error loading model components: {e}")
+    # Fallback or exit if loading fails
+    model, tokenizer, label_encoder, class_names = None, None, None, []
+def predict_medical_specialty(text):
     """
+    Predicts the medical specialty of a given text using the fine-tuned model.
     """
+    if not text or not all([model, tokenizer, label_encoder]):
+        return {"Error": "Model not loaded correctly. Please check server logs."}
+    # Ensure the model is in evaluation mode
+    model.eval()
+    # Tokenize the input text and prepare it for the model
+    inputs = tokenizer(
+        text,
+        truncation=True,
+        padding="max_length",
+        max_length=128,
+        return_tensors="pt"
+    ).to(device) # Move the input tensors to the same device as the model
+    with torch.no_grad():
+        # Get model outputs
+        outputs = model(**inputs)
+        # Apply softmax to get probabilities
+        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    # Get the top class predictions and their scores
+    scores, indices = torch.topk(probabilities, k=len(class_names))
+    # Map the indices back to their original specialty names
+    predicted_labels = label_encoder.inverse_transform(indices.squeeze().cpu().numpy())
+    # Create a dictionary of results
+    result_dict = {label: score.item() for label, score in zip(predicted_labels, scores.squeeze())}
+    return result_dict
 # Create the Gradio interface
 iface = gr.Interface(
+    fn=predict_medical_specialty,
     inputs=gr.Textbox(
         lines=10,
         placeholder="Paste a medical document or text here...",
         label="Medical Text"
     ),
+    outputs=gr.Label(num_top_classes=len(class_names)),
     title="Medical Document Classifier",
+    description="This application uses a fine-tuned Bio_ClinicalBERT model to predict the medical specialty of a given text."
 )
 # Launch the interface

appcopy.md ADDED Viewed

	@@ -0,0 +1,61 @@

+import gradio as gr
+from transformers import pipeline
+# Define the candidate labels for classification
+medical_specialties = [
+    "Cardiovascular Pulmonary",
+    "Orthopedic",
+    "Nephrology",
+    "ENT Otolaryngology",
+    "Obstetrics Gynecology",
+    "Ophthalmology",
+    "Gastroenterology",
+    "Neurology",
+    "Radiology",
+    "Psychiatry Psychology",
+    "Pediatrics Neonatal",
+    "Hematology Oncology",
+    "Neurosurgery"
+]
+# Initialize the zero-shot classification pipeline
+# A better-performing, fine-tuned model could be used here.
+classifier = pipeline(
+    "zero-shot-classification",
+    model="facebook/bart-large-mnli",
+    device=-1  # Use -1 for CPU, or 0 for GPU if available
+)
+def classify_medical_text(text):
+    """
+    Classifies a medical text into one of the predefined medical specialties.
+    """
+    if not text:
+        return {"Error": "Please provide some text to classify."}
+    # Perform zero-shot classification
+    result = classifier(text, medical_specialties)
+    # Format the output for better display
+    labels = result['labels']
+    scores = result['scores']
+    # Return the results as a dictionary for Gradio to display
+    return {label: score for label, score in zip(labels, scores)}
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=classify_medical_text,
+    inputs=gr.Textbox(
+        lines=10,
+        placeholder="Paste a medical document or text here...",
+        label="Medical Text"
+    ),
+    outputs=gr.Label(num_top_classes=len(medical_specialties)),
+    title="Medical Document Classifier",
+    description="This application uses a zero-shot classification model to predict the medical specialty of a given text."
+)
+# Launch the interface
+if __name__ == "__main__":
+    iface.launch()

medical_classifier_model/config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.56.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 28996
+}

medical_classifier_model/label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bab7c256fe67b2cf75cc80ddcf92d92eda398d465ad84f1f4e2b1726306b3a2
+size 1591

medical_classifier_model/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fde2b928afcf154ac6f041fef201e113e1fba1b34a58526364afa88481bf2d9
+size 433304604

medical_classifier_model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

medical_classifier_model/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

medical_classifier_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

medical_classifier_model/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff