Spaces:

Fredaaaaaa
/

severity

Sleeping

App Files Files Community

Fredaaaaaa committed on Aug 21, 2025

Commit

0f8c85b

verified ·

1 Parent(s): 1cfa4ef

Update inference.py

Browse files

Files changed (1) hide show

inference.py +110 -55

inference.py CHANGED Viewed

@@ -1,62 +1,117 @@
-# inference.py
 import torch
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from torch.utils.data import DataLoader, Dataset
-import numpy as np
-import pandas as pd
class DrugInteractionDataset(Dataset):
    """Dataset that tokenizes drug-interaction descriptions on access.

    Args:
        description: A single description, or a list/tuple of descriptions.
            Any other value is stored as one sample.
        tokenizer: Callable invoked as ``tokenizer(text, padding='max_length',
            truncation=True, max_length=..., return_tensors='pt')``; its
            result is indexed with ``'input_ids'`` and ``'attention_mask'``.
        max_length: Length passed to the tokenizer for padding/truncation.
    """

    def __init__(self, description, tokenizer, max_length=512):
        # Only real sequences are treated as "many samples"; a bare string
        # must stay one sample rather than being split into characters.
        if isinstance(description, (list, tuple)):
            self.description = list(description)
        else:
            self.description = [description]
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of stored descriptions."""
        return len(self.description)

    def __getitem__(self, idx):
        """Tokenize the description at ``idx`` and return its tensors.

        Returns:
            dict with ``'input_ids'`` and ``'attention_mask'`` entries, each
            with the leading axis removed.
        """
        encoding = self.tokenizer(
            self.description[idx],
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt',
        )
        # Assumes the tokenizer returns tensors with a leading batch axis of
        # size 1. squeeze(0) removes only that axis; a bare squeeze() would
        # also remove the sequence axis whenever max_length == 1.
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
        }
 class DDIPredictor:
-    def __init__(self, model_repo="Fredaaaaaa/drug_interaction_severity"):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_repo)
-        self.model = AutoModelForSequenceClassification.from_pretrained(model_repo)
         self.model.eval()
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.model.to(self.device)
-    def predict(self, interaction_description):
-        # Create dataset and dataloader
-        dataset = DrugInteractionDataset(interaction_description, self.tokenizer)
-        dataloader = DataLoader(dataset, batch_size=1)
-        # Get prediction
-        with torch.no_grad():
-            for batch in dataloader:
-                input_ids = batch['input_ids'].to(self.device)
-                attention_mask = batch['attention_mask'].to(self.device)
-                outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
-                logits = outputs.logits
-                probabilities = torch.softmax(logits, dim=-1).cpu().numpy()[0]
-                prediction = torch.argmax(logits, dim=-1).cpu().item()
-        # Map prediction to label (adjust based on your training labels)
-        label_map = {0: "No Interaction", 1: "Mild", 2: "Moderate", 3: "Severe"}  # Update based on your classes
-        confidence = probabilities[prediction] * 100
-        return {
-            "prediction": label_map.get(prediction, "Unknown"),
-            "confidence": confidence,
-            "probabilities": {k: v for k, v in enumerate(probabilities)}
-        }

 import torch
+from transformers import AutoTokenizer, AutoModel
+import joblib
+from huggingface_hub import hf_hub_download
+import json
class DrugInteractionClassifier(torch.nn.Module):
    """Pretrained encoder followed by a two-layer classification head.

    The attribute names ``bert`` and ``classifier`` and the order of layers
    inside the ``Sequential`` determine the module's ``state_dict`` keys, so
    they must stay as they are for saved weights to keep loading.

    Args:
        n_classes: Number of output logits.
        bert_model_name: Identifier passed to ``AutoModel.from_pretrained``.
        hidden_dim: Width of the intermediate head layer. Must match the
            checkpoint being loaded (the default reproduces the original 256).
        dropout: Dropout probability between the two head layers.
    """

    def __init__(self, n_classes, bert_model_name="emilyalsentzer/Bio_ClinicalBERT",
                 hidden_dim=256, dropout=0.3):
        super().__init__()
        self.bert = AutoModel.from_pretrained(bert_model_name)
        self.classifier = torch.nn.Sequential(
            torch.nn.Linear(self.bert.config.hidden_size, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout),
            torch.nn.Linear(hidden_dim, n_classes),
        )

    def forward(self, input_ids, attention_mask):
        """Return the head's output for a batch of token ids.

        Args:
            input_ids: Token-id tensor forwarded to the encoder.
            attention_mask: Mask tensor forwarded to the encoder.

        Returns:
            Result of applying ``self.classifier`` to the position-0 vector of
            the encoder's first output.
        """
        bert_output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # First encoder output, sequence position 0 -- presumably the [CLS]
        # embedding of the last hidden state; confirm for non-BERT encoders.
        pooled_output = bert_output[0][:, 0, :]
        return self.classifier(pooled_output)
 class DDIPredictor:
+    def __init__(self, repo_id="Fredaaaaaa/drug_interaction_severity"):
+        self.repo_id = repo_id
+        # Download model files from Hugging Face
+        self.config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+        self.model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
+        self.label_encoder_path = hf_hub_download(repo_id=repo_id, filename="label_encoder.joblib")
+        # Load config
+        with open(self.config_path, "r") as f:
+            self.config = json.load(f)
+        # Load tokenizer from repo
+        self.tokenizer = AutoTokenizer.from_pretrained(repo_id)
+        # Load label encoder
+        self.label_encoder = joblib.load(self.label_encoder_path)
+        # Initialize model
+        self.model = DrugInteractionClassifier(
+            n_classes=self.config["num_labels"],
+            bert_model_name=self.config["bert_model_name"]
+        )
+        # Load weights
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.load_state_dict(
+            torch.load(self.model_path, map_location=device, weights_only=True)
+        )
+        self.model.to(device)
         self.model.eval()
+        self.device = device
+        print(f"✅ Model loaded successfully from {repo_id} on {device}")
+    def predict(self, text, confidence_threshold=0.0):
+        """Predict drug interaction severity"""
+        if not text or not text.strip():
+            return {
+                "prediction": "Invalid Input",
+                "confidence": 0.0,
+                "probabilities": {label: 0.0 for label in self.label_encoder.classes_}
+            }
+        try:
+            # Tokenize
+            inputs = self.tokenizer(
+                text,
+                max_length=self.config["max_length"],
+                padding=True,
+                truncation=True,
+                return_tensors="pt"
+            )
+            inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            # Predict
+            with torch.no_grad():
+                outputs = self.model(inputs["input_ids"], inputs["attention_mask"])
+                probabilities = torch.softmax(outputs, dim=1)
+                confidence, predicted_idx = torch.max(probabilities, dim=1)
+            predicted_label = self.label_encoder.inverse_transform([predicted_idx.item()])[0]
+            # Get all probabilities
+            all_probs = {
+                self.label_encoder.inverse_transform([i])[0]: prob.item()
+                for i, prob in enumerate(probabilities[0])
+            }
+            return {
+                "prediction": predicted_label,
+                "confidence": confidence.item(),
+                "probabilities": all_probs
+            }
+        except Exception as e:
+            return {
+                "prediction": f"Error: {str(e)}",
+                "confidence": 0.0,
+                "probabilities": {label: 0.0 for label in self.label_encoder.classes_}
+            }
# Simple test
def _run_smoke_test(repo_id="Fredaaaaaa/drug_interaction_severity"):
    """Load the predictor, classify one sample sentence and report the outcome.

    Returns:
        0 when a prediction was produced, 1 when loading or prediction failed,
        so the value can be used as the process exit status.
    """
    try:
        predictor = DDIPredictor(repo_id)
        test_text = "Drug interaction may increase bleeding risk"
        result = predictor.predict(test_text)
    except Exception as e:
        print(f"❌ Error: {e}")
        return 1
    # predict() reports its own failures as an "Error: ..." prediction rather
    # than raising, so that case has to be checked explicitly.
    if str(result["prediction"]).startswith("Error:"):
        print(f"❌ {result['prediction']}")
        return 1
    print("✅ Test successful!")
    print(f"Prediction: {result['prediction']}")
    print(f"Confidence: {result['confidence']:.3f}")
    return 0


if __name__ == "__main__":
    raise SystemExit(_run_smoke_test())