department_classifier_space

Sleeping

App Files Files Community

mr-kush commited on Oct 11, 2025

Commit

b3cf3ad

1 Parent(s): 289081c

Add DepartmentPredictor class for department classification model

Browse files

Files changed (1) hide show

predict_dept_model.py +61 -0

predict_dept_model.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+import torch
+import os
+class DepartmentPredictor:
+    def __init__(self, model_repo="mr-kush/sambodhan-department-classification-model",
+                 cache_dir="/app/model_cache"):
+        """Load model and tokenizer once at startup."""
+        self.model_repo = model_repo
+        self.cache_dir = cache_dir
+        # Ensure cache folder exists
+        os.makedirs(self.cache_dir, exist_ok=True)
+        # Device selection
+        self.device = 0 if torch.cuda.is_available() else -1
+        print("🔄 Loading tokenizer and model...")
+        # Load tokenizer and model
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_repo, cache_dir=self.cache_dir)
+        self.model = AutoModelForSequenceClassification.from_pretrained(self.model_repo, cache_dir=self.cache_dir)
+        # Create classification pipeline
+        self.classifier = pipeline(
+            "text-classification",
+            model=self.model,
+            tokenizer=self.tokenizer,
+            device=self.device,
+            return_all_scores=True
+        )
+        print("✅ Model and tokenizer loaded successfully.")
+    def predict(self, texts):
+        """Predict departments with scores for a single text or a batch."""
+        if isinstance(texts, str):
+            texts = [texts]
+        results = self.classifier(texts)
+        formatted_results = []
+        for preds in results:
+            # Sort by descending confidence
+            preds = sorted(preds, key=lambda x: x["score"], reverse=True)
+            top_pred = preds[0]
+            label = top_pred["label"]
+            confidence = round(top_pred["score"], 4)
+            scores_dict = {p["label"]: round(p["score"], 4) for p in preds}
+            formatted_results.append({
+                "label": label,
+                "confidence": confidence,
+                "scores": scores_dict
+            })
+        # Return single dict if only one input
+        return formatted_results[0] if len(formatted_results) == 1 else formatted_results
+    @staticmethod
+    def load_model():
+        """Helper to preload the model during Docker build."""
+        _ = DepartmentPredictor()