""" ML Prediction Service with LAZY LOADING & REMOTE MODEL FETCHING """ import os from typing import List, Dict, Any, Optional # [QUAN TRỌNG] Import thư viện để tải model từ kho riêng from huggingface_hub import hf_hub_download # Only set HF cache for local development # if not os.getenv("RENDER") and not os.getenv("SPACE_ID"): # os.environ['HF_HOME'] = 'G:/huggingface_cache' class MLPredictionService: """ ML Service with lazy loading. Fetches heavy model weights from external Hugging Face Model Repo to bypass the 1GB limit of Space Git Repo. """ def __init__(self): """Initialize service without loading model (lazy loading)""" # Model components self.model: Optional[Any] = None self.tokenizer: Optional[Any] = None self.device: Optional[str] = None self.model_loaded = False # [SỬA ĐỔI] Không set đường dẫn cứng ở đây nữa vì file không còn ở máy # Chúng ta sẽ định nghĩa Repo ID chứa model ở đây self.MODEL_REPO_ID = "vtdung23/my-phobert-models" self.MODEL_FILENAME = "best_phoBER.pth" print("✅ ML Service initialized (Model will download & load on first request)") def _load_model(self): """Load model and tokenizer (called on first request)""" if self.model_loaded: return print("🔄 Loading ML model (first request)...") # Import heavy dependencies only when needed import torch from transformers import AutoTokenizer, RobertaForSequenceClassification # Determine device self.device = "cuda" if torch.cuda.is_available() else "cpu" print(f"📍 Using device: {self.device}") # [SỬA ĐỔI 1] Load Tokenizer từ gốc vinai/phobert-base # Vì folder tokenizer local đã bị xóa, ta load thẳng từ thư viện gốc cho an toàn print("📦 Loading tokenizer from vinai/phobert-base...") self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False) # [SỬA ĐỔI 2] Tải file weights từ Kho Model riêng về print(f"⬇️ Downloading weights from repo: {self.MODEL_REPO_ID}...") try: model_path = hf_hub_download( repo_id=self.MODEL_REPO_ID, filename=self.MODEL_FILENAME, repo_type="model" # Quan trọng: báo đây là kho Model ) print(f"✅ Downloaded weights to: {model_path}") except Exception as e: print(f"❌ Error downloading model: {e}") raise e # Load model architecture print("🧠 Loading PhoBERT architecture...") self.model = RobertaForSequenceClassification.from_pretrained( "vinai/phobert-base", num_labels=5, # Đảm bảo số này khớp với lúc bạn train (0,1,2,3,4 hay 1-5?) problem_type="single_label_classification" ) # Load fine-tuned weights print("⚙️ Loading trained weights into architecture...") state_dict = torch.load(model_path, map_location=self.device, weights_only=False) self.model.load_state_dict(state_dict) # Set to evaluation mode and move to device self.model.eval() self.model.to(self.device) self.model_loaded = True print("✅ Model loaded successfully and ready to serve!") def predict_single(self, text: str) -> Dict[str, Any]: """Predict rating for a single comment""" # Lazy load model on first request self._load_model() import torch import torch.nn.functional as F # 1. Vietnamese preprocessing processed_text = self.preprocess(text) # 2. Tokenize encoded = self.tokenizer( processed_text, padding=True, truncation=True, max_length=256, return_tensors="pt" ) # Move tensors to device encoded = {k: v.to(self.device) for k, v in encoded.items()} # 3. Inference with torch.no_grad(): outputs = self.model(**encoded) logits = outputs.logits probs = F.softmax(logits, dim=1) # 4. Get prediction + confidence predicted_class = torch.argmax(probs, dim=1).item() confidence = probs[0][predicted_class].item() # 5. Convert 0-based label -> rating 1-5 # (Giả sử model train label 0 tương ứng 1 sao) rating = predicted_class + 1 return { 'rating': rating, 'confidence': confidence } def predict_batch(self, texts: List[str]) -> List[Dict[str, any]]: """Predict ratings for multiple comments""" results = [] for text in texts: # Có thể tối ưu bằng cách batch tokenize, nhưng loop đơn giản cho an toàn prediction = self.predict_single(text) results.append({ 'text': text, 'rating': prediction['rating'], 'confidence': prediction['confidence'] }) return results def preprocess(self, text: str) -> str: """Preprocess Vietnamese text""" from underthesea import word_tokenize text = word_tokenize(text, format="text") return text # Singleton instance ml_service = MLPredictionService() def get_ml_service() -> MLPredictionService: return ml_service