File size: 5,809 Bytes
c09e844
 
 
 
 
 
 
 
 
d0244e1
 
c09e844
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""

ML Prediction Service with LAZY LOADING & REMOTE MODEL FETCHING

"""
import os
from typing import List, Dict, Any, Optional
# [QUAN TRỌNG] Import thư viện để tải model từ kho riêng
from huggingface_hub import hf_hub_download

# Only set HF cache for local development
# if not os.getenv("RENDER") and not os.getenv("SPACE_ID"):
#     os.environ['HF_HOME'] = 'G:/huggingface_cache'

class MLPredictionService:
    """ML prediction service with lazy loading.

    Heavy model weights are fetched on first request from an external
    Hugging Face Model repo, bypassing the 1 GB limit of a Space Git repo.
    """

    def __init__(self):
        """Initialize the service without loading the model (lazy loading)."""
        # Model components — populated by _load_model() on the first request.
        self.model: Optional[Any] = None
        self.tokenizer: Optional[Any] = None
        self.device: Optional[str] = None
        self.model_loaded = False

        # No hard-coded local path: the weight file lives in a remote
        # Hugging Face model repo identified below.
        self.MODEL_REPO_ID = "vtdung23/my-phobert-models"
        self.MODEL_FILENAME = "best_phoBER.pth"

        print("✅ ML Service initialized (Model will download & load on first request)")

    def _load_model(self):
        """Download weights and load model + tokenizer (first request only).

        Idempotent: returns immediately once ``model_loaded`` is set.
        Raises whatever ``hf_hub_download`` raises if the download fails.
        """
        if self.model_loaded:
            return

        print("🔄 Loading ML model (first request)...")

        # Import heavy dependencies only when needed, keeping module
        # import (and service construction) cheap.
        import torch
        from transformers import AutoTokenizer, RobertaForSequenceClassification

        # Determine device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"📍 Using device: {self.device}")

        # Load the tokenizer straight from the upstream vinai/phobert-base
        # repo — the local tokenizer folder was removed, so the upstream
        # copy is the safe source of truth.
        print("📦 Loading tokenizer from vinai/phobert-base...")
        self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)

        # Fetch the fine-tuned weight file from the dedicated model repo.
        print(f"⬇️ Downloading weights from repo: {self.MODEL_REPO_ID}...")
        try:
            model_path = hf_hub_download(
                repo_id=self.MODEL_REPO_ID,
                filename=self.MODEL_FILENAME,
                repo_type="model"  # important: this is a Model repo, not a Space/dataset
            )
            print(f"✅ Downloaded weights to: {model_path}")
        except Exception as e:
            print(f"❌ Error downloading model: {e}")
            # Bare raise preserves the original exception and traceback
            # (``raise e`` would re-anchor the traceback here).
            raise

        # Load the base model architecture.
        print("🧠 Loading PhoBERT architecture...")
        self.model = RobertaForSequenceClassification.from_pretrained(
            "vinai/phobert-base",
            num_labels=5,  # must match training: labels 0-4 map to ratings 1-5
            problem_type="single_label_classification"
        )

        # Load the fine-tuned weights.
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects — acceptable only because the checkpoint comes from our own
        # repo; never point MODEL_REPO_ID at an untrusted source.
        print("⚙️ Loading trained weights into architecture...")
        state_dict = torch.load(model_path, map_location=self.device, weights_only=False)
        self.model.load_state_dict(state_dict)

        # Evaluation mode + move to the chosen device.
        self.model.eval()
        self.model.to(self.device)

        self.model_loaded = True
        print("✅ Model loaded successfully and ready to serve!")

    def predict_single(self, text: str) -> Dict[str, Any]:
        """Predict the rating for a single comment.

        Returns a dict with ``'rating'`` (int, 1-5) and ``'confidence'``
        (float, softmax probability of the predicted class).
        """
        # Lazy load model on first request.
        self._load_model()

        import torch
        import torch.nn.functional as F

        # 1. Vietnamese preprocessing (word segmentation).
        processed_text = self.preprocess(text)

        # 2. Tokenize.
        encoded = self.tokenizer(
            processed_text,
            padding=True,
            truncation=True,
            max_length=256,
            return_tensors="pt"
        )

        # Move tensors to device.
        encoded = {k: v.to(self.device) for k, v in encoded.items()}

        # 3. Inference.
        with torch.no_grad():
            outputs = self.model(**encoded)
            logits = outputs.logits
            probs = F.softmax(logits, dim=1)

        # 4. Prediction + confidence.
        predicted_class = torch.argmax(probs, dim=1).item()
        confidence = probs[0][predicted_class].item()

        # 5. Convert 0-based label -> rating 1-5
        #    (assumes training label 0 corresponds to 1 star).
        rating = predicted_class + 1

        return {
            'rating': rating,
            'confidence': confidence
        }

    def predict_batch(self, texts: List[str]) -> List[Dict[str, Any]]:
        """Predict ratings for multiple comments.

        Returns one dict per input with ``'text'``, ``'rating'`` and
        ``'confidence'`` keys; an empty input yields an empty list.
        Could be optimized with batched tokenization, but the simple
        per-item loop is kept for safety.
        """
        results = []
        for text in texts:
            prediction = self.predict_single(text)
            results.append({
                'text': text,
                'rating': prediction['rating'],
                'confidence': prediction['confidence']
            })
        return results

    def preprocess(self, text: str) -> str:
        """Word-segment Vietnamese text using underthesea."""
        from underthesea import word_tokenize
        text = word_tokenize(text, format="text")
        return text

# Module-level singleton: instantiated eagerly at import time. This is cheap —
# the heavy model itself is lazy-loaded on the first prediction request.
ml_service = MLPredictionService()


def get_ml_service() -> MLPredictionService:
    """Return the shared MLPredictionService singleton."""
    return ml_service