File size: 5,809 Bytes
c09e844
 
 
 
 
 
 
 
 
d0244e1
 
c09e844
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""

ML Prediction Service with LAZY LOADING & REMOTE MODEL FETCHING

"""
import os
from typing import List, Dict, Any, Optional
# [QUAN TRỌNG] Import thư viện để tải model từ kho riêng
from huggingface_hub import hf_hub_download

# Only set HF cache for local development
# if not os.getenv("RENDER") and not os.getenv("SPACE_ID"):
#     os.environ['HF_HOME'] = 'G:/huggingface_cache'

class MLPredictionService:
    """ML prediction service with lazy loading.

    Heavy model weights are fetched on first request from an external
    Hugging Face Model repo, bypassing the 1 GB limit of a Space Git repo.
    """

    def __init__(self):
        """Initialize the service without loading the model (lazy loading)."""
        # Model components — populated by _load_model() on the first request.
        self.model: Optional[Any] = None
        self.tokenizer: Optional[Any] = None
        self.device: Optional[str] = None
        self.model_loaded = False

        # No hard-coded local path: the weight file lives in a remote
        # Hugging Face model repo identified below.
        self.MODEL_REPO_ID = "vtdung23/my-phobert-models"
        self.MODEL_FILENAME = "best_phoBER.pth"

        print("✅ ML Service initialized (Model will download & load on first request)")

    def _load_model(self):
        """Download weights and load model + tokenizer (first request only).

        Idempotent: returns immediately once ``model_loaded`` is set.
        Raises whatever ``hf_hub_download`` raises if the download fails.
        """
        if self.model_loaded:
            return

        print("🔄 Loading ML model (first request)...")

        # Import heavy dependencies only when needed, keeping module
        # import (and service construction) cheap.
        import torch
        from transformers import AutoTokenizer, RobertaForSequenceClassification

        # Determine device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"📍 Using device: {self.device}")

        # Load the tokenizer straight from the upstream vinai/phobert-base
        # repo — the local tokenizer folder was removed, so the upstream
        # copy is the safe source of truth.
        print("📦 Loading tokenizer from vinai/phobert-base...")
        self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)

        # Fetch the fine-tuned weight file from the dedicated model repo.
        print(f"⬇️ Downloading weights from repo: {self.MODEL_REPO_ID}...")
        try:
            model_path = hf_hub_download(
                repo_id=self.MODEL_REPO_ID,
                filename=self.MODEL_FILENAME,
                repo_type="model"  # important: this is a Model repo, not a Space/dataset
            )
            print(f"✅ Downloaded weights to: {model_path}")
        except Exception as e:
            print(f"❌ Error downloading model: {e}")
            # Bare raise preserves the original exception and traceback
            # (``raise e`` would re-anchor the traceback here).
            raise

        # Load the base model architecture.
        print("🧠 Loading PhoBERT architecture...")
        self.model = RobertaForSequenceClassification.from_pretrained(
            "vinai/phobert-base",
            num_labels=5,  # must match training: labels 0-4 map to ratings 1-5
            problem_type="single_label_classification"
        )

        # Load the fine-tuned weights.
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects — acceptable only because the checkpoint comes from our own
        # repo; never point MODEL_REPO_ID at an untrusted source.
        print("⚙️ Loading trained weights into architecture...")
        state_dict = torch.load(model_path, map_location=self.device, weights_only=False)
        self.model.load_state_dict(state_dict)

        # Evaluation mode + move to the chosen device.
        self.model.eval()
        self.model.to(self.device)

        self.model_loaded = True
        print("✅ Model loaded successfully and ready to serve!")

    def predict_single(self, text: str) -> Dict[str, Any]:
        """Predict the rating for a single comment.

        Returns a dict with ``'rating'`` (int, 1-5) and ``'confidence'``
        (float, softmax probability of the predicted class).
        """
        # Lazy load model on first request.
        self._load_model()

        import torch
        import torch.nn.functional as F

        # 1. Vietnamese preprocessing (word segmentation).
        processed_text = self.preprocess(text)

        # 2. Tokenize.
        encoded = self.tokenizer(
            processed_text,
            padding=True,
            truncation=True,
            max_length=256,
            return_tensors="pt"
        )

        # Move tensors to device.
        encoded = {k: v.to(self.device) for k, v in encoded.items()}

        # 3. Inference.
        with torch.no_grad():
            outputs = self.model(**encoded)
            logits = outputs.logits
            probs = F.softmax(logits, dim=1)

        # 4. Prediction + confidence.
        predicted_class = torch.argmax(probs, dim=1).item()
        confidence = probs[0][predicted_class].item()

        # 5. Convert 0-based label -> rating 1-5
        #    (assumes training label 0 corresponds to 1 star).
        rating = predicted_class + 1

        return {
            'rating': rating,
            'confidence': confidence
        }

    def predict_batch(self, texts: List[str]) -> List[Dict[str, Any]]:
        """Predict ratings for multiple comments.

        Returns one dict per input with ``'text'``, ``'rating'`` and
        ``'confidence'`` keys; an empty input yields an empty list.
        Could be optimized with batched tokenization, but the simple
        per-item loop is kept for safety.
        """
        results = []
        for text in texts:
            prediction = self.predict_single(text)
            results.append({
                'text': text,
                'rating': prediction['rating'],
                'confidence': prediction['confidence']
            })
        return results

    def preprocess(self, text: str) -> str:
        """Word-segment Vietnamese text using underthesea."""
        from underthesea import word_tokenize
        text = word_tokenize(text, format="text")
        return text

# Module-level singleton: instantiated eagerly at import time. This is cheap —
# the heavy model itself is lazy-loaded on the first prediction request.
ml_service = MLPredictionService()


def get_ml_service() -> MLPredictionService:
    """Return the shared MLPredictionService singleton."""
    return ml_service