Spaces:
Sleeping
Sleeping
File size: 5,809 Bytes
c09e844 d0244e1 c09e844 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
"""
ML Prediction Service with LAZY LOADING & REMOTE MODEL FETCHING
"""
import os
from typing import List, Dict, Any, Optional
# [IMPORTANT] Import the library used to download the model from the external repo
from huggingface_hub import hf_hub_download
# Only set HF cache for local development
# if not os.getenv("RENDER") and not os.getenv("SPACE_ID"):
# os.environ['HF_HOME'] = 'G:/huggingface_cache'
class MLPredictionService:
    """
    ML prediction service with lazy loading.

    Fetches heavy model weights from an external Hugging Face Model repo
    to bypass the 1GB limit of the Space git repo. The tokenizer and model
    are only downloaded/loaded on the first prediction request.
    """

    def __init__(self):
        """Initialize the service without loading the model (lazy loading)."""
        # Model components — populated by _load_model() on first request.
        self.model: Optional[Any] = None
        self.tokenizer: Optional[Any] = None
        self.device: Optional[str] = None
        self.model_loaded = False
        # The heavy weights live in a separate HF Model repo (not this Space's
        # git repo), so only the repo coordinates are recorded here.
        self.MODEL_REPO_ID = "vtdung23/my-phobert-models"
        self.MODEL_FILENAME = "best_phoBER.pth"
        print("✅ ML Service initialized (Model will download & load on first request)")

    def _load_model(self):
        """Download weights and load model + tokenizer (called on first request).

        Idempotent: returns immediately if the model is already loaded.

        Raises:
            Exception: re-raised from hf_hub_download if the weights download fails.
        """
        if self.model_loaded:
            return

        print("🔄 Loading ML model (first request)...")

        # Import heavy dependencies only when actually needed.
        import torch
        from transformers import AutoTokenizer, RobertaForSequenceClassification

        # Determine the inference device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"📍 Using device: {self.device}")

        # Load the tokenizer straight from the upstream vinai/phobert-base repo
        # (the local tokenizer folder was deleted, so upstream is the safe source).
        print("📦 Loading tokenizer from vinai/phobert-base...")
        self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)

        # Download the fine-tuned weights file from the external model repo.
        print(f"⬇️ Downloading weights from repo: {self.MODEL_REPO_ID}...")
        try:
            model_path = hf_hub_download(
                repo_id=self.MODEL_REPO_ID,
                filename=self.MODEL_FILENAME,
                repo_type="model",  # important: this is a Model repo, not a Space
            )
            print(f"✅ Downloaded weights to: {model_path}")
        except Exception as e:
            print(f"❌ Error downloading model: {e}")
            raise  # bare raise preserves the original traceback

        # Instantiate the base architecture, then overwrite it with trained weights.
        print("🧠 Loading PhoBERT architecture...")
        self.model = RobertaForSequenceClassification.from_pretrained(
            "vinai/phobert-base",
            num_labels=5,  # must match the label count used at training time
            problem_type="single_label_classification",
        )

        # Load the fine-tuned weights into the architecture.
        print("⚙️ Loading trained weights into architecture...")
        # SECURITY NOTE(review): weights_only=False deserializes arbitrary pickled
        # objects from the downloaded file — keep only if MODEL_REPO_ID is fully
        # trusted; prefer weights_only=True if the checkpoint allows it.
        state_dict = torch.load(model_path, map_location=self.device, weights_only=False)
        self.model.load_state_dict(state_dict)

        # Switch to evaluation mode and move to the target device before serving.
        self.model.eval()
        self.model.to(self.device)

        self.model_loaded = True
        print("✅ Model loaded successfully and ready to serve!")

    def predict_single(self, text: str) -> Dict[str, Any]:
        """Predict the rating for a single comment.

        Args:
            text: Raw Vietnamese comment text.

        Returns:
            Dict with 'rating' (int, 1-5) and 'confidence' (float, softmax prob).
        """
        # Lazy-load the model on the first request.
        self._load_model()

        import torch
        import torch.nn.functional as F

        # 1. Vietnamese preprocessing (word segmentation).
        processed_text = self.preprocess(text)

        # 2. Tokenize.
        encoded = self.tokenizer(
            processed_text,
            padding=True,
            truncation=True,
            max_length=256,
            return_tensors="pt",
        )
        # Move the input tensors to the model's device.
        encoded = {k: v.to(self.device) for k, v in encoded.items()}

        # 3. Inference (no gradient tracking needed).
        with torch.no_grad():
            outputs = self.model(**encoded)
            logits = outputs.logits
            probs = F.softmax(logits, dim=1)

        # 4. Get prediction + confidence.
        predicted_class = torch.argmax(probs, dim=1).item()
        confidence = probs[0][predicted_class].item()

        # 5. Convert 0-based class index -> rating 1-5
        #    (assumes training label 0 corresponds to 1 star — TODO confirm).
        rating = predicted_class + 1

        return {
            'rating': rating,
            'confidence': confidence,
        }

    def predict_batch(self, texts: List[str]) -> List[Dict[str, Any]]:
        """Predict ratings for multiple comments.

        FIX: return annotation previously used the builtin `any` instead of
        `typing.Any`, inconsistent with predict_single's signature.

        Args:
            texts: List of raw comment strings.

        Returns:
            One dict per input with 'text', 'rating', and 'confidence' keys.
        """
        results = []
        for text in texts:
            # Could be optimized with batched tokenization; a simple
            # one-at-a-time loop is the safe baseline.
            prediction = self.predict_single(text)
            results.append({
                'text': text,
                'rating': prediction['rating'],
                'confidence': prediction['confidence'],
            })
        return results

    def preprocess(self, text: str) -> str:
        """Preprocess Vietnamese text via underthesea word segmentation."""
        # Imported lazily: underthesea is a heavy dependency.
        from underthesea import word_tokenize
        text = word_tokenize(text, format="text")
        return text
# Module-level singleton: constructed once at import time (cheap, since the
# actual model is only loaded lazily on the first prediction request).
ml_service = MLPredictionService()


def get_ml_service() -> MLPredictionService:
    """Return the shared MLPredictionService singleton instance."""
    return ml_service