# mlibbot-backend / utils / intent.py
# Uploaded by RafaelYefta
# Commit message: Upload 49 files
# Commit: c237f1b (verified)
from pathlib import Path
import re
import joblib
from .preprocess import clean_text
# Directory two levels above this file; the "model" directory is resolved
# relative to it.
base = Path(__file__).resolve().parent.parent
model_dir = base.joinpath("model")

# Key of the intent classifier to load. Supported values are the keys of
# ``model_file_mapping``: "logreg_tfidf" or "logreg_indobert".
intent_model = "logreg_tfidf"

# Maps each supported classifier key to its serialized file name in model_dir.
model_file_mapping = {
    "logreg_tfidf": "intent_model_logreg_tfidf.pkl",
    "logreg_indobert": "intent_model_logreg_indobert.pkl",
}

# Fail fast at import time if the configured key has no associated file.
if intent_model not in model_file_mapping:
    raise ValueError(f"Unknown INTENT MODEL NAME: {intent_model}")

intent_model_path = model_dir.joinpath(model_file_mapping[intent_model])

# Deserialized once when the module is imported and shared by the
# prediction helpers below.
intent_pipeline = joblib.load(intent_model_path)
def _preprocess_intent(text: str) -> str:
    """Normalize raw user text before it is passed to the intent model.

    The text is first run through ``clean_text``; the result is coerced to
    ``str`` if it is not one already, lowercased, and then passed through
    three regex substitutions in order: URLs are replaced by a space,
    characters outside digits / ASCII letters / the ``À-ÿ`` range /
    whitespace are replaced by a space, and whitespace runs are collapsed
    to a single space. Leading and trailing whitespace is stripped last.

    Args:
        text: Raw input text.

    Returns:
        The normalized string.
    """
    cleaned = clean_text(text)
    # Coerce whatever clean_text returned to str before using str methods.
    result = (cleaned if isinstance(cleaned, str) else str(cleaned)).lower()

    # Applied strictly in this order; each stage may create whitespace that
    # the final stage collapses.
    substitutions = (
        (r"http\S+|www\.\S+", " "),
        (r"[^0-9a-zA-ZÀ-ÿ\s]", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)

    return result.strip()
def predict_intent_proba(text: str):
    """Return the per-label probabilities predicted for ``text``.

    The text is normalized with ``_preprocess_intent`` and scored by the
    module-level ``intent_pipeline``.

    Args:
        text: Raw input text.

    Returns:
        A dict mapping each entry of ``intent_pipeline.classes_`` to its
        predicted probability as a built-in ``float``.
    """
    normalized = _preprocess_intent(text)
    # The pipeline is called with a one-element batch; keep its only row.
    scores = intent_pipeline.predict_proba([normalized])[0]
    return dict(zip(intent_pipeline.classes_, map(float, scores)))
def predict_intent_conf(text: str):
    """Predict the most likely intent for ``text`` along with its confidence.

    Args:
        text: Raw input text.

    Returns:
        A 4-tuple ``(best_label, best_score, best_percent, proba_dict)``:
        the label with the highest probability, that probability as a
        float, the same value as a percentage rounded to one decimal
        place, and the full label-to-probability dict from
        ``predict_intent_proba``.
    """
    proba_dict = predict_intent_proba(text)
    # max() keeps the first entry on ties, following the dict's own order.
    best_label, top_probability = max(
        proba_dict.items(), key=lambda entry: entry[1]
    )
    best_score = float(top_probability)
    return best_label, best_score, round(best_score * 100, 1), proba_dict