| |
| |
| |
| import json, pickle, re |
| import numpy as np |
| import tensorflow as tf |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
| import gradio as gr |
|
|
| |
| |
| |
# Locations of the trained artifacts; relative paths are resolved against the
# current working directory of the process.
MODEL_PATH = "model.keras"
TOK_PATH = "tokenizer.pkl"
LE_PATH = "label_encoder.pkl"
CFG_PATH = "config.json"

# Trained Keras model used for inference.
model = tf.keras.models.load_model(MODEL_PATH)

# NOTE: unpickling executes arbitrary code embedded in the file, so these two
# artifacts must only ever come from a trusted source.
with open(TOK_PATH, mode="rb") as tok_file:
    tokenizer = pickle.load(tok_file)

with open(LE_PATH, mode="rb") as le_file:
    le = pickle.load(le_file)

with open(CFG_PATH, mode="r", encoding="utf-8") as cfg_file:
    cfg = json.load(cfg_file)

# Sequence length used when padding/truncating tokenised input.
MAX_LEN = int(cfg["max_len"])
|
|
| |
| |
| |
# Keyword patterns used by the rule-based override in front of the model.
# All of them are matched case-insensitively with ``search`` (anywhere in the
# text).

# Mild complaint: a delivery/shipping term followed, on the same line, by a
# slowness term.
NEG_LIGHT = re.compile(
    r"(giao(\s*hàng)?|ship|vận\s*chuyển).*(chậm|trễ|lâu|delay)",
    flags=re.IGNORECASE,
)

# Strong complaint keywords (bad, poor, faulty, broken, annoyed, ...).
NEG_STRONG = re.compile(
    r"(tệ|kém|lỗi|hỏng|bực|thất\s*vọng|không\s*đáng\s*tiền)",
    flags=re.IGNORECASE,
)

# Emphatic praise ("very good", "very satisfied", ...).
POS_STRONG = re.compile(
    r"(rất\s*tốt|rất\s*hài\s*lòng|rất\s*ưng\s*ý|cực\s*kỳ\s*tốt)",
    flags=re.IGNORECASE,
)

# Plain praise keywords; note that every POS_STRONG phrase except
# "cực kỳ tốt" also contains one of these as a substring.
POS_LIGHT = re.compile(
    r"(tốt|chất\s*lượng|hài\s*lòng|ưng\s*ý|ổn|xịn)",
    flags=re.IGNORECASE,
)
|
|
| |
| |
| |
def _rule_based_label(text: str) -> "str | None":
    """Return the label forced by the keyword rules, or None if none applies.

    The rules look only at which keyword patterns occur in ``text``:

    * strong complaint without emphatic praise      -> "Negative"
    * emphatic praise plus only a mild complaint    -> "Positive"
    * plain praise together with a mild complaint   -> "Neutral"
    """
    pos_strong = POS_STRONG.search(text) is not None
    pos_light = POS_LIGHT.search(text) is not None
    neg_strong = NEG_STRONG.search(text) is not None
    neg_light = NEG_LIGHT.search(text) is not None

    if neg_strong and not pos_strong:
        return "Negative"

    # Reached with neg_strong set only when pos_strong is set too, so the
    # explicit ``not neg_strong`` check is still required here.
    if pos_strong and neg_light and not neg_strong:
        return "Positive"

    if pos_light and neg_light:
        return "Neutral"

    return None


def predict_label(text: str) -> str:
    """Classify the sentiment of a review comment.

    Args:
        text: Raw comment. ``None`` is treated like an empty string; any other
            value is converted with ``str`` and stripped of surrounding
            whitespace.

    Returns:
        ``""`` for empty/blank input. Otherwise the label chosen by the
        keyword rules when one of them applies, else the entry of
        ``le.classes_`` at the arg-max of the model output (presumably one of
        the same label names the rules use; confirm against the encoder).
    """
    text = "" if text is None else str(text).strip()
    if not text:
        return ""

    # The rules depend on the text alone, so evaluate them first and skip
    # tokenisation and model inference whenever they already decide the label.
    rule_label = _rule_based_label(text)
    if rule_label is not None:
        return rule_label

    seq = tokenizer.texts_to_sequences([text])
    X = pad_sequences(seq, maxlen=MAX_LEN, padding="post", truncating="post")
    prob = model.predict(X, verbose=0)[0]

    # str() so the declared return type holds even for NumPy string scalars.
    return str(le.classes_[int(np.argmax(prob))])
|
|
| |
| |
| |
# Three-line input box for the review comment.
_review_input = gr.Textbox(
    lines=3,
    placeholder="Nhập bình luận cần phân tích cảm xúc...",
)

# Text box that displays the predicted label.
_label_output = gr.Textbox(label="Kết quả")

# Web UI wiring the input box through predict_label to the output box.
demo = gr.Interface(
    fn=predict_label,
    inputs=_review_input,
    outputs=_label_output,
    title="Sentiment Analysis - FastText + BiLSTM (Tiki Reviews)",
)


# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
|