# kerodat2004's picture
# Update app.py
# 0613242 verified
# =========================
# GRADIO: FastText + BiLSTM + Mixed Rule (HF)
# =========================
import json
import pickle
import re
import unicodedata

import gradio as gr
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
# =========================
# LOAD ARTIFACTS (HF relative paths)
# =========================
# Artifact locations, relative to the working directory.
MODEL_PATH = "model.keras"
TOK_PATH = "tokenizer.pkl"
LE_PATH = "label_encoder.pkl"
CFG_PATH = "config.json"

# Keras model used by predict_label for the fallback prediction.
model = tf.keras.models.load_model(MODEL_PATH)

# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only ship tokenizer/label-encoder pickles produced by a trusted pipeline.
with open(TOK_PATH, "rb") as f:
    # Object providing texts_to_sequences() for turning text into id sequences.
    tokenizer = pickle.load(f)
with open(LE_PATH, "rb") as f:
    # Object whose classes_ attribute maps model output indices to labels.
    le = pickle.load(f)

with open(CFG_PATH, "r", encoding="utf-8") as f:
    cfg = json.load(f)

# Length every input sequence is padded/truncated to before inference.
MAX_LEN = int(cfg["max_len"])
# =========================
# RULE KEYWORDS (STRONG / LIGHT)
# =========================
# Mild complaint: a delivery/shipping term followed, later on the same line,
# by a lateness term ("." does not cross newlines here).
NEG_LIGHT = re.compile(r"(giao(\s*hàng)?|ship|vận\s*chuyển).*(chậm|trễ|lâu|delay)", flags=re.IGNORECASE)

# Strong complaint keywords (matched anywhere in the text, case-insensitive).
NEG_STRONG = re.compile(r"(tệ|kém|lỗi|hỏng|bực|thất\s*vọng|không\s*đáng\s*tiền)", flags=re.IGNORECASE)

# Strong praise: intensified positive phrases.
POS_STRONG = re.compile(r"(rất\s*tốt|rất\s*hài\s*lòng|rất\s*ưng\s*ý|cực\s*kỳ\s*tốt)", flags=re.IGNORECASE)

# Mild praise keywords; note these also match inside the strong phrases above.
POS_LIGHT = re.compile(r"(tốt|chất\s*lượng|hài\s*lòng|ưng\s*ý|ổn|xịn)", flags=re.IGNORECASE)
# =========================
# PREDICT WITH MIXED LOGIC
# =========================
def predict_label(text: str) -> str:
    """Return a sentiment label for *text* using keyword rules plus the model.

    The keyword rules are evaluated first. Only when none of them decides the
    outcome is the model run and its highest-probability class returned.

    Args:
        text: Review text. ``None`` and whitespace-only input count as empty.

    Returns:
        ``""`` for empty input; ``"Negative"``, ``"Positive"`` or ``"Neutral"``
        when a rule fires; otherwise the entry of ``le.classes_`` with the
        highest predicted probability.
    """
    text = "" if text is None else str(text).strip()
    if not text:
        return ""

    # Match rules against NFC text so decomposed (NFD) input still hits the
    # keywords. Assumes the patterns themselves use precomposed characters
    # -- TODO confirm.
    rule_text = unicodedata.normalize("NFC", text)
    pos_strong = POS_STRONG.search(rule_text) is not None
    pos_light = POS_LIGHT.search(rule_text) is not None
    neg_strong = NEG_STRONG.search(rule_text) is not None
    neg_light = NEG_LIGHT.search(rule_text) is not None

    # Rule decisions, in priority order. They never depend on the model
    # output, so checking them first skips inference whenever one applies.
    if neg_strong and not pos_strong:
        return "Negative"
    if pos_strong and neg_light and not neg_strong:
        return "Positive"
    if pos_light and neg_light:
        return "Neutral"

    # Fallback: model prediction on the text as typed.
    # NOTE(review): the model input is left un-normalized because the
    # tokenizer's training-time preprocessing is not visible here.
    seq = tokenizer.texts_to_sequences([text])
    X = pad_sequences(seq, maxlen=MAX_LEN, padding="post", truncating="post")
    prob = model.predict(X, verbose=0)[0]
    # str() so the declared return type holds even if classes_ stores
    # non-builtin string objects.
    return str(le.classes_[int(np.argmax(prob))])
# =========================
# GRADIO UI
# =========================
# Single-function UI: one multi-line text input, one text output showing the
# label returned by predict_label.
demo = gr.Interface(
    fn=predict_label,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Nhập bình luận cần phân tích cảm xúc...",
    ),
    outputs=gr.Textbox(label="Kết quả"),
    title="Sentiment Analysis - FastText + BiLSTM (Tiki Reviews)",
)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()