Spaces:

kerodat2004
/

sentiment-tiki-app-FastText-BiLSTM

Sleeping

App Files Files Community

sentiment-tiki-app-FastText-BiLSTM / app.py

kerodat2004

Update app.py

0613242 verified 4 months ago

raw

history blame contribute delete

2.71 kB

	# =========================
	# GRADIO: FastText + BiLSTM + Mixed Rule (HF)
	# =========================
	import json, pickle, re
	import numpy as np
	import tensorflow as tf
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import gradio as gr

	# =========================
	# LOAD ARTIFACTS (HF relative paths)
	# =========================
	# Artifact locations; relative paths, so they resolve against the directory
	# the app is started from.
	MODEL_PATH = "model.keras"
	TOK_PATH = "tokenizer.pkl"
	LE_PATH = "label_encoder.pkl"
	CFG_PATH = "config.json"

	# Keras model queried by predict_label().
	model = tf.keras.models.load_model(MODEL_PATH)

	# NOTE(security): pickle.load runs arbitrary code embedded in the file.
	# Only deploy tokenizer / label-encoder pickles produced by a trusted
	# training run; never load pickles from user uploads.
	with open(TOK_PATH, "rb") as f:
	    tokenizer = pickle.load(f)

	with open(LE_PATH, "rb") as f:
	    le = pickle.load(f)

	with open(CFG_PATH, "r", encoding="utf-8") as f:
	    cfg = json.load(f)

	# Length every token sequence is padded/truncated to before inference.
	# Raises KeyError at startup if config.json has no "max_len" entry.
	MAX_LEN = int(cfg["max_len"])

	# =========================
	# RULE KEYWORDS (STRONG / LIGHT)
	# =========================
	# All patterns are case-insensitive and are used with .search(), so they
	# match anywhere in the review. Branches are separated with a plain "|":
	# an escaped "\|" would match a literal pipe character and the rule would
	# never fire on ordinary text. "\s*" lets multi-word terms match with or
	# without whitespace between the words.

	# Soft negative: a delivery/shipping term followed, later on the same line,
	# by a slowness term (e.g. "giao hàng ... chậm", "ship ... delay").
	NEG_LIGHT = re.compile(
	    r"(giao(\s*hàng)?|ship|vận\s*chuyển).*(chậm|trễ|lâu|delay)",
	    re.I,
	)

	# Hard negative: explicit complaint words (bad, faulty, broken, annoyed,
	# disappointed, not worth the money).
	NEG_STRONG = re.compile(
	    r"(tệ|kém|lỗi|hỏng|bực|thất\s*vọng|không\s*đáng\s*tiền)",
	    re.I,
	)

	# Hard positive: intensified praise ("rất ...", "cực kỳ tốt").
	POS_STRONG = re.compile(
	    r"(rất\s*tốt|rất\s*hài\s*lòng|rất\s*ưng\s*ý|cực\s*kỳ\s*tốt)",
	    re.I,
	)

	# Soft positive: plain praise words. Note that every POS_STRONG phrase also
	# contains one of these, so POS_LIGHT matches whenever POS_STRONG does.
	POS_LIGHT = re.compile(
	    r"(tốt|chất\s*lượng|hài\s*lòng|ưng\s*ý|ổn|xịn)",
	    re.I,
	)

	# =========================
	# PREDICT WITH MIXED LOGIC
	# =========================
	def predict_label(text: str) -> str:
	    """Return the sentiment label for one review.

	    Keyword rules are checked first; the neural model is consulted only
	    when no rule settles the answer.

	    Args:
	        text: Raw review text. ``None`` and non-string values are accepted;
	            the value is converted with ``str()`` and stripped.

	    Returns:
	        ``""`` for empty/whitespace-only input. Otherwise ``"Negative"``,
	        ``"Positive"`` or ``"Neutral"`` when a rule fires, else the entry of
	        ``le.classes_`` with the highest predicted probability.
	    """
	    text = "" if text is None else str(text).strip()
	    if not text:
	        return ""

	    # Rule signals: cheap regex checks, evaluated before any inference.
	    pos_strong = POS_STRONG.search(text) is not None
	    pos_light = POS_LIGHT.search(text) is not None
	    neg_strong = NEG_STRONG.search(text) is not None
	    neg_light = NEG_LIGHT.search(text) is not None

	    # An explicit complaint wins unless strong praise is also present.
	    if neg_strong and not pos_strong:
	        return "Negative"

	    # Strong praise outweighs a mere delivery-delay remark.
	    if pos_strong and neg_light and not neg_strong:
	        return "Positive"

	    # Mild praise combined with a delivery-delay remark is a mixed review.
	    if pos_light and neg_light:
	        return "Neutral"

	    # No rule decided: fall back to the model's most probable class.
	    seq = tokenizer.texts_to_sequences([text])
	    X = pad_sequences(seq, maxlen=MAX_LEN, padding="post", truncating="post")
	    prob = model.predict(X, verbose=0)[0]
	    return le.classes_[int(np.argmax(prob))]

	# =========================
	# GRADIO UI
	# =========================
	# Single-textbox UI: the review goes in, predict_label()'s label comes out.
	demo = gr.Interface(
	    fn=predict_label,
	    inputs=gr.Textbox(
	        lines=3,
	        placeholder="Nhập bình luận cần phân tích cảm xúc...",
	    ),
	    outputs=gr.Textbox(label="Kết quả"),
	    title="Sentiment Analysis - FastText + BiLSTM (Tiki Reviews)",
	)

	# Launch the app only when this file is run as a script, so importing the
	# module (e.g. to reuse predict_label) does not start a server.
	if __name__ == "__main__":
	    demo.launch()