# Hugging Face Space: Vietnamese sentiment analysis demo (CNN-LSTM).
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
| from tensorflow.keras.models import load_model | |
| from huggingface_hub import snapshot_download | |
| import unicodedata | |
# ====== Label mapping ======
# Class index -> human-readable Vietnamese sentiment name
# (0 = negative, 1 = neutral, 2 = positive).
label_map = {0: "Tiêu cực", 1: "Trung lập", 2: "Tích cực"}

# Maximum token-sequence length fed to the model.
MAX_LEN = 200
# ====== Load model artifacts ======
# Download (or reuse the local cache of) the pretrained model repo,
# then restore the Keras model, the fitted tokenizer, and the label encoder.
local_dir = snapshot_download(
    repo_id="phucn001/SentimentAnalysisModels",
    local_dir="./Models",
)

# compile=False: inference only, no optimizer/loss state is needed.
cnn_lstm_model = load_model(f"{local_dir}/CNNLSTM/best_model.h5", compile=False)
cnn_lstm_tokenizer = joblib.load(f"{local_dir}/CNNLSTM/tokenizer.pkl")
cnn_label_encoder = joblib.load(f"{local_dir}/CNNLSTM/label_encoder.pkl")
def normalize_text(text):
    """Canonicalize a string for comparison: NFC-normalize Unicode,
    strip surrounding whitespace, and lowercase."""
    canonical = unicodedata.normalize("NFC", text)
    return canonical.strip().lower()
# Inverse of label_map: normalized label text -> class index.
# Normalization makes the lookup robust to case/whitespace/Unicode-form
# differences between label_map and the stored label encoder classes.
reverse_label_map = dict(
    (normalize_text(name), idx) for idx, name in label_map.items()
)
def predict_cnn_lstm(text):
    """Predict the sentiment of a comment with the CNN-LSTM model.

    Args:
        text: Raw input string (Vietnamese comment).

    Returns:
        dict with:
            "label": predicted class name from label_map.
            "probabilities": class name -> probability, in label_map
                index order.
    """
    # Tokenize and truncate to at most MAX_LEN ids up front.
    seq = cnn_lstm_tokenizer.texts_to_sequences([text])[0][:MAX_LEN]

    # Left-pad (pre-padding) so tokens sit at the end of the window.
    padded = np.zeros((1, MAX_LEN))
    # BUGFIX: an empty/all-OOV input produced len(seq) == 0, and
    # padded[0, -0:] selects the WHOLE row, so assigning an empty list
    # raised a ValueError and crashed the app. Guard against that case.
    if seq:
        padded[0, -len(seq):] = seq

    probs = cnn_lstm_model.predict(padded, verbose=0)[0]

    # The model's outputs are ordered by cnn_label_encoder.classes_;
    # reorder them into canonical label_map index order.
    probs_reordered = np.zeros_like(probs)
    for i, label in enumerate(cnn_label_encoder.classes_):
        probs_reordered[reverse_label_map[normalize_text(label)]] = probs[i]

    pred = int(np.argmax(probs_reordered))
    return {
        "label": label_map[pred],
        "probabilities": {
            label_map[i]: float(p) for i, p in enumerate(probs_reordered)
        },
    }
# ====== Gradio UI ======
# Single-textbox interface; the prediction dict is rendered as JSON.
comment_box = gr.Textbox(lines=2, placeholder="Nhập câu bình luận...")
demo = gr.Interface(
    fn=predict_cnn_lstm,
    inputs=comment_box,
    outputs="json",
    title="Sentiment Analysis - CNN-LSTM",
)

if __name__ == "__main__":
    demo.launch()