Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
| from underthesea import word_tokenize | |
| # from src.text_preprocessor import preprocess_text | |
| MODEL_NAME = "ndyah2020/phobert-base-v2-vsmec-finetuned" | |
| # MODEL_NAME = "wonrax/phobert-base-vietnamese-sentiment" # | |
def load_sentiment_pipeline():
    """Build the sentiment-analysis pipeline for ``MODEL_NAME``.

    The tokenizer is loaded first (with ``use_fast=False``), then the
    sequence-classification model; both are handed to ``pipeline`` together
    with truncation at 256 tokens. A Streamlit success banner is shown once
    the pipeline has been created.

    Returns:
        The constructed pipeline, or ``None`` if any step raised. In the
        failure case the exception text is shown through ``st.error``.
    """
    # Tokenizer options forwarded to the pipeline: cut inputs longer than
    # 256 tokens instead of failing on them.
    truncation_options = {"truncation": True, "max_length": 256}

    try:
        # Load order matters for which error surfaces first: tokenizer, then model.
        loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
        loaded_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

        classifier = pipeline(
            "sentiment-analysis",
            model=loaded_model,
            tokenizer=loaded_tokenizer,
            **truncation_options,
        )

        st.success("✅ Mô hình (local) đã sẵn sàng!")
        return classifier
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing the app.
        st.error(f"❌ Lỗi khi tải mô hình local: {e}")
        return None
def predict_sentiment(text: str, sentiment_pipeline):
    """Classify the sentiment of ``text`` and describe how clear the result is.

    The text is word-segmented with ``word_tokenize``, re-joined with single
    spaces and passed to ``sentiment_pipeline``; only the first item of the
    pipeline's result is used.

    Args:
        text: Raw input text.
        sentiment_pipeline: Callable that takes one string and returns a
            sequence whose first item has ``"label"`` and ``"score"`` entries
            (``load_sentiment_pipeline`` returns such an object), or ``None``
            when no pipeline is available.

    Returns:
        A ``(label, confidence)`` tuple.

        * ``("Lỗi", 0.0)`` when ``sentiment_pipeline`` is ``None``, when
          ``text`` is not a string, is empty or contains only whitespace, or
          when segmentation/prediction raises.
        * ``("NEUTRAL (không rõ)", confidence)`` when the score is below 0.5,
          regardless of the predicted label.
        * Otherwise the mapped label (``NEGATIVE``/``POSITIVE``/``NEUTRAL``,
          or ``"Không xác định"`` for a label outside ``NEG``/``POS``/``NEU``)
          followed by a strength suffix chosen from the score.
    """
    # Whitespace-only text has nothing to classify; running the model on it
    # would report an arbitrary label as if it were a real prediction, so it
    # is handled exactly like empty input.
    if sentiment_pipeline is None or not isinstance(text, str) or not text.strip():
        return "Lỗi", 0.0

    label_map = {
        "NEG": "NEGATIVE",
        "POS": "POSITIVE",
        "NEU": "NEUTRAL",
    }
    confidence_threshold = 0.5

    try:
        segmented_text = " ".join(word_tokenize(text))
        result = sentiment_pipeline(segmented_text)[0]
        raw_label = result["label"].upper()
        confidence = float(result["score"])
    except Exception as e:
        # Best-effort boundary: any tokenizer/model failure becomes the error
        # result instead of propagating to the caller.
        print(f"Lỗi khi dự đoán: {e}")
        return "Lỗi", 0.0

    if confidence < confidence_threshold:
        # Confidence is too low: force the result to neutral.
        label = label_map["NEU"]
        label += " (không rõ)"
    else:
        # Confidence is high enough: use the predicted label.
        label = label_map.get(raw_label, "Không xác định")
        if confidence >= 0.85:
            label += " (rất rõ)"
        elif confidence >= 0.7:
            label += " (khá rõ)"
        else:
            label += " (hơi nhẹ)"

    return label, confidence