File size: 11,566 Bytes

#!/usr/bin/env python3
"""
Demonstration script for using Pulse Core 1 - Vietnamese Sentiment Analysis System from Hugging Face Hub.
Shows how to download and use the pre-trained sentiment models for both general sentiment and banking aspect sentiment.
"""

from huggingface_hub import hf_hub_download
import joblib


def predict_text(model, text, top_k=3):
    """Classify a single text and return its most probable labels.

    Args:
        model: Object exposing ``predict_proba`` (called with a one-element
            list) and ``classes_`` (indexable by class position).
        text: The text to classify.
        top_k: Maximum number of ranked predictions to return. Defaults to 3,
            the value previously hard-coded here.

    Returns:
        A tuple ``(prediction, confidence, top_predictions)``.
        ``top_predictions`` is a list of ``(label, probability)`` pairs ordered
        from most to least probable; ``prediction`` and ``confidence`` are the
        first pair. If anything fails while predicting, the error is printed
        and ``(None, 0, [])`` is returned so callers can test ``prediction``.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    # Guard explicitly: a slice of [-0:] would silently return every class.
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    try:
        probabilities = model.predict_proba([text])[0]

        # argsort is ascending, so the best classes are the last top_k indices;
        # reverse them to get descending probability order.
        top_indices = probabilities.argsort()[-top_k:][::-1]
        top_predictions = [
            (model.classes_[idx], probabilities[idx]) for idx in top_indices
        ]

        # The headline prediction is simply the best-ranked pair.
        prediction, confidence = top_predictions[0]

        return prediction, confidence, top_predictions
    except Exception as e:
        # Deliberate best-effort: callers check for a None prediction.
        print(f"Error making prediction: {e}")
        return None, 0, []


def load_model_from_hub(model_type="vlsp2016"):
    """Download a pre-trained Pulse Core 1 model from Hugging Face Hub and load it.

    Args:
        model_type: 'vlsp2016' selects the general sentiment model. Any other
            value (documented choice: 'uts2017') selects the banking aspect
            sentiment model.

    Returns:
        A ``(model, model_type)`` tuple: the object returned by ``joblib.load``
        and the ``model_type`` argument, unchanged.

    Raises:
        Exception: Whatever the download, ``joblib.load`` or the class-count
            summary raised; it is re-raised after a troubleshooting hint is
            printed.
    """
    if model_type == "vlsp2016":
        filename = "vlsp2016_sentiment_20250929_075529.joblib"
        print("Downloading Pulse Core 1 (Vietnamese General Sentiment) model from Hugging Face Hub...")
        classes_desc = "sentiment classes (positive, negative, neutral)"
    else:
        filename = "uts2017_sentiment_20250928_131716.joblib"
        print("Downloading Pulse Core 1 (Vietnamese Banking Aspect Sentiment) model from Hugging Face Hub...")
        classes_desc = "aspect-sentiment combinations"

    try:
        model_path = hf_hub_download("undertheseanlp/pulse_core_1", filename)
        print(f"Model downloaded to: {model_path}")

        print("Loading model...")
        # SECURITY NOTE: joblib.load unpickles the file, and unpickling can
        # execute arbitrary code. Only load artifacts from a repository you trust.
        model = joblib.load(model_path)
        print(f"Model loaded successfully. Classes: {len(model.classes_)} {classes_desc}")
    except Exception as e:
        # This handler covers loading as well as downloading, so say so.
        print(f"Error downloading or loading model: {e}")
        print("This might mean the model file hasn't been uploaded to Hugging Face Hub yet.")
        print("Please check the repository: https://huggingface.co/undertheseanlp/pulse_core_1")
        raise

    # The model-type line is purely informational. If the loaded object has no
    # 'clf' step, report its own type instead of failing a successful load.
    try:
        classifier = model.named_steps["clf"]
    except (AttributeError, KeyError, TypeError):
        classifier = model
    print(f"Model type: {type(classifier).__name__}")
    return model, model_type


def predict_sentiment_examples(model, model_type):
    """Run the built-in Vietnamese example sentences through the model and print the results.

    Args:
        model: Classifier handed to ``predict_text`` for every example.
        model_type: 'vlsp2016' uses the general sentiment examples; any other
            value uses the banking aspect-sentiment examples.

    For each example the text, the expected label, the predicted label, the
    confidence and the ranked predictions are printed, followed by a divider.
    """
    banner = "=" * 60
    divider = "-" * 60

    if model_type != "vlsp2016":
        heading = "VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS EXAMPLES"
        intro = "Testing Vietnamese banking aspect sentiment analysis:"
        # Banking sentences paired with the expected aspect#sentiment label
        samples = [
            ("CUSTOMER_SUPPORT#negative", "Dịch vụ chăm sóc khách hàng rất tệ"),
            ("CUSTOMER_SUPPORT#positive", "Nhân viên hỗ trợ rất nhiệt tình"),
            ("TRADEMARK#positive", "Ngân hàng ACB có uy tín tốt"),
            ("TRADEMARK#negative", "Thương hiệu ngân hàng này không đáng tin cậy"),
            ("LOAN#positive", "Lãi suất vay mua nhà rất ưu đãi"),
            ("LOAN#negative", "Lãi suất vay quá cao, không chấp nhận được"),
            ("INTEREST_RATE#negative", "Lãi suất tiết kiệm thấp quá"),
            ("INTEREST_RATE#positive", "Lãi suất gửi tiết kiệm khá hấp dẫn"),
            ("CARD#negative", "Thẻ tín dụng bị khóa không rõ lý do"),
            ("CARD#positive", "Thẻ ATM rất tiện lợi khi sử dụng"),
            ("INTERNET_BANKING#negative", "Internet banking hay bị lỗi"),
            ("INTERNET_BANKING#positive", "Ứng dụng ngân hàng điện tử dễ sử dụng"),
            ("MONEY_TRANSFER#negative", "Phí chuyển tiền quá đắt"),
            ("PROMOTION#positive", "Chương trình khuyến mãi rất hấp dẫn"),
            ("SECURITY#positive", "Bảo mật tài khoản rất tốt")
        ]
    else:
        heading = "VIETNAMESE GENERAL SENTIMENT ANALYSIS EXAMPLES"
        intro = "Testing Vietnamese general sentiment analysis:"
        # General-domain sentences paired with the expected polarity
        samples = [
            ("positive", "Sản phẩm này rất tốt, tôi rất hài lòng"),
            ("negative", "Chất lượng dịch vụ tệ quá"),
            ("neutral", "Giá cả hợp lý, có thể chấp nhận được"),
            ("positive", "Nhân viên phục vụ rất nhiệt tình"),
            ("negative", "Đồ ăn không ngon, sẽ không quay lại"),
            ("positive", "Giao hàng nhanh chóng, đóng gói cẩn thận"),
            ("neutral", "Sản phẩm bình thường, không có gì đặc biệt"),
            ("positive", "Rất đáng tiền, chất lượng tuyệt vời"),
            ("negative", "Không như mong đợi, khá thất vọng"),
            ("positive", "Dịch vụ khách hàng tốt, giải quyết nhanh chóng")
        ]

    print("\n" + banner)
    print(heading)
    print(banner)
    print(intro)
    print(divider)

    for expected_label, sentence in samples:
        try:
            label, score, ranked = predict_text(model, sentence)

            # predict_text signals failure with a None label; skip the report then
            if label:
                print(f"Text: {sentence}")
                print(f"Expected: {expected_label}")
                print(f"Predicted: {label}")
                print(f"Confidence: {score:.3f}")

                # Ranked alternatives, best first
                print("Top 3 predictions:")
                for rank, (candidate, probability) in enumerate(ranked, start=1):
                    print(f"  {rank}. {candidate}: {probability:.3f}")

            print(divider)

        except Exception as err:
            print(f"Error predicting '{sentence}': {err}")
            print(divider)


def interactive_mode(model, model_type):
    """Read texts from stdin in a loop and print the model's prediction for each.

    The loop ends when the user enters 'quit', 'exit' or 'q', presses Ctrl+C,
    or when stdin reaches end-of-file or cannot be read. Blank entries are
    ignored.

    Args:
        model: Classifier handed to ``predict_text`` for every entry.
        model_type: 'vlsp2016' uses the general-sentiment wording; any other
            value uses the banking aspect-sentiment wording.
    """
    print("\n" + "="*60)
    if model_type == "vlsp2016":
        print("INTERACTIVE MODE - VIETNAMESE GENERAL SENTIMENT ANALYSIS")
        print("="*60)
        print("Enter Vietnamese text to analyze sentiment (type 'quit' to exit):")
    else:
        print("INTERACTIVE MODE - VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS")
        print("="*60)
        print("Enter Vietnamese banking text to analyze aspect and sentiment (type 'quit' to exit):")

    while True:
        try:
            user_input = input("\nText: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                break

            if not user_input:
                continue

            prediction, confidence, top_predictions = predict_text(model, user_input)

            # predict_text signals failure with a None prediction
            if prediction:
                if model_type == "vlsp2016":
                    print(f"Predicted sentiment: {prediction}")
                else:
                    print(f"Predicted aspect-sentiment: {prediction}")
                print(f"Confidence: {confidence:.3f}")

                # Show top 3 predictions
                print("Top 3 predictions:")
                for i, (category, prob) in enumerate(top_predictions, 1):
                    print(f"  {i}. {category}: {prob:.3f}")

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except (EOFError, OSError):
            # Once stdin is closed or unreadable, input() fails the same way on
            # every retry; falling through to the generic handler below would
            # print "Error:" forever, so end the session instead.
            print("\nInput stream closed. Exiting...")
            break
        except Exception as e:
            print(f"Error: {e}")


def simple_usage_examples():
    """Print a copy-pasteable snippet showing how to load and query both Hub models."""
    rule = "=" * 60
    # Sample code shown to the user verbatim; it is only printed, never executed.
    snippet = """
# Pulse Core 1 Models (Vietnamese Sentiment Analysis)
from huggingface_hub import hf_hub_download
import joblib

# Option 1: General Sentiment Analysis (VLSP2016)
general_model = joblib.load(
    hf_hub_download("undertheseanlp/pulse_core_1", "vlsp2016_sentiment_20250929_075529.joblib")
)

# Make prediction on general text
general_text = "Sản phẩm này rất tốt"
prediction = general_model.predict([general_text])[0]
print(f"Sentiment: {prediction}")

# Option 2: Banking Aspect Sentiment Analysis (UTS2017_Bank)
banking_model = joblib.load(
    hf_hub_download("undertheseanlp/pulse_core_1", "uts2017_sentiment_20250928_131716.joblib")
)

# Make prediction on banking text
bank_text = "Tôi muốn mở tài khoản tiết kiệm"
prediction = banking_model.predict([bank_text])[0]
print(f"Aspect-Sentiment: {prediction}")

# For detailed predictions with confidence scores
probabilities = banking_model.predict_proba([bank_text])[0]
top_indices = probabilities.argsort()[-3:][::-1]
for idx in top_indices:
    category = banking_model.classes_[idx]
    prob = probabilities[idx]
    print(f"{category}: {prob:.3f}")

# For local file inference, use inference.py instead
"""

    # Header, caption and snippet are emitted in order, one print per item.
    for chunk in (
        "\n" + rule,
        "HUGGINGFACE HUB USAGE EXAMPLES",
        rule,
        "Code examples:",
        snippet,
    ):
        print(chunk)


def main():
    """Run the end-to-end Pulse Core 1 demonstration.

    Prints the usage snippet, loads both models and runs the built-in examples
    through them, offers interactive mode when the session looks interactive,
    and finally prints a summary of the published models. ``ImportError`` and
    other ``Exception`` subclasses are reported with ``print`` rather than
    propagated.
    """
    import sys

    print("Pulse Core 1 - Vietnamese Sentiment Analysis System")
    print("=" * 60)

    try:
        # Show simple usage examples
        simple_usage_examples()

        # Test both models
        print("\n" + "="*60)
        print("TESTING PULSE CORE 1 MODELS")
        print("="*60)

        # Test VLSP2016 general sentiment model
        print("\n1. Testing VLSP2016 General Sentiment Model")
        print("-" * 40)
        vlsp_model, vlsp_type = load_model_from_hub("vlsp2016")
        predict_sentiment_examples(vlsp_model, vlsp_type)

        # Test UTS2017 banking aspect sentiment model
        print("\n2. Testing UTS2017 Banking Aspect Sentiment Model")
        print("-" * 40)
        uts_model, uts_type = load_model_from_hub("uts2017")
        predict_sentiment_examples(uts_model, uts_type)

        # Offer interactive mode only when a user can actually type. sys.stdin
        # can be None (no console attached); treat that as non-interactive
        # instead of letting an AttributeError be reported as a loading error.
        try:
            stdin_is_tty = sys.stdin is not None and sys.stdin.isatty()
            if hasattr(sys, 'ps1') or stdin_is_tty:
                choice = input("\nEnter interactive mode? Choose model type (vlsp2016/uts2017/n): ").strip().lower()

                # Any answer other than the two model names skips interactive mode
                if choice == 'vlsp2016':
                    interactive_mode(vlsp_model, "vlsp2016")
                elif choice == 'uts2017':
                    interactive_mode(uts_model, "uts2017")

        except (EOFError, OSError):
            print("\nInteractive mode not available in this environment.")
            print("Run this script in a regular terminal to use interactive mode.")

        print("\nDemonstration complete!")
        print("\nPulse Core 1 models are available on Hugging Face Hub:")
        print("- Repository: undertheseanlp/pulse_core_1")
        print("- VLSP2016 Model: vlsp2016_sentiment_20250929_075529.joblib")
        print("  * Task: Vietnamese General Sentiment Analysis")
        print("  * Classes: 3 sentiment polarities")
        print("  * Test accuracy: 71.14%")
        print("- UTS2017 Model: uts2017_sentiment_20250928_131716.joblib")
        print("  * Task: Vietnamese Banking Aspect Sentiment Analysis")
        print("  * Classes: 35 aspect-sentiment combinations")
        print("  * Test accuracy: 71.72%")
        print("- Model type: Support Vector Classification (SVC)")

    except ImportError as e:
        # An ImportError reaching this point is not necessarily about
        # huggingface_hub: unpickling a model imports the libraries it was
        # saved with, so report the actual failure instead of guessing.
        print(f"Error: a required package could not be imported: {e}")
        print("Install the missing package and try again. This script itself needs:")
        print("  pip install huggingface_hub joblib")
    except Exception as e:
        print(f"Error loading models: {e}")
        print("\nMake sure you have internet connection and try again.")


# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()