File size: 11,566 Bytes
43b1fa0
 
6b2c2e0
 
43b1fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b2c2e0
 
 
 
 
 
 
 
 
 
 
 
 
43b1fa0
76a11b5
 
 
 
 
 
6b2c2e0
76a11b5
6b2c2e0
76a11b5
 
 
 
 
43b1fa0
 
6b2c2e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43b1fa0
 
 
6b2c2e0
43b1fa0
 
 
 
 
6b2c2e0
43b1fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b2c2e0
 
43b1fa0
6b2c2e0
 
 
 
 
 
 
 
43b1fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
6b2c2e0
 
 
 
43b1fa0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b2c2e0
43b1fa0
 
 
6b2c2e0
 
 
 
 
 
 
 
 
 
 
 
76a11b5
43b1fa0
 
 
 
6b2c2e0
43b1fa0
 
 
6b2c2e0
43b1fa0
 
6b2c2e0
43b1fa0
 
 
 
 
 
 
 
 
6b2c2e0
43b1fa0
 
 
 
 
 
6b2c2e0
43b1fa0
6b2c2e0
43b1fa0
 
6b2c2e0
 
 
 
 
 
 
 
 
 
 
43b1fa0
 
 
 
 
6b2c2e0
43b1fa0
6b2c2e0
 
 
 
43b1fa0
 
 
 
 
 
6b2c2e0
43b1fa0
6b2c2e0
 
 
 
 
 
 
 
76a11b5
43b1fa0
 
 
 
 
6b2c2e0
43b1fa0
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/bin/env python3
"""
Demonstration script for using Pulse Core 1 - Vietnamese Sentiment Analysis System from Hugging Face Hub.
Shows how to download and use the pre-trained sentiment models for both general sentiment and banking aspect sentiment.
"""

from huggingface_hub import hf_hub_download
import joblib


def predict_text(model, text, top_k=3):
    """Classify a single text and report the most probable classes.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and ``classes_``.
            ``predict_proba`` is expected to return array-like rows that
            support ``argsort`` (e.g. NumPy arrays).
        text: The raw text to classify.
        top_k: Number of highest-probability classes to report. Defaults to
            3, which matches the previously hard-coded behaviour.

    Returns:
        A ``(prediction, confidence, top_predictions)`` tuple where
        ``top_predictions`` is a list of ``(label, probability)`` pairs in
        descending probability order and ``prediction``/``confidence`` are
        its first entry. If anything goes wrong while predicting, the error
        is printed and ``(None, 0, [])`` is returned instead of raising.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    # Validate outside the try block so a caller mistake is not silently
    # converted into the "prediction failed" sentinel.
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    try:
        probabilities = model.predict_proba([text])[0]

        # argsort is ascending: reverse it, then keep the first top_k indices.
        top_indices = probabilities.argsort()[::-1][:top_k]
        top_predictions = [
            (model.classes_[idx], probabilities[idx]) for idx in top_indices
        ]

        # The overall prediction is simply the highest-ranked entry.
        prediction, confidence = top_predictions[0]
        return prediction, confidence, top_predictions
    except Exception as e:
        # Deliberate best-effort: callers check for a None prediction.
        print(f"Error making prediction: {e}")
        return None, 0, []


def load_model_from_hub(model_type="vlsp2016"):
    """Download and load a pre-trained Pulse Core 1 model from Hugging Face Hub.

    Args:
        model_type: 'vlsp2016' for general sentiment. Any other value
            (normally 'uts2017') selects the banking aspect sentiment model.

    Returns:
        A ``(model, model_type)`` tuple: the unpickled model object and the
        ``model_type`` argument exactly as it was passed in.

    Raises:
        Exception: Whatever ``hf_hub_download`` or ``joblib.load`` raises is
            re-raised after a diagnostic message has been printed.
    """
    if model_type == "vlsp2016":
        filename = "vlsp2016_sentiment_20250929_075529.joblib"
        print("Downloading Pulse Core 1 (Vietnamese General Sentiment) model from Hugging Face Hub...")
        classes_desc = "sentiment classes (positive, negative, neutral)"
    else:
        filename = "uts2017_sentiment_20250928_131716.joblib"
        print("Downloading Pulse Core 1 (Vietnamese Banking Aspect Sentiment) model from Hugging Face Hub...")
        classes_desc = "aspect-sentiment combinations"

    # Keep the download in its own try block so the "file not uploaded yet"
    # hint is only shown when the download itself is what failed.
    try:
        model_path = hf_hub_download("undertheseanlp/pulse_core_1", filename)
    except Exception as e:
        print(f"Error downloading model: {e}")
        print("This might mean the model file hasn't been uploaded to Hugging Face Hub yet.")
        print("Please check the repository: https://huggingface.co/undertheseanlp/pulse_core_1")
        raise
    print(f"Model downloaded to: {model_path}")

    print("Loading model...")
    try:
        # SECURITY NOTE: joblib.load unpickles the file, which can execute
        # arbitrary code. Only load artifacts from a repository you trust.
        model = joblib.load(model_path)
    except Exception as e:
        print(f"Error loading model: {e}")
        raise
    print(f"Model loaded successfully. Classes: {len(model.classes_)} {classes_desc}")

    # Report the final estimator when the model is a pipeline with a 'clf'
    # step; otherwise fall back to the model's own type instead of failing.
    try:
        classifier = model.named_steps['clf']
    except (AttributeError, KeyError):
        classifier = model
    print(f"Model type: {type(classifier).__name__}")
    return model, model_type


def predict_sentiment_examples(model, model_type):
    """Run the model over a built-in list of labelled Vietnamese sentences.

    For ``model_type == "vlsp2016"`` the general sentiment examples are used;
    any other value selects the banking aspect-sentiment examples. For every
    sentence the expected label, the predicted label, its confidence and the
    ranked predictions returned by ``predict_text`` are printed. Nothing is
    returned.
    """
    banner = "=" * 60
    divider = "-" * 60

    # Pick the heading, the intro line and the labelled samples up front so
    # that the printing below is identical for both model flavours.
    if model_type == "vlsp2016":
        title = "VIETNAMESE GENERAL SENTIMENT ANALYSIS EXAMPLES"
        intro = "Testing Vietnamese general sentiment analysis:"
        samples = [
            ("positive", "Sản phẩm này rất tốt, tôi rất hài lòng"),
            ("negative", "Chất lượng dịch vụ tệ quá"),
            ("neutral", "Giá cả hợp lý, có thể chấp nhận được"),
            ("positive", "Nhân viên phục vụ rất nhiệt tình"),
            ("negative", "Đồ ăn không ngon, sẽ không quay lại"),
            ("positive", "Giao hàng nhanh chóng, đóng gói cẩn thận"),
            ("neutral", "Sản phẩm bình thường, không có gì đặc biệt"),
            ("positive", "Rất đáng tiền, chất lượng tuyệt vời"),
            ("negative", "Không như mong đợi, khá thất vọng"),
            ("positive", "Dịch vụ khách hàng tốt, giải quyết nhanh chóng"),
        ]
    else:
        title = "VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS EXAMPLES"
        intro = "Testing Vietnamese banking aspect sentiment analysis:"
        # Expected labels here are ASPECT#polarity combinations.
        samples = [
            ("CUSTOMER_SUPPORT#negative", "Dịch vụ chăm sóc khách hàng rất tệ"),
            ("CUSTOMER_SUPPORT#positive", "Nhân viên hỗ trợ rất nhiệt tình"),
            ("TRADEMARK#positive", "Ngân hàng ACB có uy tín tốt"),
            ("TRADEMARK#negative", "Thương hiệu ngân hàng này không đáng tin cậy"),
            ("LOAN#positive", "Lãi suất vay mua nhà rất ưu đãi"),
            ("LOAN#negative", "Lãi suất vay quá cao, không chấp nhận được"),
            ("INTEREST_RATE#negative", "Lãi suất tiết kiệm thấp quá"),
            ("INTEREST_RATE#positive", "Lãi suất gửi tiết kiệm khá hấp dẫn"),
            ("CARD#negative", "Thẻ tín dụng bị khóa không rõ lý do"),
            ("CARD#positive", "Thẻ ATM rất tiện lợi khi sử dụng"),
            ("INTERNET_BANKING#negative", "Internet banking hay bị lỗi"),
            ("INTERNET_BANKING#positive", "Ứng dụng ngân hàng điện tử dễ sử dụng"),
            ("MONEY_TRANSFER#negative", "Phí chuyển tiền quá đắt"),
            ("PROMOTION#positive", "Chương trình khuyến mãi rất hấp dẫn"),
            ("SECURITY#positive", "Bảo mật tài khoản rất tốt"),
        ]

    print("\n" + banner)
    print(title)
    print(banner)
    print(intro)
    print(divider)

    for expected, sentence in samples:
        try:
            label, score, ranked = predict_text(model, sentence)

            # predict_text hands back a None label when it failed; in that
            # case only the divider is printed for this sample.
            if label:
                print(f"Text: {sentence}")
                print(f"Expected: {expected}")
                print(f"Predicted: {label}")
                print(f"Confidence: {score:.3f}")

                print("Top 3 predictions:")
                for rank, (candidate, candidate_prob) in enumerate(ranked, 1):
                    print(f"  {rank}. {candidate}: {candidate_prob:.3f}")

            print(divider)

        except Exception as e:
            print(f"Error predicting '{sentence}': {e}")
            print(divider)


def interactive_mode(model, model_type):
    """Read texts from standard input in a loop and print their predictions.

    Args:
        model: Model object that is handed to ``predict_text``.
        model_type: 'vlsp2016' selects the general sentiment wording; any
            other value selects the banking aspect-sentiment wording.

    The loop ends when the user types 'quit', 'exit' or 'q', presses Ctrl-C,
    or when standard input is exhausted or unusable (EOF / OSError). Blank
    lines are ignored. Nothing is returned.
    """
    print("\n" + "="*60)
    if model_type == "vlsp2016":
        print("INTERACTIVE MODE - VIETNAMESE GENERAL SENTIMENT ANALYSIS")
        print("="*60)
        print("Enter Vietnamese text to analyze sentiment (type 'quit' to exit):")
    else:
        print("INTERACTIVE MODE - VIETNAMESE BANKING ASPECT SENTIMENT ANALYSIS")
        print("="*60)
        print("Enter Vietnamese banking text to analyze aspect and sentiment (type 'quit' to exit):")

    while True:
        try:
            user_input = input("\nText: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                break

            if not user_input:
                continue

            prediction, confidence, top_predictions = predict_text(model, user_input)

            if prediction:
                if model_type == "vlsp2016":
                    print(f"Predicted sentiment: {prediction}")
                else:
                    print(f"Predicted aspect-sentiment: {prediction}")
                print(f"Confidence: {confidence:.3f}")

                # Show the ranked predictions returned by predict_text
                print("Top 3 predictions:")
                for i, (category, prob) in enumerate(top_predictions, 1):
                    print(f"  {i}. {category}: {prob:.3f}")

        except (KeyboardInterrupt, EOFError, OSError):
            # EOFError/OSError mean stdin is closed or unreadable; input()
            # would fail the same way on every iteration, so retrying would
            # spin forever. Leave the loop just like on Ctrl-C.
            print("\nExiting...")
            break
        except Exception as e:
            print(f"Error: {e}")


def simple_usage_examples():
    """Show simple usage examples for HuggingFace Hub models"""
    print("\n" + "="*60)
    print("HUGGINGFACE HUB USAGE EXAMPLES")
    print("="*60)

    print("Code examples:")
    print("""
# Pulse Core 1 Models (Vietnamese Sentiment Analysis)
from huggingface_hub import hf_hub_download
import joblib

# Option 1: General Sentiment Analysis (VLSP2016)
general_model = joblib.load(
    hf_hub_download("undertheseanlp/pulse_core_1", "vlsp2016_sentiment_20250929_075529.joblib")
)

# Make prediction on general text
general_text = "Sản phẩm này rất tốt"
prediction = general_model.predict([general_text])[0]
print(f"Sentiment: {prediction}")

# Option 2: Banking Aspect Sentiment Analysis (UTS2017_Bank)
banking_model = joblib.load(
    hf_hub_download("undertheseanlp/pulse_core_1", "uts2017_sentiment_20250928_131716.joblib")
)

# Make prediction on banking text
bank_text = "Tôi muốn mở tài khoản tiết kiệm"
prediction = banking_model.predict([bank_text])[0]
print(f"Aspect-Sentiment: {prediction}")

# For detailed predictions with confidence scores
probabilities = banking_model.predict_proba([bank_text])[0]
top_indices = probabilities.argsort()[-3:][::-1]
for idx in top_indices:
    category = banking_model.classes_[idx]
    prob = probabilities[idx]
    print(f"{category}: {prob:.3f}")

# For local file inference, use inference.py instead
""")


def main():
    """Run the full demo: usage snippet, both model tests, optional REPL.

    Both models are downloaded and exercised on the built-in examples. When
    the session looks interactive, the user may then pick one model for
    interactive mode. Any exception raised along the way is reported with a
    printed message and not propagated.
    """
    rule = "=" * 60
    print("Pulse Core 1 - Vietnamese Sentiment Analysis System")
    print(rule)

    try:
        simple_usage_examples()

        print("\n" + rule)
        print("TESTING PULSE CORE 1 MODELS")
        print(rule)

        # (model key, heading) pairs, processed in this order.
        stages = (
            ("vlsp2016", "\n1. Testing VLSP2016 General Sentiment Model"),
            ("uts2017", "\n2. Testing UTS2017 Banking Aspect Sentiment Model"),
        )
        loaded = {}
        for key, heading in stages:
            print(heading)
            print("-" * 40)
            model, resolved_type = load_model_from_hub(key)
            predict_sentiment_examples(model, resolved_type)
            loaded[key] = model

        # Offer interactive mode only when a prompt can actually be answered.
        try:
            import sys
            can_prompt = hasattr(sys, 'ps1') or sys.stdin.isatty()
            if can_prompt:
                choice = input("\nEnter interactive mode? Choose model type (vlsp2016/uts2017/n): ").strip().lower()

                # Any answer that is not a known model key skips the REPL.
                if choice in loaded:
                    interactive_mode(loaded[choice], choice)

        except (EOFError, OSError):
            print("\nInteractive mode not available in this environment.")
            print("Run this script in a regular terminal to use interactive mode.")

        closing_lines = (
            "\nDemonstration complete!",
            "\nPulse Core 1 models are available on Hugging Face Hub:",
            "- Repository: undertheseanlp/pulse_core_1",
            "- VLSP2016 Model: vlsp2016_sentiment_20250929_075529.joblib",
            "  * Task: Vietnamese General Sentiment Analysis",
            "  * Classes: 3 sentiment polarities",
            "  * Test accuracy: 71.14%",
            "- UTS2017 Model: uts2017_sentiment_20250928_131716.joblib",
            "  * Task: Vietnamese Banking Aspect Sentiment Analysis",
            "  * Classes: 35 aspect-sentiment combinations",
            "  * Test accuracy: 71.72%",
            "- Model type: Support Vector Classification (SVC)",
        )
        for line in closing_lines:
            print(line)

    except ImportError:
        print("Error: huggingface_hub is required. Install with:")
        print("  pip install huggingface_hub")
    except Exception as e:
        print(f"Error loading models: {e}")
        print("\nMake sure you have internet connection and try again.")


# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()