# File size: 11,006 Bytes

# 0dfdd57

"""
Comprehensive testing suite for rmtariq/multilingual-emotion-classifier
This script provides various testing capabilities for the emotion classification model.

Usage:
    python test_model.py --test-type [quick|comprehensive|interactive|benchmark|all]
    
Author: rmtariq
Repository: https://huggingface.co/rmtariq/multilingual-emotion-classifier
"""

import argparse
import time
from transformers import pipeline
import torch

class EmotionModelTester:
    """Comprehensive testing suite for the multilingual emotion classifier.

    The tester builds a ``transformers`` text-classification pipeline for
    ``model_name`` at construction time and exposes four test modes:
    :meth:`quick_test`, :meth:`comprehensive_test`, :meth:`interactive_test`
    and :meth:`benchmark_test`.

    ``self.classifier`` is called with a single string and is expected to
    return a sequence whose first item is a mapping with ``'label'`` and
    ``'score'`` keys; only that first item is used.
    """

    # Icon shown next to each predicted label in interactive mode; labels
    # missing from this table fall back to a "thinking" face.
    _EMOTION_EMOJIS = {
        'anger': '😠', 'fear': '😨', 'happy': '😊',
        'love': '❤️', 'sadness': '😢', 'surprise': '😲'
    }

    def __init__(self, model_name="rmtariq/multilingual-emotion-classifier"):
        """Store *model_name* and immediately load the pipeline for it."""
        self.model_name = model_name
        self.classifier = None
        self.load_model()

    def load_model(self):
        """Load the emotion classification model.

        Uses CUDA device 0 when ``torch.cuda.is_available()`` is true and the
        CPU otherwise.  Any failure is reported on stdout and re-raised.
        """
        print(f"📥 Loading model: {self.model_name}")
        try:
            # Query CUDA once so the pipeline device and the status message
            # cannot disagree.
            use_gpu = torch.cuda.is_available()
            self.classifier = pipeline(
                "text-classification",
                model=self.model_name,
                device=0 if use_gpu else -1
            )
            device = "GPU" if use_gpu else "CPU"
            print(f"✅ Model loaded successfully on {device}")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            raise

    def _predict(self, text):
        """Classify *text* and return ``(label, score)`` of the first result.

        The label is lower-cased so it can be compared with the lower-case
        expected labels used by the test tables.
        """
        top = self.classifier(text)[0]
        return top['label'].lower(), top['score']

    @staticmethod
    def _preview(text, limit):
        """Return *text* cut to *limit* characters.

        An ellipsis is appended only when something was actually cut off, so
        short inputs are shown verbatim.
        """
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    def quick_test(self):
        """Quick test with essential examples.

        Returns:
            The fraction of the built-in examples whose predicted label
            equals the expected one.
        """
        print("\n🚀 QUICK TEST")
        print("=" * 50)

        test_cases = [
            # English examples
            ("I am so happy today!", "happy", "🇬🇧"),
            ("This makes me really angry!", "anger", "🇬🇧"),
            ("I love you so much!", "love", "🇬🇧"),
            ("I'm scared of spiders", "fear", "🇬🇧"),
            ("This news makes me sad", "sadness", "🇬🇧"),
            ("What a surprise!", "surprise", "🇬🇧"),

            # Malay examples
            ("Saya sangat gembira!", "happy", "🇲🇾"),
            ("Aku marah dengan keadaan ini", "anger", "🇲🇾"),
            ("Aku sayang kamu", "love", "🇲🇾"),
            ("Saya takut dengan ini", "fear", "🇲🇾"),

            # Previously problematic cases (now fixed)
            ("Ini adalah hari jadi terbaik", "happy", "🇲🇾"),
            ("Terbaik!", "happy", "🇲🇾"),
            ("Ini adalah hari yang baik", "happy", "🇲🇾")
        ]

        correct = 0
        total = len(test_cases)

        for i, (text, expected, flag) in enumerate(test_cases, 1):
            predicted, confidence = self._predict(text)

            is_correct = predicted == expected
            if is_correct:
                correct += 1

            status = "✅" if is_correct else "❌"
            print(f"{i:2d}. {status} {flag} '{self._preview(text, 40)}'")
            print(f"    → {predicted} ({confidence:.1%}) [Expected: {expected}]")

        accuracy = correct / total
        print(f"\n📊 Quick Test Results: {accuracy:.1%} ({correct}/{total})")

        if accuracy >= 0.9:
            print("🎉 EXCELLENT! Model performing at high level!")
        elif accuracy >= 0.8:
            print("👍 GOOD! Model performing well!")
        else:
            print("⚠️ NEEDS ATTENTION. Some issues detected.")

        return accuracy

    def comprehensive_test(self):
        """Comprehensive test covering all aspects.

        Runs every example of every category, printing a per-category
        accuracy along the way.

        Returns:
            The fraction of all examples, across categories, that were
            predicted correctly.
        """
        print("\n🔬 COMPREHENSIVE TEST")
        print("=" * 50)

        # Test categories
        test_categories = {
            "English Basic": [
                ("I feel fantastic today!", "happy"),
                ("I'm furious about this!", "anger"),
                ("I adore this place!", "love"),
                ("I'm terrified of heights", "fear"),
                ("I'm heartbroken", "sadness"),
                ("I can't believe it!", "surprise")
            ],
            "Malay Basic": [
                ("Gembira sangat hari ini", "happy"),
                ("Marah betul dengan dia", "anger"),
                ("Sayang sangat kat kamu", "love"),
                ("Takut gila dengan benda tu", "fear"),
                ("Sedih betul dengar berita", "sadness"),
                ("Terkejut dengan kejadian", "surprise")
            ],
            "Malay Fixed Issues": [
                ("Ini adalah hari jadi terbaik", "happy"),
                ("Hari jadi terbaik saya", "happy"),
                ("Terbaik!", "happy"),
                ("Hari yang baik", "happy"),
                ("Pengalaman terbaik", "happy"),
                ("Masa terbaik", "happy")
            ],
            "Edge Cases": [
                ("Happy birthday!", "happy"),
                ("Best day ever!", "happy"),
                ("Good news!", "happy"),
                ("Selamat hari jadi", "happy"),
                ("Berita baik", "happy"),
                ("Hasil terbaik", "happy")
            ]
        }

        overall_correct = 0
        overall_total = 0

        for category, cases in test_categories.items():
            print(f"\n📋 {category}:")
            print("-" * 30)

            category_correct = 0
            for text, expected in cases:
                predicted, confidence = self._predict(text)

                is_correct = predicted == expected
                if is_correct:
                    category_correct += 1
                    overall_correct += 1

                overall_total += 1

                status = "✅" if is_correct else "❌"
                print(f"  {status} '{self._preview(text, 35)}' → {predicted} ({confidence:.1%})")

            category_accuracy = category_correct / len(cases)
            print(f"  📊 {category} Accuracy: {category_accuracy:.1%}")

        overall_accuracy = overall_correct / overall_total
        print("\n📊 COMPREHENSIVE TEST RESULTS:")
        print(f"✅ Overall Accuracy: {overall_accuracy:.1%} ({overall_correct}/{overall_total})")

        return overall_accuracy

    def interactive_test(self):
        """Interactive testing mode.

        Reads lines from standard input and prints the predicted emotion and
        its confidence for each.  The loop ends on ``quit``/``exit``/``q``,
        on Ctrl-C, or when standard input reaches end-of-file.  Any other
        error is reported and the session continues.
        """
        print("\n🎮 INTERACTIVE TEST MODE")
        print("=" * 50)
        print("Enter text to classify emotions (type 'quit' to exit)")
        print("Supported emotions: anger, fear, happy, love, sadness, surprise")
        print()

        while True:
            try:
                text = input("💬 Your text: ").strip()

                if text.lower() in ('quit', 'exit', 'q'):
                    print("👋 Goodbye!")
                    break

                if not text:
                    continue

                predicted, confidence = self._predict(text)

                emoji = self._EMOTION_EMOJIS.get(predicted, '🤔')
                if confidence > 0.9:
                    confidence_level = "💪 High"
                elif confidence > 0.7:
                    confidence_level = "👍 Good"
                else:
                    confidence_level = "⚠️ Low"

                print(f"🎭 Result: {emoji} {predicted}")
                print(f"📊 Confidence: {confidence:.1%}")
                # The level already carries its own icon; do not prefix another.
                print(f"{confidence_level} confidence!")
                print()

            except (KeyboardInterrupt, EOFError):
                # EOFError must end the loop: once stdin is exhausted every
                # further input() call raises again, which would otherwise
                # spin forever in the generic handler below.
                print("\n👋 Goodbye!")
                break
            except Exception as e:
                # Best effort: report the failure and keep the session alive.
                print(f"❌ Error: {e}")

    def benchmark_test(self):
        """Performance benchmark test.

        Classifies a fixed list of 100 short texts one at a time and reports
        total time, mean latency and throughput.

        Returns:
            The measured number of predictions per second.
        """
        print("\n⚡ BENCHMARK TEST")
        print("=" * 50)

        # Test texts for benchmarking
        benchmark_texts = [
            "I am so happy today!",
            "This makes me angry!",
            "I love this!",
            "I'm scared!",
            "This is sad news",
            "What a surprise!",
            "Saya gembira!",
            "Aku marah!",
            "Sayang betul!",
            "Takut sangat!"
        ] * 10  # 100 predictions total

        print(f"🔄 Running {len(benchmark_texts)} predictions...")

        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which can jump with wall-clock adjustments.
        start_time = time.perf_counter()

        for text in benchmark_texts:
            self.classifier(text)

        # Clamp so an immeasurably short run cannot divide by zero below.
        total_time = max(time.perf_counter() - start_time, 1e-9)
        avg_time = total_time / len(benchmark_texts)
        predictions_per_second = len(benchmark_texts) / total_time

        print("📊 BENCHMARK RESULTS:")
        print(f"⏱️  Total time: {total_time:.2f} seconds")
        print(f"⚡ Average per prediction: {avg_time*1000:.1f} ms")
        print(f"🚀 Predictions per second: {predictions_per_second:.1f}")

        if predictions_per_second > 10:
            print("🎉 EXCELLENT! Very fast performance!")
        elif predictions_per_second > 5:
            print("👍 GOOD! Acceptable performance!")
        else:
            print("⚠️ SLOW. Consider optimization.")

        return predictions_per_second

def main():
    """Parse the command line, run the selected test mode and return an exit code.

    Returns:
        0 when the selected tests ran to completion, 1 when loading the model
        or running a test raised an exception (the message is printed).
    """
    cli = argparse.ArgumentParser(description="Test the multilingual emotion classifier")
    cli.add_argument(
        "--test-type",
        choices=["quick", "comprehensive", "interactive", "benchmark", "all"],
        default="quick",
        help="Type of test to run"
    )
    cli.add_argument(
        "--model",
        default="rmtariq/multilingual-emotion-classifier",
        help="Model name or path"
    )
    options = cli.parse_args()

    print("🎭 MULTILINGUAL EMOTION CLASSIFIER TESTING SUITE")
    print("=" * 60)
    print(f"Model: {options.model}")
    print(f"Test Type: {options.test_type}")

    try:
        tester = EmotionModelTester(options.model)
        mode = options.test_type

        if mode == "all":
            # Non-interactive suites first; the interactive prompt runs last
            # because it blocks on user input.
            print("🔄 Running all tests...")
            tester.quick_test()
            tester.comprehensive_test()
            tester.benchmark_test()
            print("\n🎮 Starting interactive mode...")
            tester.interactive_test()
        else:
            # argparse's `choices` guarantees `mode` is one of these keys.
            single_mode_runners = {
                "quick": tester.quick_test,
                "comprehensive": tester.comprehensive_test,
                "interactive": tester.interactive_test,
                "benchmark": tester.benchmark_test,
            }
            single_mode_runners[mode]()
    except Exception as e:
        print(f"❌ Testing failed: {e}")
        return 1

    return 0

if __name__ == "__main__":
    # The bare `exit` helper is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly always works and needs no import.
    raise SystemExit(main())