# Shared constants for emotion recognition and audio/text fusion.

# Emotion class labels, in the order listed here.
EMOTION_LABELS = [
    "neutral",
    "joy",
    "sadness",
    "anger",
    "surprise",
    "fear",
    "disgust",
]

# Valence score assigned to each emotion label.
# Values in this table range from -0.9 (disgust) to 1.0 (joy); neutral is 0.0.
EMOTION_TO_VALENCE = {
    "joy": 1.0,
    "surprise": 0.5,
    "neutral": 0.0,
    "sadness": -0.5,
    "fear": -0.6,
    "anger": -0.8,
    "disgust": -0.9,
}

# Fixed, emotion-independent fusion weights (the "fixed 60/40" baseline
# referred to in the notes below).
FUSION_WEIGHTS = {
    "audio": 0.6,
    "text": 0.4,
}

# Korean emotion-specific fusion weights — trained via gradient descent
# Parameterization: w_a = sigmoid(α), w_t = 1 - w_a (7 params jointly optimized)
# Data: AI Hub 263 val (1,294 samples, stratified 80/20 train/val)
# Val macro F1 = 0.8724 (vs 0.8744 fixed 60/40, 0.8757 greedy v1).
# Full 263 val macro F1 = 0.8748 (vs 0.8736 greedy v1, 0.8347 fixed 60/40).
# Text-dominant pattern preserved (Korean KcELECTRA LoRA > audio LoRA on most classes).
EMOTION_FUSION_WEIGHTS_KO = {
    "neutral": {"audio": 0.53, "text": 0.47},
    "joy": {"audio": 0.25, "text": 0.75},
    "sadness": {"audio": 0.16, "text": 0.84},
    "anger": {"audio": 0.11, "text": 0.89},
    "surprise": {"audio": 0.22, "text": 0.78},
    "fear": {"audio": 0.10, "text": 0.90},
    "disgust": {"audio": 0.15, "text": 0.85},
}

# English emotion-specific fusion weights — trained via gradient descent
# Parameterization: w_a = sigmoid(α), w_t = 1 - w_a (7 params jointly optimized)
# Data: JL-Corpus + SAVEE + MELD + RAVDESS phone (fear/disgust/sadness) = 2,821 samples
# Val macro F1 = 0.7596 (vs 0.7473 fixed 60/40, 0.7114 audio-only, 0.6295 greedy v1)
EMOTION_FUSION_WEIGHTS_EN = {
    "neutral": {"audio": 0.78, "text": 0.22},
    "joy": {"audio": 0.61, "text": 0.39},
    "sadness": {"audio": 0.70, "text": 0.30},
    "anger": {"audio": 0.60, "text": 0.40},
    "surprise": {"audio": 0.46, "text": 0.54},
    "fear": {"audio": 0.74, "text": 0.26},
    "disgust": {"audio": 0.77, "text": 0.23},
}

# Default (Korean) — back-compat alias.
# This binds the same dict object as EMOTION_FUSION_WEIGHTS_KO (not a copy),
# so in-place mutation through either name is visible through both.
EMOTION_FUSION_WEIGHTS = EMOTION_FUSION_WEIGHTS_KO