File size: 6,169 Bytes
c339092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e7c29e
 
 
 
 
 
c339092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e7c29e
c339092
8e7c29e
c339092
8e7c29e
 
c339092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python3
"""Analyze extracted emotion vectors: cosine similarity, clustering, visualization."""

import json
import os
import numpy as np

EXP_DIR = os.path.dirname(os.path.abspath(__file__))
RESULTS_DIR = os.path.join(EXP_DIR, "results")


def load_vectors():
    """Load saved per-emotion vectors from results/emotion_vectors.npz.

    Returns a dict mapping emotion name -> numpy array.
    """
    archive_path = os.path.join(RESULTS_DIR, "emotion_vectors.npz")
    archive = np.load(archive_path)
    vectors = {}
    for emotion in archive.files:
        vectors[emotion] = archive[emotion]
    return vectors


def cosine_sim(a, b):
    """Cosine similarity of vectors a and b (epsilon keeps the denominator nonzero)."""
    denominator = np.linalg.norm(a) * np.linalg.norm(b) + 1e-8
    return np.dot(a, b) / denominator


def cosine_similarity_matrix(vectors):
    """Build the full pairwise cosine-similarity matrix.

    Returns (emotions, matrix) where emotions is the sorted name list and
    matrix[i, j] is the cosine similarity of emotions i and j (epsilon-guarded
    denominator, so the diagonal is ~1 rather than exactly 1).
    """
    emotions = sorted(vectors)
    size = len(emotions)
    matrix = np.zeros((size, size))
    for row, first in enumerate(emotions):
        for col, second in enumerate(emotions):
            va, vb = vectors[first], vectors[second]
            matrix[row, col] = np.dot(va, vb) / (np.linalg.norm(va) * np.linalg.norm(vb) + 1e-8)
    return emotions, matrix


def print_similarity_matrix(emotions, matrix):
    """Pretty-print the similarity matrix with truncated (6-char) column headers."""
    print("\n=== Cosine Similarity Matrix ===\n")
    cols = "".join(f"{name[:6]:>7s}" for name in emotions)
    print(f"{'':12s}{cols}")
    for row_idx, name in enumerate(emotions):
        cells = "".join(f"{matrix[row_idx, col]:7.2f}" for col in range(len(emotions)))
        print(f"{name:12s}{cells}")


def find_clusters(emotions, matrix, threshold=0.5):
    """Print and return emotion pairs whose similarity exceeds `threshold`.

    Returns a list of (emotion_a, emotion_b, similarity) tuples, most
    similar first.
    """
    print(f"\n=== High Similarity Pairs (>{threshold}) ===\n")
    count = len(emotions)
    pairs = [
        (emotions[a], emotions[b], matrix[a, b])
        for a in range(count)
        for b in range(a + 1, count)
        if matrix[a, b] > threshold
    ]
    pairs.sort(key=lambda p: p[2], reverse=True)
    for first, second, sim in pairs:
        print(f"  {first:12s} <-> {second:12s}  sim={sim:.3f}")
    if not pairs:
        print("  (none found)")
    return pairs


def find_opposites(emotions, matrix, threshold=-0.3):
    """Print and return emotion pairs with similarity below `threshold` (opposites).

    Returns a list of (emotion_a, emotion_b, similarity) tuples, most
    negative first.
    """
    print(f"\n=== Opposite Pairs (<{threshold}) ===\n")
    count = len(emotions)
    candidates = [
        (emotions[a], emotions[b], matrix[a, b])
        for a in range(count)
        for b in range(a + 1, count)
        if matrix[a, b] < threshold
    ]
    pairs = sorted(candidates, key=lambda p: p[2])
    for first, second, sim in pairs:
        print(f"  {first:12s} <-> {second:12s}  sim={sim:.3f}")
    if not pairs:
        print("  (none found)")
    return pairs


def valence_arousal_check(emotions, pca_results):
    """Test whether PC1/PC2 align with the classic valence/arousal axes.

    Compares group means of a-priori positive/negative and high/low-arousal
    emotion sets along each principal component; the axis with the larger
    (and sufficiently big) separation wins.
    """
    print("\n=== Valence-Arousal Structure Check ===\n")

    positive = {"happy", "proud", "inspired", "loving", "hopeful", "calm", "playful"}
    negative = {"sad", "angry", "afraid", "desperate", "guilty", "disgusted", "lonely", "spiteful"}
    # Arousal groupings revised per dejanseo's feedback (2026-04-06):
    # - inspired dropped from high arousal (more contemplative/medium)
    # - disgusted, confused, playful, spiteful added to high arousal
    # - loving, hopeful added to low arousal
    high_arousal = {"angry", "afraid", "surprised", "desperate", "nervous", "anxious", "disgusted", "confused", "playful", "spiteful"}
    low_arousal = {"calm", "sad", "brooding", "lonely", "guilty", "loving", "hopeful"}

    def group_mean(pc_vals, group):
        # Mean PC score over the emotions belonging to `group`; 0 if none present.
        picked = [pc_vals[idx] for idx, name in enumerate(pca_results["emotions"]) if name in group]
        return np.mean(picked) if picked else 0

    for pc_name, pc_vals in (("PC1", pca_results["pc1"]), ("PC2", pca_results["pc2"])):
        pos_mean = group_mean(pc_vals, positive)
        neg_mean = group_mean(pc_vals, negative)
        hi_mean = group_mean(pc_vals, high_arousal)
        lo_mean = group_mean(pc_vals, low_arousal)

        valence_sep = abs(pos_mean - neg_mean)
        arousal_sep = abs(hi_mean - lo_mean)

        print(f"  {pc_name}:")
        print(f"    Positive mean: {pos_mean:+.3f}  Negative mean: {neg_mean:+.3f}  → Valence separation: {valence_sep:.3f}")
        print(f"    High-A mean:   {hi_mean:+.3f}  Low-A mean:    {lo_mean:+.3f}  → Arousal separation: {arousal_sep:.3f}")

        if valence_sep > arousal_sep and valence_sep > 1.0:
            print(f"    → {pc_name} ≈ VALENCE axis")
        elif arousal_sep > valence_sep and arousal_sep > 0.5:
            print(f"    → {pc_name} ≈ AROUSAL axis")
        else:
            print(f"    → {pc_name} ≈ UNCLEAR — neither valence nor arousal dominant (model may have learned its own geometry)")


def main():
    """Run the full analysis: load vectors, similarity matrix, clusters/opposites,
    valence-arousal check, and a summary of PCA variance."""
    print("=== Emotion Vector Analysis ===\n")

    # Load vectors
    vectors = load_vectors()
    print(f"Loaded {len(vectors)} emotion vectors")
    print(f"Vector dimension: {next(iter(vectors.values())).shape[0]}")

    # Load experiment results for PCA
    results_file = os.path.join(RESULTS_DIR, "experiment_results.json")
    with open(results_file, "r") as f:
        results = json.load(f)

    # Similarity analysis
    emotions, matrix = cosine_similarity_matrix(vectors)
    print_similarity_matrix(emotions, matrix)
    find_clusters(emotions, matrix, threshold=0.4)
    find_opposites(emotions, matrix, threshold=-0.2)

    # Valence-Arousal check (only when PCA results are present)
    if "pca" in results:
        valence_arousal_check(emotions, results["pca"])

    # Summary
    print("\n=== SUMMARY ===\n")
    avg_sim = matrix[np.triu_indices_from(matrix, k=1)].mean()
    print(f"  Average pairwise similarity: {avg_sim:.3f}")

    # BUG FIX: the original accessed results['pca'] unconditionally here even
    # though the valence-arousal step above guards on its presence — that was a
    # guaranteed KeyError whenever the PCA section was missing from the results
    # JSON. Guard the summary the same way, and compute the combined variance
    # only once.
    if "pca" in results:
        var_12 = results["pca"]["explained_variance_pc1"] + results["pca"]["explained_variance_pc2"]
        print(f"  Variance explained by PC1+PC2: {var_12*100:.1f}%")

        # Anthropic found ~30% variance in first 2 PCs for 171 emotions;
        # with 20 emotions we'd expect higher concentration.
        if var_12 > 0.3:
            print("  ✓ Strong 2D structure detected (>30% in PC1+PC2)")
        else:
            print("  ✗ Weak 2D structure (<30% in PC1+PC2)")

    print("\n=== ANALYSIS COMPLETE ===")


if __name__ == "__main__":
    main()