wuff-mann commited on
Commit
ba1929a
·
verified ·
1 Parent(s): f58f3de

Upload synthir2026_results_v2.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. synthir2026_results_v2.json +191 -0
synthir2026_results_v2.json ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hotpot_f1": {
3
+ "Cosine": 0.77325,
4
+ "Diagonal": 0.7735,
5
+ "LowRank": 0.79275,
6
+ "Skew": 0.818,
7
+ "MixSkew-2": 0.81525,
8
+ "MixSkew-3": 0.81625,
9
+ "Oracle": 1.0
10
+ },
11
+ "hotpot_f1_std": {
12
+ "Cosine": 0.2719548445974073,
13
+ "Diagonal": 0.2742220815324688,
14
+ "LowRank": 0.2691141718676294,
15
+ "Skew": 0.2652847526715397,
16
+ "MixSkew-2": 0.27201550966810695,
17
+ "MixSkew-3": 0.2680689043884053,
18
+ "Oracle": 0.0
19
+ },
20
+ "statistical_tests": {
21
+ "Diagonal": {
22
+ "p": 0.9560499306404453,
23
+ "delta": 0.00024999999999997247
24
+ },
25
+ "LowRank": {
26
+ "p": 9.642371805114744e-06,
27
+ "delta": 0.019499999999999962
28
+ },
29
+ "Skew": {
30
+ "p": 1.0234538115855705e-14,
31
+ "delta": 0.044749999999999956
32
+ },
33
+ "MixSkew-2": {
34
+ "p": 1.332926046651438e-12,
35
+ "delta": 0.04200000000000004
36
+ },
37
+ "MixSkew-3": {
38
+ "p": 1.202985095024838e-14,
39
+ "delta": 0.04300000000000004
40
+ }
41
+ },
42
+ "mix3_vs_mix2": {
43
+ "delta": 0.0010000000000000009,
44
+ "p": 0.8242047070728566
45
+ },
46
+ "cross_dataset": {
47
+ "HotpotQA": {
48
+ "cosine": 0.7665,
49
+ "skew": 0.8185,
50
+ "mix2": 0.811,
51
+ "p_skew": 2.2756934713749659e-10,
52
+ "p_mix2": 1.2478886489464592e-07,
53
+ "N": 1000
54
+ },
55
+ "MuSiQue": {
56
+ "cosine": 0.5221,
57
+ "skew": 0.4534666666666667,
58
+ "mix2": 0.43796666666666667,
59
+ "p_skew": 2.1401805737941205e-14,
60
+ "p_mix2": 2.4231383612273678e-20,
61
+ "N": 1000
62
+ },
63
+ "SQuAD": {
64
+ "cosine": 0.6653333333333332,
65
+ "skew": 0.6639999999999999,
66
+ "mix2": 0.6613333333333332,
67
+ "p_skew": 0.564223301076451,
68
+ "p_mix2": 0.1799739084053753,
69
+ "N": 500
70
+ }
71
+ },
72
+ "generation": {
73
+ "base": {
74
+ "Oracle": {
75
+ "rouge_l": 0.5687365143714215,
76
+ "bertscore": 0.7399862408638
77
+ },
78
+ "Cosine": {
79
+ "rouge_l": 0.41953139171629883,
80
+ "bertscore": 0.6891847252845764
81
+ },
82
+ "Diagonal": {
83
+ "rouge_l": 0.42788499921990636,
84
+ "bertscore": 0.694346010684967
85
+ },
86
+ "LowRank": {
87
+ "rouge_l": 0.4435282892631964,
88
+ "bertscore": 0.6973357796669006
89
+ },
90
+ "Skew": {
91
+ "rouge_l": 0.4718719400568472,
92
+ "bertscore": 0.7059648633003235
93
+ },
94
+ "MixSkew-2": {
95
+ "rouge_l": 0.4794889674738746,
96
+ "bertscore": 0.7094716429710388
97
+ },
98
+ "MixSkew-3": {
99
+ "rouge_l": 0.4735894725243796,
100
+ "bertscore": 0.7058268785476685
101
+ }
102
+ },
103
+ "xl": {
104
+ "Oracle": {
105
+ "rouge_l": 0.4951331830622079,
106
+ "bertscore": 0.6803053617477417
107
+ },
108
+ "Cosine": {
109
+ "rouge_l": 0.3636951599741848,
110
+ "bertscore": 0.6113541722297668
111
+ },
112
+ "Diagonal": {
113
+ "rouge_l": 0.37530530538433016,
114
+ "bertscore": 0.6197639107704163
115
+ },
116
+ "LowRank": {
117
+ "rouge_l": 0.37686305319207797,
118
+ "bertscore": 0.6189463138580322
119
+ },
120
+ "Skew": {
121
+ "rouge_l": 0.41289841811954814,
122
+ "bertscore": 0.6382456421852112
123
+ },
124
+ "MixSkew-2": {
125
+ "rouge_l": 0.4099065097355345,
126
+ "bertscore": 0.6355350613594055
127
+ },
128
+ "MixSkew-3": {
129
+ "rouge_l": 0.4080551280762581,
130
+ "bertscore": 0.6337690353393555
131
+ }
132
+ }
133
+ },
134
+ "learning_curve": {
135
+ "train_sizes": [
136
+ 500,
137
+ 1000,
138
+ 2000,
139
+ 5000,
140
+ 10000,
141
+ 20000
142
+ ],
143
+ "skew_f1": [
144
+ 0.757,
145
+ 0.738,
146
+ 0.762,
147
+ 0.779,
148
+ 0.7835,
149
+ 0.811
150
+ ],
151
+ "cosine_f1": 0.7665
152
+ },
153
+ "latency": {
154
+ "N": [
155
+ 10,
156
+ 100,
157
+ 1000,
158
+ 10000,
159
+ 100000
160
+ ],
161
+ "cosine_ms": [
162
+ 0.0688852000166662,
163
+ 0.06259280007725465,
164
+ 0.056003099962254055,
165
+ 0.0626234000264958,
166
+ 0.3105587999925774
167
+ ],
168
+ "skew_ms": [
169
+ 1.7369928999869444,
170
+ 1.7217745999914769,
171
+ 1.6900264000469178,
172
+ 1.7137092000666598,
173
+ 4.257518300073571
174
+ ]
175
+ },
176
+ "dimensional_analysis": {
177
+ "bridge_vs_comparison_r": 0.9782037746098716,
178
+ "perturbation_r": 0.9966080143418911,
179
+ "cv_bridge": 0.32542163133621216,
180
+ "cv_comparison": 0.6063311100006104
181
+ },
182
+ "hubness": {
183
+ "cosine": 0.06412825651302605,
184
+ "skew": 0.08755932775532456,
185
+ "mix2": 0.08617234468937876
186
+ },
187
+ "encoder_ablation": {
188
+ "cosine_mini": 0.6015,
189
+ "lowrank_mini": 0.605
190
+ }
191
+ }