quantumly committed on
Commit
12e7473
·
verified ·
1 Parent(s): 536735b

v0.3 appraiser (fine-tuned mpnet): 2026-04-26

Browse files
Files changed (1) hide show
  1. v0_3_metadata.json +325 -0
v0_3_metadata.json ADDED
@@ -0,0 +1,325 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trained_at": "2026-04-26T08:12:01.990046+00:00",
3
+ "data_run_date": "2026-04-25",
4
+ "version": "v0.3-finetuned-mpnet",
5
+ "description": "v0.2 + contrastive fine-tuning of mpnet on price-similarity triplets",
6
+ "parent_version": "v0.2",
7
+ "fine_tune": {
8
+ "base_model": "sentence-transformers/all-mpnet-base-v2",
9
+ "triplets": 3252,
10
+ "epochs": 3,
11
+ "lr": 2e-05,
12
+ "batch_size": 64,
13
+ "val_triplet_acc_before": 0.5185185185185185,
14
+ "val_triplet_acc_after": 0.6049382716049383,
15
+ "improvement_pp": 8.64197530864198
16
+ },
17
+ "splits": {
18
+ "train": {
19
+ "rows": 265240,
20
+ "start": "2022-01-28",
21
+ "end": "2023-09-30"
22
+ },
23
+ "val": {
24
+ "rows": 3545,
25
+ "start": "2023-10-01",
26
+ "end": "2023-12-31"
27
+ },
28
+ "test": {
29
+ "rows": 2744,
30
+ "start": "2024-01-01",
31
+ "end": "2024-05-04"
32
+ }
33
+ },
34
+ "feature_count": 146,
35
+ "feature_cols": [
36
+ "len",
37
+ "n_digits",
38
+ "n_letters",
39
+ "n_special",
40
+ "n_lower",
41
+ "n_upper",
42
+ "is_palindrome",
43
+ "is_all_digits",
44
+ "is_all_letters",
45
+ "is_ascii",
46
+ "has_unicode",
47
+ "starts_digit",
48
+ "ends_digit",
49
+ "max_char_run",
50
+ "n_unique_chars",
51
+ "in_wikipedia",
52
+ "in_geonames",
53
+ "in_us_firstname",
54
+ "in_iso3166",
55
+ "in_ticker",
56
+ "in_sec_edgar",
57
+ "in_wiktionary_en",
58
+ "wordlist_hits",
59
+ "club__social_handles",
60
+ "club__prepunk_full_rankings",
61
+ "club__personas",
62
+ "club__gamertags",
63
+ "club__top500_cities_global",
64
+ "club__familynames_usa",
65
+ "club__common_english",
66
+ "club__top_nouns",
67
+ "club__top500_cities_usa",
68
+ "club__common_animals",
69
+ "club__catholicism",
70
+ "club__crypto_terms",
71
+ "club__finance_terms",
72
+ "club__pokemon_gen4",
73
+ "club__logistics",
74
+ "club__us_government",
75
+ "club__top_crypto_names",
76
+ "club__mythical_creatures",
77
+ "club__firstnames_usa",
78
+ "club__gen_alpha",
79
+ "club__sports",
80
+ "club__crayola_classic",
81
+ "club__performing_arts",
82
+ "club__top_crypto_tickers",
83
+ "club__gamertags_double",
84
+ "club__country_codes",
85
+ "club__us_states",
86
+ "club__pokemon_gen1",
87
+ "club__conspiracy_theories",
88
+ "club__wikidata_top_fantasy_char",
89
+ "club__historic_figures",
90
+ "club__currency_symbols",
91
+ "club__luxury",
92
+ "club__natural_wonders",
93
+ "club__pokemon_gen3",
94
+ "club__paranormal",
95
+ "club__holidays",
96
+ "club__memes",
97
+ "club__currency_names",
98
+ "club__home",
99
+ "club__pokemon_gen2",
100
+ "club__fine_art",
101
+ "n_clubs",
102
+ "trademark_conflict",
103
+ "name_age_days",
104
+ "prior_transfer_count",
105
+ "fg_value",
106
+ "eth_tvl_usd",
107
+ "eth_stable_mcap",
108
+ "eth_dex_volume",
109
+ "nft_total_fee_usd",
110
+ "pca_000",
111
+ "pca_001",
112
+ "pca_002",
113
+ "pca_003",
114
+ "pca_004",
115
+ "pca_005",
116
+ "pca_006",
117
+ "pca_007",
118
+ "pca_008",
119
+ "pca_009",
120
+ "pca_010",
121
+ "pca_011",
122
+ "pca_012",
123
+ "pca_013",
124
+ "pca_014",
125
+ "pca_015",
126
+ "pca_016",
127
+ "pca_017",
128
+ "pca_018",
129
+ "pca_019",
130
+ "pca_020",
131
+ "pca_021",
132
+ "pca_022",
133
+ "pca_023",
134
+ "pca_024",
135
+ "pca_025",
136
+ "pca_026",
137
+ "pca_027",
138
+ "pca_028",
139
+ "pca_029",
140
+ "pca_030",
141
+ "pca_031",
142
+ "pca_032",
143
+ "pca_033",
144
+ "pca_034",
145
+ "pca_035",
146
+ "pca_036",
147
+ "pca_037",
148
+ "pca_038",
149
+ "pca_039",
150
+ "pca_040",
151
+ "pca_041",
152
+ "pca_042",
153
+ "pca_043",
154
+ "pca_044",
155
+ "pca_045",
156
+ "pca_046",
157
+ "pca_047",
158
+ "pca_048",
159
+ "pca_049",
160
+ "pca_050",
161
+ "pca_051",
162
+ "pca_052",
163
+ "pca_053",
164
+ "pca_054",
165
+ "pca_055",
166
+ "pca_056",
167
+ "pca_057",
168
+ "pca_058",
169
+ "pca_059",
170
+ "pca_060",
171
+ "pca_061",
172
+ "pca_062",
173
+ "pca_063",
174
+ "knn_count",
175
+ "knn_mean_log",
176
+ "knn_median_log",
177
+ "knn_p90_log",
178
+ "knn_max_sim",
179
+ "knn_min_sim",
180
+ "knn_log_max",
181
+ "knn_log_min"
182
+ ],
183
+ "pca_dim": 64,
184
+ "best_iteration": 369,
185
+ "xgb_params": {
186
+ "objective": "reg:squarederror",
187
+ "eval_metric": "rmse",
188
+ "tree_method": "hist",
189
+ "device": "cuda",
190
+ "max_depth": 7,
191
+ "learning_rate": 0.04,
192
+ "subsample": 0.85,
193
+ "colsample_bytree": 0.65,
194
+ "min_child_weight": 8,
195
+ "reg_alpha": 0.5,
196
+ "reg_lambda": 2.0,
197
+ "seed": 42
198
+ },
199
+ "metrics": {
200
+ "train": {
201
+ "r2_log": 0.7984388470649719,
202
+ "rmse_log": 0.7249601483345032,
203
+ "mae_log": 0.48019057512283325,
204
+ "median_ape": 0.2934191823005676,
205
+ "bias_log": -0.0001444444787921384
206
+ },
207
+ "val": {
208
+ "r2_log": 0.6729167103767395,
209
+ "rmse_log": 1.0476961135864258,
210
+ "mae_log": 0.7321875691413879,
211
+ "median_ape": 0.48088952898979187,
212
+ "bias_log": 0.12580855190753937
213
+ },
214
+ "test": {
215
+ "r2_log": 0.32547563314437866,
216
+ "rmse_log": 1.527344822883606,
217
+ "mae_log": 1.2592869997024536,
218
+ "median_ape": 1.3042705059051514,
219
+ "bias_log": 0.712732195854187
220
+ }
221
+ },
222
+ "top_features": [
223
+ {
224
+ "name": "knn_mean_log",
225
+ "gain": 2475.822021484375
226
+ },
227
+ {
228
+ "name": "knn_median_log",
229
+ "gain": 1594.9427490234375
230
+ },
231
+ {
232
+ "name": "knn_p90_log",
233
+ "gain": 1196.794189453125
234
+ },
235
+ {
236
+ "name": "in_wikipedia",
237
+ "gain": 432.5325927734375
238
+ },
239
+ {
240
+ "name": "len",
241
+ "gain": 427.3330078125
242
+ },
243
+ {
244
+ "name": "pca_002",
245
+ "gain": 208.85269165039062
246
+ },
247
+ {
248
+ "name": "is_all_digits",
249
+ "gain": 207.7695770263672
250
+ },
251
+ {
252
+ "name": "n_digits",
253
+ "gain": 207.7196807861328
254
+ },
255
+ {
256
+ "name": "pca_001",
257
+ "gain": 198.31832885742188
258
+ },
259
+ {
260
+ "name": "pca_007",
261
+ "gain": 197.07334899902344
262
+ },
263
+ {
264
+ "name": "n_clubs",
265
+ "gain": 186.21694946289062
266
+ },
267
+ {
268
+ "name": "name_age_days",
269
+ "gain": 184.0680694580078
270
+ },
271
+ {
272
+ "name": "knn_count",
273
+ "gain": 183.48965454101562
274
+ },
275
+ {
276
+ "name": "n_unique_chars",
277
+ "gain": 170.0704345703125
278
+ },
279
+ {
280
+ "name": "is_palindrome",
281
+ "gain": 158.3007049560547
282
+ },
283
+ {
284
+ "name": "eth_stable_mcap",
285
+ "gain": 153.7845001220703
286
+ },
287
+ {
288
+ "name": "trademark_conflict",
289
+ "gain": 153.01382446289062
290
+ },
291
+ {
292
+ "name": "pca_022",
293
+ "gain": 145.79708862304688
294
+ },
295
+ {
296
+ "name": "ends_digit",
297
+ "gain": 140.9295196533203
298
+ },
299
+ {
300
+ "name": "club__prepunk_full_rankings",
301
+ "gain": 130.34014892578125
302
+ },
303
+ {
304
+ "name": "eth_tvl_usd",
305
+ "gain": 120.09010314941406
306
+ },
307
+ {
308
+ "name": "n_special",
309
+ "gain": 116.31814575195312
310
+ },
311
+ {
312
+ "name": "n_letters",
313
+ "gain": 108.61463928222656
314
+ },
315
+ {
316
+ "name": "is_ascii",
317
+ "gain": 108.28950500488281
318
+ },
319
+ {
320
+ "name": "pca_000",
321
+ "gain": 103.93241119384766
322
+ }
323
+ ],
324
+ "wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/0eevvvo0"
325
+ }