ens-appraiser / v0_6_metadata.json
quantumly's picture
v0.6 tuned + quantile + tail-correction: 2026-04-27
eedc32e verified
{
"trained_at": "2026-04-27T07:37:26.101815+00:00",
"data_run_date": "2026-04-25",
"llm_run_date": "2026-04-26",
"version": "v0.6-tuning-quantile-stacking",
"description": "v0.5 features + Optuna-tuned hyperparams + quantile regression at 0.05/0.5/0.95 + tail-correction stacking",
"parent_version": "v0.5",
"changes_from_parent": [
"Optuna search (60 trials)",
"Quantile regression trio (q=0.05, 0.5, 0.95)",
"Tail-correction stacking layer on q=0.5 residuals"
],
"embedders": [
"mpnet-finetuned (from v0.3)",
"BAAI/bge-base-en-v1.5"
],
"splits": {
"train": {
"rows": 265240,
"start": "2022-01-28",
"end": "2023-09-30"
},
"val": {
"rows": 3545,
"start": "2023-10-01",
"end": "2023-12-31"
},
"test": {
"rows": 2744,
"start": "2024-01-01",
"end": "2024-05-04"
}
},
"feature_count": 212,
"feature_cols": [
"pca_000",
"pca_001",
"pca_002",
"pca_003",
"pca_004",
"pca_005",
"pca_006",
"pca_007",
"pca_008",
"pca_009",
"pca_010",
"pca_011",
"pca_012",
"pca_013",
"pca_014",
"pca_015",
"pca_016",
"pca_017",
"pca_018",
"pca_019",
"pca_020",
"pca_021",
"pca_022",
"pca_023",
"pca_024",
"pca_025",
"pca_026",
"pca_027",
"pca_028",
"pca_029",
"pca_030",
"pca_031",
"pca_032",
"pca_033",
"pca_034",
"pca_035",
"pca_036",
"pca_037",
"pca_038",
"pca_039",
"pca_040",
"pca_041",
"pca_042",
"pca_043",
"pca_044",
"pca_045",
"pca_046",
"pca_047",
"pca_048",
"pca_049",
"pca_050",
"pca_051",
"pca_052",
"pca_053",
"pca_054",
"pca_055",
"pca_056",
"pca_057",
"pca_058",
"pca_059",
"pca_060",
"pca_061",
"pca_062",
"pca_063",
"len",
"n_digits",
"n_letters",
"n_special",
"n_lower",
"n_upper",
"is_palindrome",
"is_all_digits",
"is_all_letters",
"is_ascii",
"has_unicode",
"starts_digit",
"ends_digit",
"max_char_run",
"n_unique_chars",
"in_wikipedia",
"in_geonames",
"in_us_firstname",
"in_iso3166",
"in_ticker",
"in_sec_edgar",
"in_wiktionary_en",
"wordlist_hits",
"club__logistics",
"club__prepunk_full_rankings",
"club__gamertags",
"club__natural_wonders",
"club__social_handles",
"club__mythical_creatures",
"club__top_nouns",
"club__common_english",
"club__firstnames_usa",
"club__luxury",
"club__us_government",
"club__paranormal",
"club__pokemon_gen3",
"club__top500_cities_usa",
"club__gamertags_double",
"club__historic_figures",
"club__catholicism",
"club__performing_arts",
"club__sports",
"club__crayola_classic",
"club__personas",
"club__gen_alpha",
"club__familynames_usa",
"club__crypto_terms",
"club__wikidata_top_fantasy_char",
"club__country_codes",
"club__finance_terms",
"club__fine_art",
"club__home",
"club__common_animals",
"club__conspiracy_theories",
"club__holidays",
"club__top_crypto_tickers",
"club__pokemon_gen1",
"club__pokemon_gen2",
"club__top_crypto_names",
"club__top500_cities_global",
"club__memes",
"club__currency_symbols",
"club__pokemon_gen4",
"club__us_states",
"club__currency_names",
"n_clubs",
"trademark_conflict",
"name_age_days",
"prior_transfer_count",
"fg_value",
"eth_tvl_usd",
"eth_stable_mcap",
"eth_dex_volume",
"nft_total_fee_usd",
"fame_score",
"crypto_relevance_ord",
"brand_collision_risk_ord",
"kind__concept",
"kind__random",
"kind__brand",
"kind__surname",
"kind__first_name",
"kind__abbreviation",
"kind__place",
"kind__other",
"kind__unknown",
"origin__english",
"origin__none",
"origin__mixed",
"origin__spanish",
"origin__german",
"origin__french",
"origin__japanese",
"origin__chinese",
"origin__italian",
"origin__slavic",
"origin__korean",
"origin__arabic",
"origin__other",
"origin__unknown",
"desc_pca_000",
"desc_pca_001",
"desc_pca_002",
"desc_pca_003",
"desc_pca_004",
"desc_pca_005",
"desc_pca_006",
"desc_pca_007",
"desc_pca_008",
"desc_pca_009",
"desc_pca_010",
"desc_pca_011",
"desc_pca_012",
"desc_pca_013",
"desc_pca_014",
"desc_pca_015",
"desc_pca_016",
"desc_pca_017",
"desc_pca_018",
"desc_pca_019",
"desc_pca_020",
"desc_pca_021",
"desc_pca_022",
"desc_pca_023",
"desc_pca_024",
"desc_pca_025",
"desc_pca_026",
"desc_pca_027",
"desc_pca_028",
"desc_pca_029",
"desc_pca_030",
"desc_pca_031",
"knnmp_count",
"knnmp_mean_log",
"knnmp_median_log",
"knnmp_p90_log",
"knnmp_max_sim",
"knnmp_min_sim",
"knnmp_log_max",
"knnmp_log_min",
"knnbg_count",
"knnbg_mean_log",
"knnbg_median_log",
"knnbg_p90_log",
"knnbg_max_sim",
"knnbg_min_sim",
"knnbg_log_max",
"knnbg_log_min"
],
"pca_dim_concat": 64,
"pca_dim_description": 32,
"name_kind_values": [
"concept",
"random",
"brand",
"surname",
"first_name",
"abbreviation",
"place",
"other",
"unknown"
],
"cultural_origin_values": [
"english",
"none",
"mixed",
"spanish",
"german",
"french",
"japanese",
"chinese",
"italian",
"slavic",
"korean",
"arabic",
"other",
"unknown"
],
"best_xgb_params": {
"tree_method": "hist",
"device": "cuda",
"seed": 42,
"max_depth": 12,
"learning_rate": 0.007564423378912868,
"subsample": 0.7048816055233034,
"colsample_bytree": 0.8377589264565862,
"colsample_bylevel": 0.4011055562046537,
"min_child_weight": 7,
"reg_alpha": 2.609660530404674,
"reg_lambda": 1.946648297232411,
"gamma": 2.3669086866916134
},
"optuna": {
"n_trials": 60,
"best_val_rmse": 1.0080678462982178,
"best_params": {
"max_depth": 12,
"learning_rate": 0.007564423378912868,
"subsample": 0.7048816055233034,
"colsample_bytree": 0.8377589264565862,
"colsample_bylevel": 0.4011055562046537,
"min_child_weight": 7,
"reg_alpha": 2.609660530404674,
"reg_lambda": 1.946648297232411,
"gamma": 2.3669086866916134
}
},
"quantile_models": {
"q05": {
"best_iteration": 798,
"best_val_rmse": 1.9434242115149494
},
"q50": {
"best_iteration": 2262,
"best_val_rmse": 1.0197268625497296
},
"q95": {
"best_iteration": 1773,
"best_val_rmse": 2.2088471640402845
}
},
"tail_correction": {
"best_iteration": 795,
"best_val_rmse": 1.0014360745042261,
"cv_folds": 3,
"fold_metrics": [
{
"fold": 0,
"best_iter": 2256
},
{
"fold": 1,
"best_iter": 2261
},
{
"fold": 2,
"best_iter": 2261
}
]
},
"metrics": {
"final": {
"train": {
"r2_log": 0.8784301280975342,
"rmse_log": 0.5630198121070862,
"mae_log": 0.34918516874313354,
"median_ape": 0.1952281892299652,
"bias_log": 0.0008572799852117896
},
"val": {
"r2_log": 0.7011631727218628,
"rmse_log": 1.0014359951019287,
"mae_log": 0.687095046043396,
"median_ape": 0.4087112247943878,
"bias_log": 0.02248295769095421
},
"test": {
"r2_log": 0.45456844568252563,
"rmse_log": 1.3734359741210938,
"mae_log": 1.1084247827529907,
"median_ape": 0.9863521456718445,
"bias_log": 0.5754767060279846
}
},
"base": {
"train": {
"r2_log": 0.8039833307266235,
"rmse_log": 0.7149196863174438,
"mae_log": 0.4110703468322754,
"median_ape": 0.19649738073349,
"bias_log": -0.07636234164237976
},
"val": {
"r2_log": 0.6901469230651855,
"rmse_log": 1.019727349281311,
"mae_log": 0.6950583457946777,
"median_ape": 0.40584105253219604,
"bias_log": -0.040105946362018585
},
"test": {
"r2_log": 0.48208510875701904,
"rmse_log": 1.3383431434631348,
"mae_log": 1.0609599351882935,
"median_ape": 0.9018900394439697,
"bias_log": 0.4112372100353241
}
},
"coverage": {
"train": {
"coverage_90pct": 0.9052216860202081,
"median_interval_log": 2.52069091796875,
"median_interval_ratio": 12.437186771527003
},
"val": {
"coverage_90pct": 0.7870239774330042,
"median_interval_log": 3.475064992904663,
"median_interval_ratio": 32.299927734517325
},
"test": {
"coverage_90pct": 0.642128279883382,
"median_interval_log": 3.9815444946289062,
"median_interval_ratio": 53.5997548637558
}
}
},
"top_features_base": [
{
"name": "knnmp_mean_log",
"gain": 79.17141723632812
},
{
"name": "knnmp_median_log",
"gain": 69.66679382324219
},
{
"name": "len",
"gain": 59.86244201660156
},
{
"name": "knnmp_p90_log",
"gain": 42.315155029296875
},
{
"name": "is_all_digits",
"gain": 37.01343536376953
},
{
"name": "knnmp_log_min",
"gain": 35.15000534057617
},
{
"name": "knnmp_count",
"gain": 33.0050048828125
},
{
"name": "pca_004",
"gain": 29.43238639831543
},
{
"name": "ends_digit",
"gain": 23.06843376159668
},
{
"name": "knnmp_log_max",
"gain": 22.34497833251953
},
{
"name": "pca_002",
"gain": 21.7791690826416
},
{
"name": "origin__chinese",
"gain": 21.658403396606445
},
{
"name": "in_wikipedia",
"gain": 20.88755989074707
},
{
"name": "n_digits",
"gain": 20.82081413269043
},
{
"name": "eth_stable_mcap",
"gain": 20.470684051513672
},
{
"name": "brand_collision_risk_ord",
"gain": 19.85502052307129
},
{
"name": "kind__random",
"gain": 19.53866958618164
},
{
"name": "n_unique_chars",
"gain": 18.933242797851562
},
{
"name": "eth_tvl_usd",
"gain": 18.808229446411133
},
{
"name": "knnbg_count",
"gain": 18.738136291503906
},
{
"name": "name_age_days",
"gain": 18.64664077758789
},
{
"name": "starts_digit",
"gain": 17.717424392700195
},
{
"name": "origin__none",
"gain": 17.592206954956055
},
{
"name": "n_letters",
"gain": 17.253936767578125
},
{
"name": "is_palindrome",
"gain": 16.289718627929688
},
{
"name": "origin__arabic",
"gain": 15.088671684265137
},
{
"name": "kind__abbreviation",
"gain": 15.046517372131348
},
{
"name": "n_special",
"gain": 14.652754783630371
},
{
"name": "n_lower",
"gain": 14.477858543395996
},
{
"name": "desc_pca_000",
"gain": 13.754626274108887
}
],
"top_features_tail": [
{
"name": "in_iso3166",
"gain": 20.713716506958008
},
{
"name": "prior_transfer_count",
"gain": 19.949108123779297
},
{
"name": "n_lower",
"gain": 19.065231323242188
},
{
"name": "brand_collision_risk_ord",
"gain": 19.003070831298828
},
{
"name": "n_letters",
"gain": 16.631563186645508
},
{
"name": "in_wiktionary_en",
"gain": 15.533679962158203
},
{
"name": "club__crypto_terms",
"gain": 15.453104972839355
},
{
"name": "fame_score",
"gain": 15.42578125
},
{
"name": "has_unicode",
"gain": 14.748347282409668
},
{
"name": "n_clubs",
"gain": 14.500151634216309
},
{
"name": "club__gamertags_double",
"gain": 14.302302360534668
},
{
"name": "is_all_letters",
"gain": 14.27059555053711
},
{
"name": "origin__english",
"gain": 14.242754936218262
},
{
"name": "crypto_relevance_ord",
"gain": 14.161867141723633
},
{
"name": "trademark_conflict",
"gain": 13.951108932495117
},
{
"name": "club__currency_symbols",
"gain": 13.67905330657959
},
{
"name": "n_upper",
"gain": 13.592854499816895
},
{
"name": "name_age_days",
"gain": 13.328607559204102
},
{
"name": "ends_digit",
"gain": 13.104514122009277
},
{
"name": "wordlist_hits",
"gain": 12.854232788085938
},
{
"name": "starts_digit",
"gain": 12.26870059967041
},
{
"name": "is_ascii",
"gain": 12.160033226013184
},
{
"name": "kind__random",
"gain": 11.966256141662598
},
{
"name": "club__prepunk_full_rankings",
"gain": 11.913127899169922
},
{
"name": "max_char_run",
"gain": 11.784753799438477
},
{
"name": "n_unique_chars",
"gain": 11.777941703796387
},
{
"name": "eth_stable_mcap",
"gain": 11.768474578857422
},
{
"name": "origin__italian",
"gain": 11.581886291503906
},
{
"name": "in_geonames",
"gain": 11.519492149353027
},
{
"name": "n_special",
"gain": 11.42654800415039
}
],
"inference_recipe": {
"description": "Inference combines 4 models",
"point_estimate": "final_log = q50_model(features) + tail_model(features)",
"lower_bound_90pct": "low_log = q05_model(features)",
"upper_bound_90pct": "high_log = q95_model(features)",
"output_usd": "np.exp(final_log)"
},
"wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/i35wokpu"
}