ens-appraiser / v0_8_metadata.json
quantumly's picture
v0.8 fixed wash + ensemble: 2026-04-27
0e895c4 verified
{
"trained_at": "2026-04-27T12:00:40.349432+00:00",
"data_run_date": "2026-04-25",
"llm_run_date": "2026-04-26",
"version": "v0.8-fixed-wash-ensemble",
"description": "v0.7 features + tightened wash detection + dual-tree ensemble + bias correction",
"parent_version": "v0.7",
"changes_from_parent": [
"wash detection: row-level, \u226424h + \u22651.5x price gain",
"wash detection: dropped wallet concentration heuristic",
"dual-tree ensemble: tuned XGBoost + tuned LightGBM, log-space mean",
"val-derived bias correction (val_bias = -0.0781)",
"4x Optuna budget (120 trials per model)"
],
"splits": {
"train": {
"rows": 265240,
"start": "2022-01-28",
"end": "2023-09-30"
},
"val": {
"rows": 3545,
"start": "2023-10-01",
"end": "2023-12-31"
},
"test": {
"rows": 2744,
"start": "2024-01-01",
"end": "2024-05-04"
}
},
"feature_count": 212,
"feature_cols": [
"pca_000",
"pca_001",
"pca_002",
"pca_003",
"pca_004",
"pca_005",
"pca_006",
"pca_007",
"pca_008",
"pca_009",
"pca_010",
"pca_011",
"pca_012",
"pca_013",
"pca_014",
"pca_015",
"pca_016",
"pca_017",
"pca_018",
"pca_019",
"pca_020",
"pca_021",
"pca_022",
"pca_023",
"pca_024",
"pca_025",
"pca_026",
"pca_027",
"pca_028",
"pca_029",
"pca_030",
"pca_031",
"pca_032",
"pca_033",
"pca_034",
"pca_035",
"pca_036",
"pca_037",
"pca_038",
"pca_039",
"pca_040",
"pca_041",
"pca_042",
"pca_043",
"pca_044",
"pca_045",
"pca_046",
"pca_047",
"pca_048",
"pca_049",
"pca_050",
"pca_051",
"pca_052",
"pca_053",
"pca_054",
"pca_055",
"pca_056",
"pca_057",
"pca_058",
"pca_059",
"pca_060",
"pca_061",
"pca_062",
"pca_063",
"len",
"n_digits",
"n_letters",
"n_special",
"n_lower",
"n_upper",
"is_palindrome",
"is_all_digits",
"is_all_letters",
"is_ascii",
"has_unicode",
"starts_digit",
"ends_digit",
"max_char_run",
"n_unique_chars",
"in_wikipedia",
"in_geonames",
"in_us_firstname",
"in_iso3166",
"in_ticker",
"in_sec_edgar",
"in_wiktionary_en",
"wordlist_hits",
"club__prepunk_full_rankings",
"club__firstnames_usa",
"club__finance_terms",
"club__social_handles",
"club__gamertags",
"club__top_crypto_tickers",
"club__common_english",
"club__sports",
"club__crypto_terms",
"club__gamertags_double",
"club__pokemon_gen4",
"club__fine_art",
"club__logistics",
"club__top500_cities_usa",
"club__natural_wonders",
"club__familynames_usa",
"club__top_nouns",
"club__wikidata_top_fantasy_char",
"club__holidays",
"club__us_government",
"club__top500_cities_global",
"club__pokemon_gen3",
"club__common_animals",
"club__country_codes",
"club__personas",
"club__currency_symbols",
"club__mythical_creatures",
"club__pokemon_gen1",
"club__catholicism",
"club__historic_figures",
"club__luxury",
"club__paranormal",
"club__crayola_classic",
"club__top_crypto_names",
"club__performing_arts",
"club__pokemon_gen2",
"club__home",
"club__conspiracy_theories",
"club__memes",
"club__us_states",
"club__currency_names",
"club__gen_alpha",
"n_clubs",
"trademark_conflict",
"name_age_days",
"prior_transfer_count",
"fg_value",
"eth_tvl_usd",
"eth_stable_mcap",
"eth_dex_volume",
"nft_total_fee_usd",
"fame_score",
"crypto_relevance_ord",
"brand_collision_risk_ord",
"kind__concept",
"kind__random",
"kind__brand",
"kind__surname",
"kind__first_name",
"kind__abbreviation",
"kind__place",
"kind__other",
"kind__unknown",
"origin__english",
"origin__none",
"origin__mixed",
"origin__spanish",
"origin__german",
"origin__french",
"origin__japanese",
"origin__chinese",
"origin__italian",
"origin__slavic",
"origin__korean",
"origin__arabic",
"origin__other",
"origin__unknown",
"desc_pca_000",
"desc_pca_001",
"desc_pca_002",
"desc_pca_003",
"desc_pca_004",
"desc_pca_005",
"desc_pca_006",
"desc_pca_007",
"desc_pca_008",
"desc_pca_009",
"desc_pca_010",
"desc_pca_011",
"desc_pca_012",
"desc_pca_013",
"desc_pca_014",
"desc_pca_015",
"desc_pca_016",
"desc_pca_017",
"desc_pca_018",
"desc_pca_019",
"desc_pca_020",
"desc_pca_021",
"desc_pca_022",
"desc_pca_023",
"desc_pca_024",
"desc_pca_025",
"desc_pca_026",
"desc_pca_027",
"desc_pca_028",
"desc_pca_029",
"desc_pca_030",
"desc_pca_031",
"knnmp_count",
"knnmp_mean_log",
"knnmp_median_log",
"knnmp_p90_log",
"knnmp_max_sim",
"knnmp_min_sim",
"knnmp_log_max",
"knnmp_log_min",
"knnbg_count",
"knnbg_mean_log",
"knnbg_median_log",
"knnbg_p90_log",
"knnbg_max_sim",
"knnbg_min_sim",
"knnbg_log_max",
"knnbg_log_min"
],
"pca_dim_concat": 64,
"pca_dim_description": 32,
"name_kind_values": [
"concept",
"random",
"brand",
"surname",
"first_name",
"abbreviation",
"place",
"other",
"unknown"
],
"cultural_origin_values": [
"english",
"none",
"mixed",
"spanish",
"german",
"french",
"japanese",
"chinese",
"italian",
"slavic",
"korean",
"arabic",
"other",
"unknown"
],
"wash_filtering": {
"method": "row_level",
"round_trip_max_hours": 24,
"round_trip_min_price_gain": 1.5,
"wash_train_weight": 0.1,
"n_round_trip_rows": 8236,
"n_sub_minute_rows": 2612,
"n_total_flagged_rows": 10733,
"flag_rate_pct": 3.9528006216647213
},
"optuna": {
"n_trials_per_model": 120,
"xgb_best_val_rmse": 1.0110926628112793,
"xgb_best_params": {
"max_depth": 12,
"learning_rate": 0.00707952961758631,
"subsample": 0.82649559352978,
"colsample_bytree": 0.5076045191428247,
"colsample_bylevel": 0.4490941323703761,
"min_child_weight": 25,
"reg_alpha": 2.1100924086636215,
"reg_lambda": 6.7085987791616875,
"gamma": 3.012785755618491
},
"xgb_best_trial": 78,
"lgb_best_val_rmse": 1.0083379004456063,
"lgb_best_params": {
"boosting_type": "gbdt",
"num_leaves": 294,
"max_depth": 13,
"learning_rate": 0.007555121763111837,
"feature_fraction": 0.714483668555124,
"feature_fraction_bynode": 0.5304393257914843,
"min_data_in_leaf": 6,
"lambda_l1": 0.002471118357836334,
"lambda_l2": 1.776020947969551,
"min_gain_to_split": 2.2135701585440897,
"bagging_fraction": 0.9999413018023948,
"bagging_freq": 7
},
"lgb_best_trial": 16
},
"xgb_quantile_models": {
"q05": {
"best_iteration": 984,
"best_val_rmse": 1.9671143709945549
},
"q50": {
"best_iteration": 1756,
"best_val_rmse": 1.0471049311283205
},
"q95": {
"best_iteration": 3964,
"best_val_rmse": 2.1785484376377413
}
},
"lgb_quantile_models": {
"q05": {
"best_iteration": 500
},
"q50": {
"best_iteration": 1465
},
"q95": {
"best_iteration": 1123
}
},
"ensemble": {
"method": "log_space_mean",
"members": [
"xgb_q50",
"lgb_q50"
]
},
"bias_correction": {
"method": "val_derived_constant",
"val_bias": -0.0781494239191985
},
"calibration": {
"method": "additive",
"target_coverage": 0.9,
"delta_lower": 0.14621443811853208,
"delta_upper": 0.0,
"val_bias_correction": -0.0781494239191985,
"ensemble_method": "log_space_mean_xgb_lgb"
},
"metrics": {
"final": {
"train": {
"r2_log": 0.7860012517591471,
"rmse_log": 0.7469926912886737,
"mae_log": 0.45205338808523504,
"median_ape": 0.24072927576014078,
"bias_log": -0.0053798850758574246
},
"val": {
"r2_log": 0.6758550101826879,
"rmse_log": 1.0429795602982888,
"mae_log": 0.706429696381903,
"median_ape": 0.43773059069184295,
"bias_log": -1.8039166775291682e-16
},
"test": {
"r2_log": 0.47530460716475276,
"rmse_log": 1.3470754265844747,
"mae_log": 1.0706065416171697,
"median_ape": 0.9519209571310361,
"bias_log": 0.444023565817704
}
},
"coverage": {
"train": {
"coverage_90pct": 0.937335997587091,
"median_interval_log": 2.823927108304763,
"median_interval_ratio": 16.84286471905812
},
"val": {
"coverage_90pct": 0.8493653032440056,
"median_interval_log": 3.6384351371935924,
"median_interval_ratio": 38.03227484212217
},
"test": {
"coverage_90pct": 0.8126822157434402,
"median_interval_log": 4.143242668045713,
"median_interval_ratio": 63.00680069124568
}
}
},
"top_features_xgb": [
{
"name": "knnmp_mean_log",
"gain": 108.798828125
},
{
"name": "knnmp_median_log",
"gain": 78.58806610107422
},
{
"name": "knnmp_log_min",
"gain": 72.71774291992188
},
{
"name": "len",
"gain": 61.729530334472656
},
{
"name": "knnmp_p90_log",
"gain": 58.61769104003906
},
{
"name": "pca_004",
"gain": 42.28458786010742
},
{
"name": "knnmp_log_max",
"gain": 39.73566818237305
},
{
"name": "is_all_digits",
"gain": 38.88374710083008
},
{
"name": "knnmp_count",
"gain": 36.54442596435547
},
{
"name": "pca_002",
"gain": 36.348838806152344
},
{
"name": "kind__random",
"gain": 34.42299270629883
},
{
"name": "in_wikipedia",
"gain": 28.338823318481445
},
{
"name": "eth_stable_mcap",
"gain": 28.019874572753906
},
{
"name": "origin__none",
"gain": 27.93862533569336
},
{
"name": "n_digits",
"gain": 26.823389053344727
},
{
"name": "ends_digit",
"gain": 25.347196578979492
},
{
"name": "n_unique_chars",
"gain": 25.27269172668457
},
{
"name": "name_age_days",
"gain": 25.112060546875
},
{
"name": "eth_tvl_usd",
"gain": 24.409042358398438
},
{
"name": "starts_digit",
"gain": 23.97994613647461
},
{
"name": "knnbg_count",
"gain": 23.276514053344727
},
{
"name": "is_palindrome",
"gain": 22.497846603393555
},
{
"name": "brand_collision_risk_ord",
"gain": 21.805770874023438
},
{
"name": "origin__chinese",
"gain": 21.738651275634766
},
{
"name": "n_letters",
"gain": 20.87726402282715
},
{
"name": "kind__abbreviation",
"gain": 20.590986251831055
},
{
"name": "pca_001",
"gain": 20.483781814575195
},
{
"name": "has_unicode",
"gain": 19.911903381347656
},
{
"name": "nft_total_fee_usd",
"gain": 19.79189682006836
},
{
"name": "desc_pca_000",
"gain": 19.757749557495117
}
],
"top_features_lgb": [
{
"name": "knnmp_mean_log",
"gain": 781449.8409805298
},
{
"name": "eth_stable_mcap",
"gain": 780319.1262354851
},
{
"name": "name_age_days",
"gain": 621649.1777780056
},
{
"name": "eth_tvl_usd",
"gain": 549847.3152387142
},
{
"name": "len",
"gain": 535403.315526247
},
{
"name": "knnmp_median_log",
"gain": 476187.2463479042
},
{
"name": "nft_total_fee_usd",
"gain": 376482.9395339489
},
{
"name": "knnmp_p90_log",
"gain": 299560.9693763256
},
{
"name": "fg_value",
"gain": 291222.41970157623
},
{
"name": "n_digits",
"gain": 200339.19226312637
},
{
"name": "knnmp_count",
"gain": 198496.79864764214
},
{
"name": "knnmp_log_min",
"gain": 196220.42209792137
},
{
"name": "eth_dex_volume",
"gain": 183750.19882249832
},
{
"name": "n_unique_chars",
"gain": 178599.7514090538
},
{
"name": "knnmp_log_max",
"gain": 165553.80459403992
},
{
"name": "pca_004",
"gain": 119861.93015813828
},
{
"name": "desc_pca_000",
"gain": 114556.08047294617
},
{
"name": "pca_002",
"gain": 104693.19005703926
},
{
"name": "n_letters",
"gain": 83801.49758315086
},
{
"name": "pca_001",
"gain": 82503.51477956772
},
{
"name": "n_special",
"gain": 70174.17102837563
},
{
"name": "is_palindrome",
"gain": 64647.565772771835
},
{
"name": "is_all_digits",
"gain": 56822.11631703377
},
{
"name": "desc_pca_002",
"gain": 47493.986828804016
},
{
"name": "n_lower",
"gain": 44417.38100814819
},
{
"name": "max_char_run",
"gain": 40815.91745662689
},
{
"name": "knnbg_count",
"gain": 39852.69767045975
},
{
"name": "fame_score",
"gain": 36314.24634027481
},
{
"name": "pca_005",
"gain": 34642.322847127914
},
{
"name": "knnmp_min_sim",
"gain": 34452.339801073074
}
],
"family_gain_split_xgb": {
"mpnet_ft_knn": 416.6655960083008,
"bge_knn": 94.33125257492065,
"llm_kind": 89.02087259292603,
"llm_origin": 129.38252782821655,
"llm_scores": 46.957244873046875,
"llm_desc": 279.1159596443176,
"llm_total": 544.4766049385071
},
"inference_recipe": {
"description": "Inference uses 6 models (XGB\u00d73 + LGB\u00d73) + bias + calibration constants",
"point_estimate": "(xgb_q50.predict() + lgb_q50.predict()) / 2 - val_bias",
"uncalibrated_low": "(xgb_q05.predict() + lgb_q05.predict()) / 2 - val_bias",
"uncalibrated_high": "(xgb_q95.predict() + lgb_q95.predict()) / 2 - val_bias",
"calibrated_low": "uncalibrated_low - delta_lower",
"calibrated_high": "uncalibrated_high + delta_upper",
"output_usd": "np.exp(final)"
},
"wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/d3y6y5c6"
}