ens-appraiser / v0_7_metadata.json
quantumly's picture
v0.7 wash filtering + recalibration: 2026-04-27
985bd86 verified
{
"trained_at": "2026-04-27T08:32:28.319255+00:00",
"data_run_date": "2026-04-25",
"llm_run_date": "2026-04-26",
"version": "v0.7-wash-recalibration",
"description": "v0.6 features + wash-trade detection/down-weighting + dropped tail correction + conformal quantile recalibration",
"parent_version": "v0.6",
"changes_from_parent": [
"Wash-trade detection (round-trip \u22647d, sub-minute resale, wallet concentration >70%)",
"Down-weight wash rows in training (weight=0.1)",
"DROPPED tail-correction stacking (overfit on v0.6, test \u0394 R\u00b2 = -0.027)",
"Conformal quantile recalibration via val set"
],
"embedders": [
"mpnet-finetuned (from v0.3)",
"BAAI/bge-base-en-v1.5"
],
"splits": {
"train": {
"rows": 265240,
"start": "2022-01-28",
"end": "2023-09-30"
},
"val": {
"rows": 3545,
"start": "2023-10-01",
"end": "2023-12-31"
},
"test": {
"rows": 2744,
"start": "2024-01-01",
"end": "2024-05-04"
}
},
"feature_count": 212,
"feature_cols": [
"pca_000",
"pca_001",
"pca_002",
"pca_003",
"pca_004",
"pca_005",
"pca_006",
"pca_007",
"pca_008",
"pca_009",
"pca_010",
"pca_011",
"pca_012",
"pca_013",
"pca_014",
"pca_015",
"pca_016",
"pca_017",
"pca_018",
"pca_019",
"pca_020",
"pca_021",
"pca_022",
"pca_023",
"pca_024",
"pca_025",
"pca_026",
"pca_027",
"pca_028",
"pca_029",
"pca_030",
"pca_031",
"pca_032",
"pca_033",
"pca_034",
"pca_035",
"pca_036",
"pca_037",
"pca_038",
"pca_039",
"pca_040",
"pca_041",
"pca_042",
"pca_043",
"pca_044",
"pca_045",
"pca_046",
"pca_047",
"pca_048",
"pca_049",
"pca_050",
"pca_051",
"pca_052",
"pca_053",
"pca_054",
"pca_055",
"pca_056",
"pca_057",
"pca_058",
"pca_059",
"pca_060",
"pca_061",
"pca_062",
"pca_063",
"len",
"n_digits",
"n_letters",
"n_special",
"n_lower",
"n_upper",
"is_palindrome",
"is_all_digits",
"is_all_letters",
"is_ascii",
"has_unicode",
"starts_digit",
"ends_digit",
"max_char_run",
"n_unique_chars",
"in_wikipedia",
"in_geonames",
"in_us_firstname",
"in_iso3166",
"in_ticker",
"in_sec_edgar",
"in_wiktionary_en",
"wordlist_hits",
"club__prepunk_full_rankings",
"club__historic_figures",
"club__gamertags",
"club__firstnames_usa",
"club__top500_cities_global",
"club__familynames_usa",
"club__social_handles",
"club__country_codes",
"club__sports",
"club__crypto_terms",
"club__common_english",
"club__top500_cities_usa",
"club__top_nouns",
"club__home",
"club__conspiracy_theories",
"club__us_government",
"club__performing_arts",
"club__gamertags_double",
"club__mythical_creatures",
"club__finance_terms",
"club__catholicism",
"club__natural_wonders",
"club__fine_art",
"club__personas",
"club__luxury",
"club__pokemon_gen1",
"club__memes",
"club__top_crypto_tickers",
"club__currency_symbols",
"club__common_animals",
"club__logistics",
"club__currency_names",
"club__wikidata_top_fantasy_char",
"club__top_crypto_names",
"club__gen_alpha",
"club__pokemon_gen3",
"club__holidays",
"club__pokemon_gen2",
"club__paranormal",
"club__crayola_classic",
"club__pokemon_gen4",
"club__us_states",
"n_clubs",
"trademark_conflict",
"name_age_days",
"prior_transfer_count",
"fg_value",
"eth_tvl_usd",
"eth_stable_mcap",
"eth_dex_volume",
"nft_total_fee_usd",
"fame_score",
"crypto_relevance_ord",
"brand_collision_risk_ord",
"kind__concept",
"kind__random",
"kind__brand",
"kind__surname",
"kind__first_name",
"kind__abbreviation",
"kind__place",
"kind__other",
"kind__unknown",
"origin__english",
"origin__none",
"origin__mixed",
"origin__spanish",
"origin__german",
"origin__french",
"origin__japanese",
"origin__chinese",
"origin__italian",
"origin__slavic",
"origin__korean",
"origin__arabic",
"origin__other",
"origin__unknown",
"desc_pca_000",
"desc_pca_001",
"desc_pca_002",
"desc_pca_003",
"desc_pca_004",
"desc_pca_005",
"desc_pca_006",
"desc_pca_007",
"desc_pca_008",
"desc_pca_009",
"desc_pca_010",
"desc_pca_011",
"desc_pca_012",
"desc_pca_013",
"desc_pca_014",
"desc_pca_015",
"desc_pca_016",
"desc_pca_017",
"desc_pca_018",
"desc_pca_019",
"desc_pca_020",
"desc_pca_021",
"desc_pca_022",
"desc_pca_023",
"desc_pca_024",
"desc_pca_025",
"desc_pca_026",
"desc_pca_027",
"desc_pca_028",
"desc_pca_029",
"desc_pca_030",
"desc_pca_031",
"knnmp_count",
"knnmp_mean_log",
"knnmp_median_log",
"knnmp_p90_log",
"knnmp_max_sim",
"knnmp_min_sim",
"knnmp_log_max",
"knnmp_log_min",
"knnbg_count",
"knnbg_mean_log",
"knnbg_median_log",
"knnbg_p90_log",
"knnbg_max_sim",
"knnbg_min_sim",
"knnbg_log_max",
"knnbg_log_min"
],
"pca_dim_concat": 64,
"pca_dim_description": 32,
"name_kind_values": [
"concept",
"random",
"brand",
"surname",
"first_name",
"abbreviation",
"place",
"other",
"unknown"
],
"cultural_origin_values": [
"english",
"none",
"mixed",
"spanish",
"german",
"french",
"japanese",
"chinese",
"italian",
"slavic",
"korean",
"arabic",
"other",
"unknown"
],
"wash_filtering": {
"n_round_trip_labels": 23049,
"n_sub_minute_labels": 1222,
"n_high_conc_labels": 26255,
"n_total_flagged_labels": 39995,
"wash_train_weight": 0.1,
"train_wash_pct": 41.393832001206455
},
"best_xgb_params": {
"tree_method": "hist",
"device": "cuda",
"seed": 42,
"max_depth": 11,
"learning_rate": 0.00607909965562535,
"subsample": 0.5242971482550959,
"colsample_bytree": 0.5178101703841028,
"colsample_bylevel": 0.5676418253037602,
"min_child_weight": 3,
"reg_alpha": 4.318424044857591,
"reg_lambda": 1.7796015928106095,
"gamma": 2.982764564516846
},
"optuna": {
"n_trials": 30,
"best_val_rmse": 1.016706943511963,
"best_params": {
"max_depth": 11,
"learning_rate": 0.00607909965562535,
"subsample": 0.5242971482550959,
"colsample_bytree": 0.5178101703841028,
"colsample_bylevel": 0.5676418253037602,
"min_child_weight": 3,
"reg_alpha": 4.318424044857591,
"reg_lambda": 1.7796015928106095,
"gamma": 2.982764564516846
},
"best_trial": 28,
"warm_started_with_v0_6_best": true
},
"quantile_models": {
"q05": {
"best_iteration": 1527,
"best_val_rmse": 1.9560544421029673
},
"q50": {
"best_iteration": 1834,
"best_val_rmse": 1.045858812580417
},
"q95": {
"best_iteration": 3355,
"best_val_rmse": 2.1802895661452273
}
},
"calibration": {
"method": "additive",
"target_coverage": 0.9,
"delta_lower_additive": 0.08095530420541763,
"delta_upper_additive": 0.0,
"multiplicative_factor": 1.239099800588292,
"val_coverage_after": 0.8595204513399154
},
"metrics": {
"final": {
"train": {
"r2_log": 0.7497193813323975,
"rmse_log": 0.8078381419181824,
"mae_log": 0.47278380393981934,
"median_ape": 0.23963597416877747,
"bias_log": -0.1186392605304718
},
"val": {
"r2_log": 0.6740628480911255,
"rmse_log": 1.0458588600158691,
"mae_log": 0.7022704482078552,
"median_ape": 0.413220077753067,
"bias_log": -0.09871374070644379
},
"test": {
"r2_log": 0.4626653790473938,
"rmse_log": 1.363203525543213,
"mae_log": 1.0832252502441406,
"median_ape": 0.9336060285568237,
"bias_log": 0.4047386348247528
}
},
"coverage": {
"train": {
"coverage_90pct": 0.920309908007842,
"median_interval_log": 2.7887799739837646,
"median_interval_ratio": 16.261168645984977
},
"val": {
"coverage_90pct": 0.8595204513399154,
"median_interval_log": 3.658205986022949,
"median_interval_ratio": 38.79168757902037
},
"test": {
"coverage_90pct": 0.8083090379008746,
"median_interval_log": 4.073790550231934,
"median_interval_ratio": 58.77934691322367
}
}
},
"top_features": [
{
"name": "len",
"gain": 58.87883758544922
},
{
"name": "knnmp_mean_log",
"gain": 46.72057342529297
},
{
"name": "knnmp_median_log",
"gain": 41.08598327636719
},
{
"name": "is_all_digits",
"gain": 38.29785919189453
},
{
"name": "knnmp_count",
"gain": 34.5153923034668
},
{
"name": "knnmp_p90_log",
"gain": 31.491409301757812
},
{
"name": "in_wikipedia",
"gain": 30.60625457763672
},
{
"name": "knnmp_log_max",
"gain": 24.936614990234375
},
{
"name": "is_ascii",
"gain": 24.386781692504883
},
{
"name": "ends_digit",
"gain": 24.157560348510742
},
{
"name": "n_digits",
"gain": 24.11026382446289
},
{
"name": "has_unicode",
"gain": 23.687318801879883
},
{
"name": "name_age_days",
"gain": 22.8796443939209
},
{
"name": "origin__none",
"gain": 22.28803062438965
},
{
"name": "eth_stable_mcap",
"gain": 22.02800941467285
},
{
"name": "pca_002",
"gain": 21.55613136291504
},
{
"name": "knnbg_count",
"gain": 20.121904373168945
},
{
"name": "knnmp_log_min",
"gain": 19.98278045654297
},
{
"name": "n_unique_chars",
"gain": 19.560546875
},
{
"name": "kind__random",
"gain": 19.375856399536133
},
{
"name": "starts_digit",
"gain": 18.9150390625
},
{
"name": "origin__chinese",
"gain": 18.488323211669922
},
{
"name": "eth_tvl_usd",
"gain": 18.1763858795166
},
{
"name": "n_lower",
"gain": 17.70460319519043
},
{
"name": "n_letters",
"gain": 17.535446166992188
},
{
"name": "pca_004",
"gain": 16.79183578491211
},
{
"name": "kind__abbreviation",
"gain": 16.659326553344727
},
{
"name": "desc_pca_000",
"gain": 16.628366470336914
},
{
"name": "is_palindrome",
"gain": 16.464502334594727
},
{
"name": "fg_value",
"gain": 15.839948654174805
}
],
"family_gain_split": {
"mpnet_ft_knn": 217.51888847351074,
"bge_knn": 67.69801044464111,
"llm_kind": 68.03838443756104,
"llm_origin": 118.48860311508179,
"llm_scores": 37.4162712097168,
"llm_desc": 208.6804609298706,
"llm_total": 432.6237196922302
},
"inference_recipe": {
"description": "Inference uses 3 models + calibration constants",
"point_estimate": "final_log = q50_model(features)",
"uncalibrated_low": "low_log_raw = q05_model(features)",
"uncalibrated_high": "high_log_raw = q95_model(features)",
"calibrated_low_additive": "low_log = low_log_raw - delta_lower_additive",
"calibrated_high_additive": "high_log = high_log_raw + delta_upper_additive",
"calibrated_low_multiplicative": "low_log = q50 - multiplicative_factor * (q50 - low_log_raw)",
"calibrated_high_multiplicative": "high_log = q50 + multiplicative_factor * (high_log_raw - q50)",
"use_method": "additive",
"output_usd": "np.exp(final_log)"
},
"wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/xd2jhbwk"
}