{ "trained_at": "2026-04-27T08:32:28.319255+00:00", "data_run_date": "2026-04-25", "llm_run_date": "2026-04-26", "version": "v0.7-wash-recalibration", "description": "v0.6 features + wash-trade detection/down-weighting + dropped tail correction + conformal quantile recalibration", "parent_version": "v0.6", "changes_from_parent": [ "Wash-trade detection (round-trip \u22647d, sub-minute resale, wallet concentration >70%)", "Down-weight wash rows in training (weight=0.1)", "DROPPED tail-correction stacking (overfit on v0.6, test \u0394 R\u00b2 = -0.027)", "Conformal quantile recalibration via val set" ], "embedders": [ "mpnet-finetuned (from v0.3)", "BAAI/bge-base-en-v1.5" ], "splits": { "train": { "rows": 265240, "start": "2022-01-28", "end": "2023-09-30" }, "val": { "rows": 3545, "start": "2023-10-01", "end": "2023-12-31" }, "test": { "rows": 2744, "start": "2024-01-01", "end": "2024-05-04" } }, "feature_count": 212, "feature_cols": [ "pca_000", "pca_001", "pca_002", "pca_003", "pca_004", "pca_005", "pca_006", "pca_007", "pca_008", "pca_009", "pca_010", "pca_011", "pca_012", "pca_013", "pca_014", "pca_015", "pca_016", "pca_017", "pca_018", "pca_019", "pca_020", "pca_021", "pca_022", "pca_023", "pca_024", "pca_025", "pca_026", "pca_027", "pca_028", "pca_029", "pca_030", "pca_031", "pca_032", "pca_033", "pca_034", "pca_035", "pca_036", "pca_037", "pca_038", "pca_039", "pca_040", "pca_041", "pca_042", "pca_043", "pca_044", "pca_045", "pca_046", "pca_047", "pca_048", "pca_049", "pca_050", "pca_051", "pca_052", "pca_053", "pca_054", "pca_055", "pca_056", "pca_057", "pca_058", "pca_059", "pca_060", "pca_061", "pca_062", "pca_063", "len", "n_digits", "n_letters", "n_special", "n_lower", "n_upper", "is_palindrome", "is_all_digits", "is_all_letters", "is_ascii", "has_unicode", "starts_digit", "ends_digit", "max_char_run", "n_unique_chars", "in_wikipedia", "in_geonames", "in_us_firstname", "in_iso3166", "in_ticker", "in_sec_edgar", "in_wiktionary_en", "wordlist_hits", "club__prepunk_full_rankings", "club__historic_figures", "club__gamertags", "club__firstnames_usa", "club__top500_cities_global", "club__familynames_usa", "club__social_handles", "club__country_codes", "club__sports", "club__crypto_terms", "club__common_english", "club__top500_cities_usa", "club__top_nouns", "club__home", "club__conspiracy_theories", "club__us_government", "club__performing_arts", "club__gamertags_double", "club__mythical_creatures", "club__finance_terms", "club__catholicism", "club__natural_wonders", "club__fine_art", "club__personas", "club__luxury", "club__pokemon_gen1", "club__memes", "club__top_crypto_tickers", "club__currency_symbols", "club__common_animals", "club__logistics", "club__currency_names", "club__wikidata_top_fantasy_char", "club__top_crypto_names", "club__gen_alpha", "club__pokemon_gen3", "club__holidays", "club__pokemon_gen2", "club__paranormal", "club__crayola_classic", "club__pokemon_gen4", "club__us_states", "n_clubs", "trademark_conflict", "name_age_days", "prior_transfer_count", "fg_value", "eth_tvl_usd", "eth_stable_mcap", "eth_dex_volume", "nft_total_fee_usd", "fame_score", "crypto_relevance_ord", "brand_collision_risk_ord", "kind__concept", "kind__random", "kind__brand", "kind__surname", "kind__first_name", "kind__abbreviation", "kind__place", "kind__other", "kind__unknown", "origin__english", "origin__none", "origin__mixed", "origin__spanish", "origin__german", "origin__french", "origin__japanese", "origin__chinese", "origin__italian", "origin__slavic", "origin__korean", "origin__arabic", "origin__other", "origin__unknown", "desc_pca_000", "desc_pca_001", "desc_pca_002", "desc_pca_003", "desc_pca_004", "desc_pca_005", "desc_pca_006", "desc_pca_007", "desc_pca_008", "desc_pca_009", "desc_pca_010", "desc_pca_011", "desc_pca_012", "desc_pca_013", "desc_pca_014", "desc_pca_015", "desc_pca_016", "desc_pca_017", "desc_pca_018", "desc_pca_019", "desc_pca_020", "desc_pca_021", "desc_pca_022", "desc_pca_023", "desc_pca_024", "desc_pca_025", "desc_pca_026", "desc_pca_027", "desc_pca_028", "desc_pca_029", "desc_pca_030", "desc_pca_031", "knnmp_count", "knnmp_mean_log", "knnmp_median_log", "knnmp_p90_log", "knnmp_max_sim", "knnmp_min_sim", "knnmp_log_max", "knnmp_log_min", "knnbg_count", "knnbg_mean_log", "knnbg_median_log", "knnbg_p90_log", "knnbg_max_sim", "knnbg_min_sim", "knnbg_log_max", "knnbg_log_min" ], "pca_dim_concat": 64, "pca_dim_description": 32, "name_kind_values": [ "concept", "random", "brand", "surname", "first_name", "abbreviation", "place", "other", "unknown" ], "cultural_origin_values": [ "english", "none", "mixed", "spanish", "german", "french", "japanese", "chinese", "italian", "slavic", "korean", "arabic", "other", "unknown" ], "wash_filtering": { "n_round_trip_labels": 23049, "n_sub_minute_labels": 1222, "n_high_conc_labels": 26255, "n_total_flagged_labels": 39995, "wash_train_weight": 0.1, "train_wash_pct": 41.393832001206455 }, "best_xgb_params": { "tree_method": "hist", "device": "cuda", "seed": 42, "max_depth": 11, "learning_rate": 0.00607909965562535, "subsample": 0.5242971482550959, "colsample_bytree": 0.5178101703841028, "colsample_bylevel": 0.5676418253037602, "min_child_weight": 3, "reg_alpha": 4.318424044857591, "reg_lambda": 1.7796015928106095, "gamma": 2.982764564516846 }, "optuna": { "n_trials": 30, "best_val_rmse": 1.016706943511963, "best_params": { "max_depth": 11, "learning_rate": 0.00607909965562535, "subsample": 0.5242971482550959, "colsample_bytree": 0.5178101703841028, "colsample_bylevel": 0.5676418253037602, "min_child_weight": 3, "reg_alpha": 4.318424044857591, "reg_lambda": 1.7796015928106095, "gamma": 2.982764564516846 }, "best_trial": 28, "warm_started_with_v0_6_best": true }, "quantile_models": { "q05": { "best_iteration": 1527, "best_val_rmse": 1.9560544421029673 }, "q50": { "best_iteration": 1834, "best_val_rmse": 1.045858812580417 }, "q95": { "best_iteration": 3355, "best_val_rmse": 2.1802895661452273 } }, "calibration": { "method": "additive", "target_coverage": 0.9, "delta_lower_additive": 0.08095530420541763, "delta_upper_additive": 0.0, "multiplicative_factor": 1.239099800588292, "val_coverage_after": 0.8595204513399154 }, "metrics": { "final": { "train": { "r2_log": 0.7497193813323975, "rmse_log": 0.8078381419181824, "mae_log": 0.47278380393981934, "median_ape": 0.23963597416877747, "bias_log": -0.1186392605304718 }, "val": { "r2_log": 0.6740628480911255, "rmse_log": 1.0458588600158691, "mae_log": 0.7022704482078552, "median_ape": 0.413220077753067, "bias_log": -0.09871374070644379 }, "test": { "r2_log": 0.4626653790473938, "rmse_log": 1.363203525543213, "mae_log": 1.0832252502441406, "median_ape": 0.9336060285568237, "bias_log": 0.4047386348247528 } }, "coverage": { "train": { "coverage_90pct": 0.920309908007842, "median_interval_log": 2.7887799739837646, "median_interval_ratio": 16.261168645984977 }, "val": { "coverage_90pct": 0.8595204513399154, "median_interval_log": 3.658205986022949, "median_interval_ratio": 38.79168757902037 }, "test": { "coverage_90pct": 0.8083090379008746, "median_interval_log": 4.073790550231934, "median_interval_ratio": 58.77934691322367 } } }, "top_features": [ { "name": "len", "gain": 58.87883758544922 }, { "name": "knnmp_mean_log", "gain": 46.72057342529297 }, { "name": "knnmp_median_log", "gain": 41.08598327636719 }, { "name": "is_all_digits", "gain": 38.29785919189453 }, { "name": "knnmp_count", "gain": 34.5153923034668 }, { "name": "knnmp_p90_log", "gain": 31.491409301757812 }, { "name": "in_wikipedia", "gain": 30.60625457763672 }, { "name": "knnmp_log_max", "gain": 24.936614990234375 }, { "name": "is_ascii", "gain": 24.386781692504883 }, { "name": "ends_digit", "gain": 24.157560348510742 }, { "name": "n_digits", "gain": 24.11026382446289 }, { "name": "has_unicode", "gain": 23.687318801879883 }, { "name": "name_age_days", "gain": 22.8796443939209 }, { "name": "origin__none", "gain": 22.28803062438965 }, { "name": "eth_stable_mcap", "gain": 22.02800941467285 }, { "name": "pca_002", "gain": 21.55613136291504 }, { "name": "knnbg_count", "gain": 20.121904373168945 }, { "name": "knnmp_log_min", "gain": 19.98278045654297 }, { "name": "n_unique_chars", "gain": 19.560546875 }, { "name": "kind__random", "gain": 19.375856399536133 }, { "name": "starts_digit", "gain": 18.9150390625 }, { "name": "origin__chinese", "gain": 18.488323211669922 }, { "name": "eth_tvl_usd", "gain": 18.1763858795166 }, { "name": "n_lower", "gain": 17.70460319519043 }, { "name": "n_letters", "gain": 17.535446166992188 }, { "name": "pca_004", "gain": 16.79183578491211 }, { "name": "kind__abbreviation", "gain": 16.659326553344727 }, { "name": "desc_pca_000", "gain": 16.628366470336914 }, { "name": "is_palindrome", "gain": 16.464502334594727 }, { "name": "fg_value", "gain": 15.839948654174805 } ], "family_gain_split": { "mpnet_ft_knn": 217.51888847351074, "bge_knn": 67.69801044464111, "llm_kind": 68.03838443756104, "llm_origin": 118.48860311508179, "llm_scores": 37.4162712097168, "llm_desc": 208.6804609298706, "llm_total": 432.6237196922302 }, "inference_recipe": { "description": "Inference uses 3 models + calibration constants", "point_estimate": "final_log = q50_model(features)", "uncalibrated_low": "low_log_raw = q05_model(features)", "uncalibrated_high": "high_log_raw = q95_model(features)", "calibrated_low_additive": "low_log = low_log_raw - delta_lower_additive", "calibrated_high_additive": "high_log = high_log_raw + delta_upper_additive", "calibrated_low_multiplicative": "low_log = q50 - multiplicative_factor * (q50 - low_log_raw)", "calibrated_high_multiplicative": "high_log = q50 + multiplicative_factor * (high_log_raw - q50)", "use_method": "additive", "output_usd": "np.exp(final_log)" }, "wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/xd2jhbwk" }