ens-appraiser / v0_5_metadata.json
quantumly's picture
v0.5 LLM-features appraiser: 2026-04-27
1bde3e5 verified
{
"trained_at": "2026-04-27T06:37:23.331946+00:00",
"data_run_date": "2026-04-25",
"llm_run_date": "2026-04-26",
"version": "v0.5-llm-features",
"description": "v0.4 + LLM-derived features (name_kind, cultural_origin, fame_score, crypto_relevance, brand_collision_risk, description_emb_pca32)",
"parent_version": "v0.4",
"embedders": [
"mpnet-finetuned (from v0.3)",
"BAAI/bge-base-en-v1.5"
],
"splits": {
"train": {
"rows": 265240,
"start": "2022-01-28",
"end": "2023-09-30"
},
"val": {
"rows": 3545,
"start": "2023-10-01",
"end": "2023-12-31"
},
"test": {
"rows": 2744,
"start": "2024-01-01",
"end": "2024-05-04"
}
},
"feature_count": 212,
"n_llm_features": 56,
"feature_cols": [
"pca_000",
"pca_001",
"pca_002",
"pca_003",
"pca_004",
"pca_005",
"pca_006",
"pca_007",
"pca_008",
"pca_009",
"pca_010",
"pca_011",
"pca_012",
"pca_013",
"pca_014",
"pca_015",
"pca_016",
"pca_017",
"pca_018",
"pca_019",
"pca_020",
"pca_021",
"pca_022",
"pca_023",
"pca_024",
"pca_025",
"pca_026",
"pca_027",
"pca_028",
"pca_029",
"pca_030",
"pca_031",
"pca_032",
"pca_033",
"pca_034",
"pca_035",
"pca_036",
"pca_037",
"pca_038",
"pca_039",
"pca_040",
"pca_041",
"pca_042",
"pca_043",
"pca_044",
"pca_045",
"pca_046",
"pca_047",
"pca_048",
"pca_049",
"pca_050",
"pca_051",
"pca_052",
"pca_053",
"pca_054",
"pca_055",
"pca_056",
"pca_057",
"pca_058",
"pca_059",
"pca_060",
"pca_061",
"pca_062",
"pca_063",
"len",
"n_digits",
"n_letters",
"n_special",
"n_lower",
"n_upper",
"is_palindrome",
"is_all_digits",
"is_all_letters",
"is_ascii",
"has_unicode",
"starts_digit",
"ends_digit",
"max_char_run",
"n_unique_chars",
"in_wikipedia",
"in_geonames",
"in_us_firstname",
"in_iso3166",
"in_ticker",
"in_sec_edgar",
"in_wiktionary_en",
"wordlist_hits",
"club__prepunk_full_rankings",
"club__common_english",
"club__common_animals",
"club__gamertags",
"club__firstnames_usa",
"club__top_nouns",
"club__us_government",
"club__pokemon_gen1",
"club__catholicism",
"club__gamertags_double",
"club__wikidata_top_fantasy_char",
"club__social_handles",
"club__pokemon_gen2",
"club__top500_cities_global",
"club__top500_cities_usa",
"club__mythical_creatures",
"club__conspiracy_theories",
"club__crypto_terms",
"club__natural_wonders",
"club__pokemon_gen3",
"club__familynames_usa",
"club__country_codes",
"club__sports",
"club__top_crypto_names",
"club__logistics",
"club__home",
"club__historic_figures",
"club__holidays",
"club__top_crypto_tickers",
"club__paranormal",
"club__performing_arts",
"club__fine_art",
"club__currency_symbols",
"club__luxury",
"club__personas",
"club__finance_terms",
"club__currency_names",
"club__pokemon_gen4",
"club__gen_alpha",
"club__crayola_classic",
"club__memes",
"club__us_states",
"n_clubs",
"trademark_conflict",
"name_age_days",
"prior_transfer_count",
"fg_value",
"eth_tvl_usd",
"eth_stable_mcap",
"eth_dex_volume",
"nft_total_fee_usd",
"fame_score",
"crypto_relevance_ord",
"brand_collision_risk_ord",
"kind__concept",
"kind__random",
"kind__brand",
"kind__surname",
"kind__first_name",
"kind__abbreviation",
"kind__place",
"kind__other",
"kind__unknown",
"origin__english",
"origin__none",
"origin__mixed",
"origin__spanish",
"origin__german",
"origin__french",
"origin__japanese",
"origin__chinese",
"origin__italian",
"origin__slavic",
"origin__korean",
"origin__arabic",
"origin__other",
"origin__unknown",
"desc_pca_000",
"desc_pca_001",
"desc_pca_002",
"desc_pca_003",
"desc_pca_004",
"desc_pca_005",
"desc_pca_006",
"desc_pca_007",
"desc_pca_008",
"desc_pca_009",
"desc_pca_010",
"desc_pca_011",
"desc_pca_012",
"desc_pca_013",
"desc_pca_014",
"desc_pca_015",
"desc_pca_016",
"desc_pca_017",
"desc_pca_018",
"desc_pca_019",
"desc_pca_020",
"desc_pca_021",
"desc_pca_022",
"desc_pca_023",
"desc_pca_024",
"desc_pca_025",
"desc_pca_026",
"desc_pca_027",
"desc_pca_028",
"desc_pca_029",
"desc_pca_030",
"desc_pca_031",
"knnmp_count",
"knnmp_mean_log",
"knnmp_median_log",
"knnmp_p90_log",
"knnmp_max_sim",
"knnmp_min_sim",
"knnmp_log_max",
"knnmp_log_min",
"knnbg_count",
"knnbg_mean_log",
"knnbg_median_log",
"knnbg_p90_log",
"knnbg_max_sim",
"knnbg_min_sim",
"knnbg_log_max",
"knnbg_log_min"
],
"pca_dim_concat": 64,
"pca_dim_description": 32,
"name_kind_values": [
"concept",
"random",
"brand",
"surname",
"first_name",
"abbreviation",
"place",
"other",
"unknown"
],
"cultural_origin_values": [
"english",
"none",
"mixed",
"spanish",
"german",
"french",
"japanese",
"chinese",
"italian",
"slavic",
"korean",
"arabic",
"other",
"unknown"
],
"best_iteration": 712,
"xgb_params": {
"objective": "reg:squarederror",
"eval_metric": "rmse",
"tree_method": "hist",
"device": "cuda",
"max_depth": 7,
"learning_rate": 0.04,
"subsample": 0.85,
"colsample_bytree": 0.65,
"min_child_weight": 8,
"reg_alpha": 0.5,
"reg_lambda": 2.0,
"seed": 42
},
"metrics": {
"train": {
"r2_log": 0.8462194204330444,
"rmse_log": 0.6332300305366516,
"mae_log": 0.4247952699661255,
"median_ape": 0.26942259073257446,
"bias_log": -0.0002484263095539063
},
"val": {
"r2_log": 0.6880427002906799,
"rmse_log": 1.0231839418411255,
"mae_log": 0.7111709713935852,
"median_ape": 0.4590761661529541,
"bias_log": 0.11442188918590546
},
"test": {
"r2_log": 0.41624486446380615,
"rmse_log": 1.420867681503296,
"mae_log": 1.1492146253585815,
"median_ape": 1.0128521919250488,
"bias_log": 0.6260586380958557
}
},
"top_features": [
{
"name": "knnmp_mean_log",
"gain": 2001.844970703125
},
{
"name": "knnmp_median_log",
"gain": 1155.0853271484375
},
{
"name": "knnmp_p90_log",
"gain": 832.7803344726562
},
{
"name": "is_all_digits",
"gain": 782.1705322265625
},
{
"name": "in_wikipedia",
"gain": 531.300048828125
},
{
"name": "len",
"gain": 489.9974365234375
},
{
"name": "knnmp_log_max",
"gain": 330.5997619628906
},
{
"name": "n_clubs",
"gain": 207.94166564941406
},
{
"name": "n_letters",
"gain": 196.677490234375
},
{
"name": "origin__chinese",
"gain": 170.69927978515625
},
{
"name": "ends_digit",
"gain": 165.2652587890625
},
{
"name": "knnmp_count",
"gain": 161.44723510742188
},
{
"name": "n_digits",
"gain": 159.4886016845703
},
{
"name": "name_age_days",
"gain": 156.08135986328125
},
{
"name": "n_lower",
"gain": 148.55072021484375
},
{
"name": "n_special",
"gain": 141.7484130859375
},
{
"name": "is_palindrome",
"gain": 139.62179565429688
},
{
"name": "n_unique_chars",
"gain": 124.55799102783203
},
{
"name": "eth_stable_mcap",
"gain": 124.44175720214844
},
{
"name": "trademark_conflict",
"gain": 116.59305572509766
},
{
"name": "eth_tvl_usd",
"gain": 107.47349548339844
},
{
"name": "has_unicode",
"gain": 106.94119262695312
},
{
"name": "fame_score",
"gain": 103.080078125
},
{
"name": "knnbg_log_max",
"gain": 102.78841400146484
},
{
"name": "club__prepunk_full_rankings",
"gain": 97.14898681640625
},
{
"name": "is_ascii",
"gain": 96.375
},
{
"name": "club__gamertags",
"gain": 94.76091766357422
},
{
"name": "pca_045",
"gain": 86.68685913085938
},
{
"name": "pca_002",
"gain": 85.17119598388672
},
{
"name": "pca_004",
"gain": 81.07220458984375
}
],
"gain_split": {
"mpnet_ft_knn": 4593.555641174316,
"bge_knn": 324.30273818969727,
"llm_name_kind": 197.1177396774292,
"llm_cultural_origin": 421.92126178741455,
"llm_scores": 103.080078125,
"llm_description": 748.444676399231,
"llm_total": 1470.5637559890747
},
"wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/m1mlf4h0"
}