File size: 1,576 Bytes
{
"model_type": "tabula_transformer",
"architecture": "TabularTransformer",
"d_model": 256,
"n_heads": 8,
"n_layers": 8,
"d_ff": 512,
"dropout": 0.1,
"ffn_activation": "swiglu",
"norm": "rmsnorm",
"pooling": "cls",
"numeric_embedding": "periodic",
"numeric_periodic_features": 16,
"max_numeric_features": 64,
"max_categories": 128,
"feature_token_dropout": 0.05,
"n_params": 10752769,
"pretraining": {
"best_step": 45000,
"best_val_loss": 0.229543,
"best_rows_seen": 23040000,
"final_step": 61825,
"final_rows_seen": 31654400,
"batch_size": 512,
"lr": 0.0003,
"weight_decay": 0.0001,
"amp": true,
"amp_dtype": "float16",
"grad_clip": 1.0,
"warmup_steps": 2000,
"lr_schedule": "cosine",
"max_steps": 200000
},
"corpus": {
"hf_repo": "avewright/tabula-pretraining-corpus-v2",
"total_shards": 541,
"real_datasets_ok": 3371,
"sources": {
"pmlb": {
"ok": 422,
"total_attempted": 423,
"status": "fully_exhausted"
},
"openml": {
"ok": 2949,
"total_attempted": 4886,
"schema_fail": 1900,
"download_fail": 37
},
"huggingface": {
"ok": 0,
"download_fail": 66,
"schema_fail": 1
}
},
"synthetic_generators": [
"tree_prior",
"gaussian_mixture",
"polynomial",
"scm",
"regression",
"time_series",
"mixed_type"
]
},
"date_trained": "2026-03-16",
"framework": "pytorch",
"pytorch_version": "2.4.1+cu124"
}