tabula-v1 / config.json
avewright's picture
Upload Tabula v1 pretrained model — step 61,825, best_val=0.2295
0bf0abe verified
{
"model_type": "tabula_transformer",
"architecture": "TabularTransformer",
"d_model": 256,
"n_heads": 8,
"n_layers": 8,
"d_ff": 512,
"dropout": 0.1,
"ffn_activation": "swiglu",
"norm": "rmsnorm",
"pooling": "cls",
"numeric_embedding": "periodic",
"numeric_periodic_features": 16,
"max_numeric_features": 64,
"max_categories": 128,
"feature_token_dropout": 0.05,
"n_params": 10752769,
"pretraining": {
"best_step": 45000,
"best_val_loss": 0.229543,
"best_rows_seen": 23040000,
"final_step": 61825,
"final_rows_seen": 31654400,
"batch_size": 512,
"lr": 0.0003,
"weight_decay": 0.0001,
"amp": true,
"amp_dtype": "float16",
"grad_clip": 1.0,
"warmup_steps": 2000,
"lr_schedule": "cosine",
"max_steps": 200000
},
"corpus": {
"hf_repo": "avewright/tabula-pretraining-corpus-v2",
"total_shards": 541,
"real_datasets_ok": 3371,
"sources": {
"pmlb": {
"ok": 422,
"total_attempted": 423,
"status": "fully_exhausted"
},
"openml": {
"ok": 2949,
"total_attempted": 4886,
"schema_fail": 1900,
"download_fail": 37
},
"huggingface": {
"ok": 0,
"download_fail": 66,
"schema_fail": 1
}
},
"synthetic_generators": [
"tree_prior",
"gaussian_mixture",
"polynomial",
"scm",
"regression",
"time_series",
"mixed_type"
]
},
"date_trained": "2026-03-16",
"framework": "pytorch",
"pytorch_version": "2.4.1+cu124"
}