{
  "model_type": "tabula_transformer",
  "architecture": "TabularTransformer",
  "d_model": 256,
  "n_heads": 8,
  "n_layers": 8,
  "d_ff": 512,
  "dropout": 0.1,
  "ffn_activation": "swiglu",
  "norm": "rmsnorm",
  "pooling": "cls",
  "numeric_embedding": "periodic",
  "numeric_periodic_features": 16,
  "max_numeric_features": 64,
  "max_categories": 128,
  "feature_token_dropout": 0.05,
  "n_params": 10752769,
  "pretraining": {
    "best_step": 45000,
    "best_val_loss": 0.229543,
    "best_rows_seen": 23040000,
    "final_step": 61825,
    "final_rows_seen": 31654400,
    "batch_size": 512,
    "lr": 0.0003,
    "weight_decay": 0.0001,
    "amp": true,
    "amp_dtype": "float16",
    "grad_clip": 1.0,
    "warmup_steps": 2000,
    "lr_schedule": "cosine",
    "max_steps": 200000
  },
  "corpus": {
    "hf_repo": "avewright/tabula-pretraining-corpus-v2",
    "total_shards": 541,
    "real_datasets_ok": 3371,
    "sources": {
      "pmlb": {
        "ok": 422,
        "total_attempted": 423,
        "status": "fully_exhausted"
      },
      "openml": {
        "ok": 2949,
        "total_attempted": 4886,
        "schema_fail": 1900,
        "download_fail": 37
      },
      "huggingface": {
        "ok": 0,
        "download_fail": 66,
        "schema_fail": 1
      }
    },
    "synthetic_generators": [
      "tree_prior",
      "gaussian_mixture",
      "polynomial",
      "scm",
      "regression",
      "time_series",
      "mixed_type"
    ]
  },
  "date_trained": "2026-03-16",
  "framework": "pytorch",
  "pytorch_version": "2.4.1+cu124"
}