{
  "model_type": "tabula_transformer",
  "architecture": "TabularTransformer",
  "d_model": 256,
  "n_heads": 8,
  "n_layers": 8,
  "d_ff": 512,
  "dropout": 0.1,
  "ffn_activation": "swiglu",
  "norm": "rmsnorm",
  "pooling": "cls",
  "numeric_embedding": "periodic",
  "numeric_periodic_features": 16,
  "max_numeric_features": 64,
  "max_categories": 128,
  "feature_token_dropout": 0.05,
  "n_params": 10752769,
  "pretraining": {
    "best_step": 45000,
    "best_val_loss": 0.229543,
    "best_rows_seen": 23040000,
    "final_step": 61825,
    "final_rows_seen": 31654400,
    "batch_size": 512,
    "lr": 0.0003,
    "weight_decay": 0.0001,
    "amp": true,
    "amp_dtype": "float16",
    "grad_clip": 1.0,
    "warmup_steps": 2000,
    "lr_schedule": "cosine",
    "max_steps": 200000
  },
  "corpus": {
    "hf_repo": "avewright/tabula-pretraining-corpus-v2",
    "total_shards": 541,
    "real_datasets_ok": 3371,
    "sources": {
      "pmlb": {
        "ok": 422,
        "total_attempted": 423,
        "status": "fully_exhausted"
      },
      "openml": {
        "ok": 2949,
        "total_attempted": 4886,
        "schema_fail": 1900,
        "download_fail": 37
      },
      "huggingface": {
        "ok": 0,
        "download_fail": 66,
        "schema_fail": 1
      }
    },
    "synthetic_generators": [
      "tree_prior",
      "gaussian_mixture",
      "polynomial",
      "scm",
      "regression",
      "time_series",
      "mixed_type"
    ]
  },
  "date_trained": "2026-03-16",
  "framework": "pytorch",
  "pytorch_version": "2.4.1+cu124"
}