File size: 1,576 Bytes
0bf0abe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
{
  "model_type": "tabula_transformer",
  "architecture": "TabularTransformer",
  "d_model": 256,
  "n_heads": 8,
  "n_layers": 8,
  "d_ff": 512,
  "dropout": 0.1,
  "ffn_activation": "swiglu",
  "norm": "rmsnorm",
  "pooling": "cls",
  "numeric_embedding": "periodic",
  "numeric_periodic_features": 16,
  "max_numeric_features": 64,
  "max_categories": 128,
  "feature_token_dropout": 0.05,
  "n_params": 10752769,
  "pretraining": {
    "best_step": 45000,
    "best_val_loss": 0.229543,
    "best_rows_seen": 23040000,
    "final_step": 61825,
    "final_rows_seen": 31654400,
    "batch_size": 512,
    "lr": 0.0003,
    "weight_decay": 0.0001,
    "amp": true,
    "amp_dtype": "float16",
    "grad_clip": 1.0,
    "warmup_steps": 2000,
    "lr_schedule": "cosine",
    "max_steps": 200000
  },
  "corpus": {
    "hf_repo": "avewright/tabula-pretraining-corpus-v2",
    "total_shards": 541,
    "real_datasets_ok": 3371,
    "sources": {
      "pmlb": {
        "ok": 422,
        "total_attempted": 423,
        "status": "fully_exhausted"
      },
      "openml": {
        "ok": 2949,
        "total_attempted": 4886,
        "schema_fail": 1900,
        "download_fail": 37
      },
      "huggingface": {
        "ok": 0,
        "download_fail": 66,
        "schema_fail": 1
      }
    },
    "synthetic_generators": [
      "tree_prior",
      "gaussian_mixture",
      "polynomial",
      "scm",
      "regression",
      "time_series",
      "mixed_type"
    ]
  },
  "date_trained": "2026-03-16",
  "framework": "pytorch",
  "pytorch_version": "2.4.1+cu124"
}