tabula-v1 / config.json

Upload Tabula v1 pretrained model — final step 61,825; best_val=0.2295 (at step 45,000)

0bf0abe verified 5 days ago

1.58 kB

	{
	"model_type": "tabula_transformer",
	"architecture": "TabularTransformer",
	"d_model": 256,
	"n_heads": 8,
	"n_layers": 8,
	"d_ff": 512,
	"dropout": 0.1,
	"ffn_activation": "swiglu",
	"norm": "rmsnorm",
	"pooling": "cls",
	"numeric_embedding": "periodic",
	"numeric_periodic_features": 16,
	"max_numeric_features": 64,
	"max_categories": 128,
	"feature_token_dropout": 0.05,
	"n_params": 10752769,
	"pretraining": {
	"best_step": 45000,
	"best_val_loss": 0.229543,
	"best_rows_seen": 23040000,
	"final_step": 61825,
	"final_rows_seen": 31654400,
	"batch_size": 512,
	"lr": 0.0003,
	"weight_decay": 0.0001,
	"amp": true,
	"amp_dtype": "float16",
	"grad_clip": 1.0,
	"warmup_steps": 2000,
	"lr_schedule": "cosine",
	"max_steps": 200000
	},
	"corpus": {
	"hf_repo": "avewright/tabula-pretraining-corpus-v2",
	"total_shards": 541,
	"real_datasets_ok": 3371,
	"sources": {
	"pmlb": {
	"ok": 422,
	"total_attempted": 423,
	"status": "fully_exhausted"
	},
	"openml": {
	"ok": 2949,
	"total_attempted": 4886,
	"schema_fail": 1900,
	"download_fail": 37
	},
	"huggingface": {
	"ok": 0,
	"download_fail": 66,
	"schema_fail": 1
	}
	},
	"synthetic_generators": [
	"tree_prior",
	"gaussian_mixture",
	"polynomial",
	"scm",
	"regression",
	"time_series",
	"mixed_type"
	]
	},
	"date_trained": "2026-03-16",
	"framework": "pytorch",
	"pytorch_version": "2.4.1+cu124"
	}