{
"model_type": "ao_gpt_hybrid",
"architecture": "TinyDecoderLM",
"vocab_size": 2481,
"atomic_motifs": 2387,
"freq_cutoff": 5000,
"d_model": 512,
"n_heads": 8,
"n_layers": 8,
"d_ff": 2048,
"max_seq_len": 64,
"dropout": 0.1,
"use_adaln": true,
"bidirectional": false,
"dtype": "bfloat16",
"epoch": 7,
"n_params_total": 31099825,
"training": {
"dataset": "30M STAMP molecules (train split, all_pass=True)",
"train_rows": 19148578,
"valid_rows_sampled": 20000,
"optimizer": "AdamW (fused, bf16)",
"lr": 5e-4,
"weight_decay": 0.01,
"micro_batch_size": 6144,
"global_batch_size": 6144,
"grad_accum_steps": 1,
"random_ratio": 0.9,
"torch_compile": true,
"fallback_p_low": 0.02,
"fallback_p_high": 0.15
},
"default_sampling": {
"temperature": 0.95,
"top_p": 0.85,
"top_k": 0,
"max_new_tokens": 64
},
"eval": {
"N": 1024,
"validity_pct": 100.0,
"uniqueness_pct": 100.0,
"quality_over_valid_pct": 79.16,
"genmol_pct": 79.00,
"qed_mean": 0.727,
"sa_mean": 2.92,
"diversity": 0.860,
"reference_ar_baseline_genmol_pct": 79.64
}
}