{ "model_type": "ao_gpt_hybrid", "architecture": "TinyDecoderLM", "vocab_size": 2481, "atomic_motifs": 2387, "freq_cutoff": 5000, "d_model": 512, "n_heads": 8, "n_layers": 8, "d_ff": 2048, "max_seq_len": 64, "dropout": 0.1, "use_adaln": true, "bidirectional": false, "dtype": "bfloat16", "epoch": 7, "n_params_total": 31099825, "training": { "dataset": "30M STAMP molecules (train split, all_pass=True)", "train_rows": 19148578, "valid_rows_sampled": 20000, "optimizer": "AdamW (fused, bf16)", "lr": 5e-4, "weight_decay": 0.01, "micro_batch_size": 6144, "global_batch_size": 6144, "grad_accum_steps": 1, "random_ratio": 0.9, "torch_compile": true, "fallback_p_low": 0.02, "fallback_p_high": 0.15 }, "default_sampling": { "temperature": 0.95, "top_p": 0.85, "top_k": 0, "max_new_tokens": 64 }, "eval": { "N": 1024, "validity_pct": 100.0, "uniqueness_pct": 100.0, "quality_over_valid_pct": 79.16, "genmol_pct": 79.00, "qed_mean": 0.727, "sa_mean": 2.92, "diversity": 0.860, "reference_ar_baseline_genmol_pct": 79.64 } }