{
  "model_type": "ao_gpt_hybrid",
  "architecture": "TinyDecoderLM",
  "vocab_size": 2481,
  "atomic_motifs": 2387,
  "freq_cutoff": 5000,
  "d_model": 512,
  "n_heads": 8,
  "n_layers": 8,
  "d_ff": 2048,
  "max_seq_len": 64,
  "dropout": 0.1,
  "use_adaln": true,
  "bidirectional": false,
  "dtype": "bfloat16",
  "epoch": 7,
  "n_params_total": 31099825,
  "training": {
    "dataset": "30M STAMP molecules (train split, all_pass=True)",
    "train_rows": 19148578,
    "valid_rows_sampled": 20000,
    "optimizer": "AdamW (fused, bf16)",
    "lr": 5e-4,
    "weight_decay": 0.01,
    "micro_batch_size": 6144,
    "global_batch_size": 6144,
    "grad_accum_steps": 1,
    "random_ratio": 0.9,
    "torch_compile": true,
    "fallback_p_low": 0.02,
    "fallback_p_high": 0.15
  },
  "default_sampling": {
    "temperature": 0.95,
    "top_p": 0.85,
    "top_k": 0,
    "max_new_tokens": 64
  },
  "eval": {
    "N": 1024,
    "validity_pct": 100.0,
    "uniqueness_pct": 100.0,
    "quality_over_valid_pct": 79.16,
    "genmol_pct": 79.00,
    "qed_mean": 0.727,
    "sa_mean": 2.92,
    "diversity": 0.860,
    "reference_ar_baseline_genmol_pct": 79.64
  }
}