laaaarrywang/SCDD

# Run configuration for the model published as laaaarrywang/SCDD.
#
# NOTE(review): every line below starts with "+". That looks like the
# added-line marker of a diff view rather than YAML syntax; a YAML parser
# would not read these lines as the intended keys. Confirm how this file is
# consumed, and strip the markers if it must load as plain YAML.

# --- Identity and run mode ---------------------------------------------
# NOTE(review): the "0.2" in the checkpoint file name matches forward.ratio
# further down; presumably the same setting. Confirm before changing either.
+model_id: laaaarrywang/SCDD
+checkpoint: checkpoints/scdd_pu_0.2.ckpt
+mode: eval

# --- Method selection ----------------------------------------------------
# Named choices resolved by the consuming code (not visible here).
# NOTE(review): T is presumably the number of discrete diffusion timesteps;
# verify against the code that reads it.
+diffusion: absorbing_state
+backbone: dit
+parameterization: scdd
+time_conditioning: false
+T: 1000
+subs_masking: false
+seed: 512

# --- Data ----------------------------------------------------------------
# Names of the training/validation sets and of the tokenizer to load.
# NOTE(review): `wrap` presumably controls packing of text into fixed-length
# sequences; confirm against the data loader.
+data:
+  train: openwebtext-train
+  valid: openwebtext-valid
+  tokenizer_name_or_path: gpt2
+  wrap: true
+  streaming: false

# --- Model architecture --------------------------------------------------
# Size settings for the network selected by `type`.
# NOTE(review): `length` presumably is the sequence length in tokens, and
# hidden_size / n_heads = 64 per head if heads split evenly; confirm both.
+model:
+  name: small
+  type: ddit
+  hidden_size: 768
+  cond_dim: 128
+  length: 512
+  n_blocks: 12
+  n_heads: 12
+  scale_by_sigma: true
+  dropout: 0.0
+  tie_word_embeddings: false

# --- Forward process -----------------------------------------------------
# NOTE(review): presumably parameters of the forward (noising) process named
# "mix"; the meaning of ratio / gamma / t_peak is not shown here. Confirm
# with the implementation before tuning.
+forward:
+  name: mix
+  ratio: 0.2
+  gamma: 1
+  t_peak: 0.5

# --- Noise schedule ------------------------------------------------------
# Schedule type plus its lower and upper sigma bounds.
+noise:
+  type: loglinear
+  sigma_min: 0.0001
+  sigma_max: 20

# --- Sampling ------------------------------------------------------------
# Settings used when generating samples.
# NOTE(review): nucleus_p of 1.0 presumably disables nucleus (top-p)
# truncation; confirm against the sampler.
+sampling:
+  predictor: scdd
+  compile_sampler: true
+  steps: 1024
+  noise_removal: true
+  nucleus_p: 1.0

# --- Training ------------------------------------------------------------
# NOTE(review): `mode` above is "eval", so this section and the `optim` and
# `trainer` sections are presumably kept only as a record of how the
# checkpoint was trained; confirm whether eval reads any of them.
+training:
+  ema: 0.9999
+  antithetic_sampling: true
+  importance_sampling: false
+  sampling_eps: 0.001
+  change_of_variables: false

# --- Optimizer -----------------------------------------------------------
# NOTE(review): the beta1/beta2/eps names suggest an Adam-family optimizer;
# the optimizer class itself is not named in this file.
+optim:
+  weight_decay: 0.02
+  lr: 0.0005
+  beta1: 0.9
+  beta2: 0.999
+  eps: 1.0e-09

# --- Trainer / hardware --------------------------------------------------
# NOTE(review): global_batch_size is presumably the total across all
# `devices` (256 / 8 = 32 per device, before any gradient accumulation);
# confirm against the trainer.
+trainer:
+  accelerator: cuda
+  devices: 8
+  precision: bf16
+  gradient_clip_val: 1.0
+  max_steps: 1000000
+  global_batch_size: 256