Hanbin42 committed on
Commit
529e40a
·
verified ·
1 Parent(s): 381699b

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +104 -0
config.yaml ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
defaults:
  - _self_
  - /callbacks: [checkpoint_every_n_steps, checkpoint_monitor, learning_rate_monitor]
  - /data: Korean_dataset
  - /model: tiny-ar
  - /strategy: ddp
  - /noise: loglinear
  - /lr_scheduler: constant_warmup

mode: sample_eval # train / ppl_eval / sample_eval
diffusion: absorbing_state
backbone: ar # dit / dimamba / ar
parameterization: ar # subs / d3pm / sedd
time_conditioning: False
T: 0 # 0 (continuous time) / 1000
subs_masking: False

seed: 1

loader:
  global_batch_size: 32
  eval_global_batch_size: ${.global_batch_size}
  # Note: batch_size and eval_batch_size are **per machine**
  batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  eval_batch_size: 1
  #${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
  pin_memory: True

sampling:
  predictor: ddpm_cache # analytic, ddpm, ddpm_cache
  steps: 128
  noise_removal: True
  # TODO(yair): @subham, why aren't these params under `eval`?
  num_sample_batches: 1 # Total samples: `num_gpus` * `loader.eval_batch_size` * num_sample_batches
  num_sample_log: 1
  semi_ar: False
  stride_length: 1
  num_strides: 1

training:
  ema: 0.9999
  antithetic_sampling: True
  importance_sampling: False
  sampling_eps: 1e-3
  change_of_variables: False

eval:
  checkpoint_path: /home/elicer/lhb01/mdlm/outputs/parkseongjun/psjkodata/2025.04.05/051927/checkpoints/best.ckpt # Used to evaluate a checkpoint after training.
  disable_ema: False
  compute_generative_perplexity: True
  perplexity_batch_size: 8
  compute_perplexity_on_sanity: False
  gen_ppl_eval_model_name_or_path: gpt2-large # gpt2-large, meta-llama/Llama-2-7b-hf
  generate_samples: True

optim:
  weight_decay: 0.01
  lr: 5e-5
  beta1: 0.9
  beta2: 0.999
  eps: 1e-8

trainer:
  _target_: lightning.Trainer
  accelerator: cuda
  num_nodes: 1
  devices: ${device_count:}
  accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
  gradient_clip_val: 1.0
  precision: 'bf16'
  num_sanity_val_steps: 0
  max_steps: 50000
  log_every_n_steps: 10
  limit_train_batches: 1.0 # train on full dataset, can be used to toggle quick run
  limit_val_batches: 1.0 # validate on full dataset, can be used to toggle quick run
  val_check_interval: 0.5

wandb:
  project: test-ar
  mode: online
  notes: Mulan for text
  resume: must
  group: null
  job_type: null
  name: ar
  id: f12b7c5e-07c9-48ae-96fa-4798823b8492
  tags:
    - ${noise.type}
    - ${data.train}
    - ${data.valid}

hydra:
  run:
    dir: ./outputs/${data.train}/${now:%Y.%m.%d}/${now:%H%M%S}
  job:
    chdir: true

checkpointing:
  # Use custom `save_dir` if, e.g., saving to S3 bucket, otherwise leave this parameter as is
  save_dir: ${cwd:}
  # Note: `checkpoints` path should correspond to `checkpoint_every_n_steps.dirpath`
  resume_from_ckpt: true
  resume_ckpt_path: /home/elicer/lhb01/mdlm/outputs/parkseongjun/psjkodata/2025.04.05/045928/checkpoints/last.ckpt