Lexa-B committed on
Commit
796fed0
·
verified ·
1 Parent(s): e7632bb

Upload config.json

Browse files
Files changed (1) hide show
  1. CurriculumStages/0_228M/config.json +51 -0
CurriculumStages/0_228M/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "AdaLN_Timestep_Embed_Dim": 256,
3
+ "adafactor_rel_step": true,
4
+ "adafactor_warmup_init": true,
5
+ "architectures": [
6
+ "LexaLCM"
7
+ ],
8
+ "batch_size": 16,
9
+ "bf16": true,
10
+ "cfg_scale": 0.0,
11
+ "clip_threshold": 1.0,
12
+ "contextualizer": 1,
13
+ "custom_multi_gpu": true,
14
+ "d_ff": 8192,
15
+ "d_latent": 1024,
16
+ "d_model": 2048,
17
+ "denoiser": 0,
18
+ "denoiser_iterations_inference": 40,
19
+ "denoiser_iterations_pretrain": 100,
20
+ "dropout_context": 0.1,
21
+ "dropout_denoiser": 0.15,
22
+ "dropout_latent": 0.1,
23
+ "eval_every": 0,
24
+ "gpus": {
25
+ "contextualizer": 1,
26
+ "custom_multi_gpu": true,
27
+ "denoiser": 0,
28
+ "other": 1
29
+ },
30
+ "grad_norm_log_every": 20,
31
+ "input_dim": 1024,
32
+ "learning_rate": "None",
33
+ "max_grad_norm": "None",
34
+ "max_seq_len": 64,
35
+ "max_steps": 250000,
36
+ "model_type": "lexa_lcm_pre3",
37
+ "n_heads": 32,
38
+ "num_context_layers": 5,
39
+ "num_denoiser_layers": 15,
40
+ "num_denoising_steps": 100,
41
+ "num_workers": 20,
42
+ "optimizer": "adafactor",
43
+ "other": 1,
44
+ "output_dir": "./outputs",
45
+ "resume_from": "None",
46
+ "save_every": 5000,
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.51.3",
49
+ "warmup_steps": 500,
50
+ "weight_decay": 0.01
51
+ }