clement-bonnet committed on
Commit
3c6f1d7
·
verified ·
1 Parent(s): 77b691b

Upload 2 files

Browse files
quiet-thunder-789--checkpoint:v0/config.yaml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ eval:
2
+ eval_datasets: null
3
+ json_datasets: null
4
+ test_datasets:
5
+ - name: generator_mean
6
+ length: 96
7
+ generator: PATTERN
8
+ num_pairs: 4
9
+ batch_size: 96
10
+ num_tasks_to_show: 32
11
+ task_generator_kwargs:
12
+ num_cols: 4
13
+ num_rows: 4
14
+ pattern_size: 2
15
+ - name: generator_gradient_ascent_5
16
+ length: 96
17
+ generator: PATTERN
18
+ num_pairs: 4
19
+ batch_size: 96
20
+ inference_mode: gradient_ascent
21
+ inference_kwargs:
22
+ lr: 0.1
23
+ num_steps: 10
24
+ num_tasks_to_show: 32
25
+ task_generator_kwargs:
26
+ num_cols: 4
27
+ num_rows: 4
28
+ pattern_size: 2
29
+ training:
30
+ seed: 0
31
+ use_hf: true
32
+ kl_coeff: 0.001
33
+ batch_size: 128
34
+ learning_rate: 0.0004
35
+ inference_mode: mean
36
+ task_generator:
37
+ class: PATTERN
38
+ num_cols: 4
39
+ num_rows: 4
40
+ num_pairs: 4
41
+ num_workers: 16
42
+ pattern_size: 2
43
+ train_datasets: null
44
+ mixed_precision: false
45
+ total_num_steps: 200000
46
+ inference_kwargs: null
47
+ eval_every_n_logs: 20
48
+ log_every_n_steps: 1000
49
+ resume_from_checkpoint: null
50
+ online_data_augmentation: false
51
+ gradient_accumulation_steps: 1
52
+ save_checkpoint_every_n_logs: 200
53
+ decoder_transformer:
54
+ _target_: src_v2.models.utils.DecoderTransformerConfig
55
+ max_cols: 4
56
+ max_rows: 4
57
+ num_layers: 2
58
+ transformer_layer:
59
+ _target_: src_v2.models.utils.TransformerLayerConfig
60
+ num_heads: 6
61
+ dropout_rate: 0.0
62
+ mlp_dim_factor: 4.0
63
+ emb_dim_per_head: 12
64
+ attention_dropout_rate: 0.0
65
+ encoder_transformer:
66
+ _target_: src_v2.models.utils.EncoderTransformerConfig
67
+ max_cols: 4
68
+ max_rows: 4
69
+ latent_dim: 2
70
+ num_layers: 2
71
+ variational: true
72
+ transformer_layer:
73
+ _target_: src_v2.models.utils.TransformerLayerConfig
74
+ num_heads: 6
75
+ dropout_rate: 0.0
76
+ mlp_dim_factor: 4.0
77
+ emb_dim_per_head: 12
78
+ attention_dropout_rate: 0.0
79
+ latent_projection_bias: false
quiet-thunder-789--checkpoint:v0/state.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b3368d8a3c6232867a43b252597f16dbec5de0264bf23168d938a461256a3d
3
+ size 4082621