---
# Training configuration for a GeometricTransformer run (GB1 directed-evolution
# dataset). NOTE(review): the source of this file lost all leading indentation
# (code-viewer scrape); the nesting below is reconstructed from key names and
# ordering — confirm against the config loader's expected schema.
nn:
  model: GeometricTransformer
  dataset: GeometricTransformerDataset
  dtype: float32
  device: cuda
  data_in_memory: false
  load_model: null
  batch_size: 128
  num_workers: 4
  collate_fn: geometric_transformer_collate_fn
  model_args:
    dim_model: 1536
    unified_transformer_args:
      n_layers: 48
      # Geometric-attention layers; only the first layer is geometric.
      geom_layer_indices:
        - 0
      # Standard multi-head attention in every layer 0-47 (all 48 layers).
      mha_layer_indices:
        - 0
        - 1
        - 2
        - 3
        - 4
        - 5
        - 6
        - 7
        - 8
        - 9
        - 10
        - 11
        - 12
        - 13
        - 14
        - 15
        - 16
        - 17
        - 18
        - 19
        - 20
        - 21
        - 22
        - 23
        - 24
        - 25
        - 26
        - 27
        - 28
        - 29
        - 30
        - 31
        - 32
        - 33
        - 34
        - 35
        - 36
        - 37
        - 38
        - 39
        - 40
        - 41
        - 42
        - 43
        - 44
        - 45
        - 46
        - 47
      bias: false
      mha_args:
        num_heads: 24
        bias: false
        qk_layernorm: true
      gha_args:
        num_heads: 256
        num_vector_messages: 1
        mask_and_zero_frameless: true
        bias: false
        scaling_factor: 1.1547005383792515  # = 2 / sqrt(3)
      ffn_type: swiglu
      norm_type: layer_norm
      expansion_ratio: 2.66666666667  # ~ 8/3
    # Vocabulary/special-token tables per track.
    # NOTE(review): placed under model_args (sibling of unified_transformer_args)
    # based on ordering — confirm.
    struc_token_info:
      mask: 4096
      eos: 4097
      bos: 4098
      pad: 4099
      total: 5001
      max_non_special_token: 4095
    residue_token_info:
      mask: 32
      eos: 2
      bos: 0
      pad: 1
      total: 33
      max_non_special_token: null
    sasa_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    sec_struct_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
    res_annot_token_info:
      mask: 0
      eos: 0
      bos: 0
      pad: 0
      total: null
      max_non_special_token: null
  # Fractional train/val/test split; fractions sum to 1.0.
  dataset_split_args:
    train: 0.8
    val: 0.2
    test: 0.0
train:
  lightning_model: TransformerModel
  resume_training_path: null
  lightning_model_args:
    eval_type: sft
    beta: null
    gamma: null
    sampling_temperature: null
    optimizer: Adam
    optimizer_args:
      lr: 0.0004
      betas:
        - 0.9
        - 0.95
      weight_decay: 0.01
    lr_scheduler: LinearWarmupCosineAnnealingLR
    lr_scheduler_args:
      # NOTE(review): with interval "step" these counts are in optimizer steps
      # despite the *_epochs names — confirm against the scheduler wrapper.
      warmup_epochs: 250000
      max_epochs: 2500000
      eta_min: 4.0e-05
      interval: step
    monitor: val/CELoss
    sync_dist: true
    on_step: true
  trainer_args:
    # NOTE(review): eval_type here ("era") differs from
    # lightning_model_args.eval_type ("sft") — verify this is intentional.
    eval_type: era
    accelerator: cuda
    devices: 1
    strategy: auto
    log_every_n_steps: 500
    max_epochs: 10000
    enable_progress_bar: false
    gradient_clip_val: 1.0
    logger:
      loggertype: TensorBoard
  seed_args:
    seed: 42
    workers: true
global_args:
  dataset_filename: /scratch/group_scratch/era/directed_evolution/datasets/gb1/gb1_tokenized.h5
  # Dotted paths into this config checked by the test harness.
  keys_to_test:
    - nn.model
    - nn.model_args