lucascamillomd committed on
Commit
518e59f
·
verified ·
1 Parent(s): 90e2d14

Upload config/human_rrbs_atlas.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config/human_rrbs_atlas.yaml +146 -0
config/human_rrbs_atlas.yaml ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task_name: train
2
+ tags:
3
+ - finetune_human_rrbs_atlas_large
4
+ - finetuning
5
+ - large
6
+ - human_rrbs_atlas
7
+ train: true
8
+ test: true
9
+ trainer_ckpt_path: null
10
+ model_ckpt_path: dependencies/model/weights/large.ckpt
11
+ strict_load: true
12
+ seed: 42
13
+ data:
14
+ batch_size: 3
15
+ dna_llm: nucleotide-transformer-v2-500m-multi-species
16
+ max_length: 10000
17
+ sorting_strategy: sorted_chromosome
18
+ dna_context_len: 2001
19
+ num_workers: 8
20
+ pin_memory: false
21
+ _target_: cpgpt.data.cpgpt_datamodule.CpGPTDataModule
22
+ train_dir: ${paths.data_dir}/human_rrbs_atlas/processed/train
23
+ val_dir: ${paths.data_dir}/human_rrbs_atlas/processed/val
24
+ test_dir: ${paths.data_dir}/human_rrbs_atlas/processed/test
25
+ dependencies_dir: ${paths.dependencies_dir}/human
26
+ model:
27
+ optimizer:
28
+ _target_: schedulefree.AdamWScheduleFree
29
+ _partial_: true
30
+ lr: 0.0001
31
+ weight_decay: 0.01
32
+ betas:
33
+ - 0.9
34
+ - 0.95
35
+ scheduler:
36
+ _target_: torch.optim.lr_scheduler.ConstantLR
37
+ _partial_: true
38
+ factor: 1.0
39
+ total_iters: 1
40
+ net:
41
+ _target_: cpgpt.model.components.model.CpGPT
42
+ d_embedding: 512
43
+ d_hidden: 512
44
+ d_dna_embedding: 1024
45
+ n_attention_heads: 16
46
+ n_layers: 32
47
+ n_mlp_blocks: 3
48
+ dropout: 0.01
49
+ architecture: transformer
50
+ activation: swiglu
51
+ positional_encoding: rotary
52
+ sample_embedding_method: cls
53
+ use_power_norm: false
54
+ fft: false
55
+ use_condition_decoder: false
56
+ condition_size: 0
57
+ use_noise_decoder: false
58
+ mlp_block_bias: false
59
+ mlp_block_norm_type: rmsnorm
60
+ mlp_block_pre_norm: false
61
+ mlp_block_post_norm: false
62
+ transformer_block_bias: false
63
+ transformer_block_norm_type: rmsnorm
64
+ transformer_block_norm_first: true
65
+ transformer_block_dropout: 0.0
66
+ training:
67
+ generative_splits: 2
68
+ binarize_input: true
69
+ contrastive_threshold: 0.5
70
+ diffusion: false
71
+ reconstruct_mode: all
72
+ diffusion_params:
73
+ num_timesteps: 1000
74
+ loss_weights:
75
+ m_mae: 10.0
76
+ m_mae_unc: 1.0
77
+ betas_mae: 0.0
78
+ betas_kld: 0.0
79
+ betas_beta: 0.0
80
+ betas_wd: 1.0
81
+ contrastive: 1.0
82
+ sample_kld: 1.0
83
+ diffusion_mse: 0.0
84
+ condition_loss: 0.0
85
+ _target_: cpgpt.model.cpgpt_module.CpGPTLitModule
86
+ compile: true
87
+ callbacks:
88
+ model_checkpoint:
89
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
90
+ dirpath: ${paths.output_dir}/checkpoints
91
+ filename: ${tags[0]}
92
+ monitor: val/loss
93
+ verbose: false
94
+ save_last: true
95
+ save_top_k: 1
96
+ mode: min
97
+ auto_insert_metric_name: false
98
+ save_weights_only: false
99
+ every_n_train_steps: null
100
+ train_time_interval: null
101
+ every_n_epochs: null
102
+ save_on_train_epoch_end: null
103
+ model_summary:
104
+ _target_: lightning.pytorch.callbacks.RichModelSummary
105
+ max_depth: -1
106
+ rich_progress_bar:
107
+ _target_: lightning.pytorch.callbacks.RichProgressBar
108
+ logger:
109
+ wandb:
110
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
111
+ save_dir: ${paths.output_dir}
112
+ offline: false
113
+ id: null
114
+ anonymous: null
115
+ project: CpGPT
116
+ log_model: true
117
+ prefix: ''
118
+ entity: lucascamillo
119
+ group: ''
120
+ tags: ${tags}
121
+ job_type: ''
122
+ trainer:
123
+ _target_: lightning.pytorch.trainer.Trainer
124
+ default_root_dir: ${paths.output_dir}
125
+ min_steps: 1000
126
+ max_steps: 50000
127
+ accelerator: auto
128
+ devices: 1
129
+ precision: 16-mixed
130
+ val_check_interval: 1000
131
+ check_val_every_n_epoch: null
132
+ log_every_n_steps: 1
133
+ detect_anomaly: false
134
+ deterministic: false
135
+ accumulate_grad_batches: 1
136
+ paths:
137
+ root_dir: ${oc.env:PROJECT_ROOT}
138
+ data_dir: ${paths.root_dir}/data/
139
+ dependencies_dir: ${paths.root_dir}/dependencies/
140
+ log_dir: ${paths.root_dir}/logs/
141
+ output_dir: ${hydra:runtime.output_dir}
142
+ work_dir: ${hydra:runtime.cwd}
143
+ extras:
144
+ ignore_warnings: true
145
+ enforce_tags: true
146
+ print_config: true