mojtababahrami committed on
Commit
114303b
·
verified ·
1 Parent(s): b7de5be

Upload corpus40M-model30M/config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. corpus40M-model30M/config.yaml +89 -0
corpus40M-model30M/config.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: ContrastiveModel
3
+ dim_hid: 1024
4
+ dropout: 0.1
5
+ nlayers: 8
6
+ num_head: 8
7
+ training:
8
+ lr: 0.0001
9
+ min_lr: 0
10
+ warmup: 10000
11
+ devices: -1
12
+ max_steps: 4000000
13
+ num_nodes: 1
14
+ scheduler: warmup
15
+ accelerator: gpu
16
+ masking_rate: 0
17
+ weight_decay: 0
18
+ optimizer_class: AdamW
19
+ gradient_clip_val: 1
20
+ log_every_n_steps: 1000
21
+ val_check_interval: 100000
22
+ limit_train_batches: 1
23
+ resume_from_checkpoint: false
24
+ accumulate_grad_batches: 1
25
+ check_val_every_n_epoch: null
26
+ use_learnable_embs_freq: null
27
+ validate_before_training: true
28
+ freeze_pretrained_vocabulary: false
29
+ dim_model: 512
30
+ activation: gelu
31
+ pe_max_len: 5000
32
+ norm_scheme: pre
33
+ decoder_head: false
34
+ dim_gene_embs: 512
35
+ input_encoding: rank_encoding
36
+ projection_dim: null
37
+ flash_attention: true
38
+ mlm_loss_weight: 0
39
+ cont_loss_weight: 1
40
+ contrastive_loss: multiclass
41
+ loss_switch_step: 10000
42
+ dim_pretrained_vocab: 640
43
+ logit_scale_init_value: 3
44
+ values_only_sanity_check: false
45
+ data_loading_speed_sanity_check: false
46
+ profiler:
47
+ enabled: false
48
+ datamodule:
49
+ name: DataModule
50
+ dataset:
51
+ train:
52
+ max_tokens: 1000
53
+ qc_threshold: null
54
+ panel_overlap: false
55
+ panel_size_max: 60000
56
+ panel_size_min: 400
57
+ panel_selection: mixed
58
+ panel_filter_regex: .*
59
+ panel_max_drop_rate: 0.5
60
+ max_total_seq_length: null
61
+ feature_max_drop_rate: null
62
+ panel_selection_mixed_prob: 0.25
63
+ species:
64
+ - hsapiens
65
+ obs_keys:
66
+ - dataset
67
+ - donor_id
68
+ - tissue
69
+ - cell_type
70
+ dataloader:
71
+ train:
72
+ shuffle: true
73
+ drop_last: true
74
+ batch_size: 512
75
+ num_groups: null
76
+ pin_memory: true
77
+ num_samples: null
78
+ num_workers: 12
79
+ within_group_sampling: dataset
80
+ normalization: raw
81
+ precomp_embs_key: null
82
+ gene_sampling_strategy: top-nonzero
83
+ model_speed_sanity_check: false
84
+ initialize:
85
+ resume: false
86
+ run_id: null
87
+ checkpoint: null
88
+ create_new_run: false
89
+ fresh_training: false