mojtababahrami committed on
Commit
45d8b03
·
verified ·
1 Parent(s): 347da14

Upload corpus360M[multi-species]-model170M/config.yaml with huggingface_hub

Browse files
corpus360M[multi-species]-model170M/config.yaml ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ name: ContrastiveModel
3
+ dim_hid: 2048
4
+ dropout: 0.1
5
+ nlayers: 16
6
+ num_head: 16
7
+ training:
8
+ lr: 0.0001
9
+ min_lr: 0
10
+ warmup: 20000
11
+ devices: -1
12
+ max_steps: 4000000
13
+ num_nodes: 1
14
+ scheduler: warmup
15
+ accelerator: gpu
16
+ masking_rate: 0
17
+ weight_decay: 0.01
18
+ optimizer_class: AdamW
19
+ train_vocab_only: false
20
+ gradient_clip_val: 1
21
+ log_every_n_steps: 1000
22
+ val_check_interval: 25000
23
+ limit_train_batches: 1
24
+ resume_from_checkpoint: false
25
+ accumulate_grad_batches: 1
26
+ check_val_every_n_epoch: null
27
+ use_learnable_embs_freq: 0.8
28
+ validate_before_training: false
29
+ freeze_pretrained_vocabulary: true
30
+ cls_value: -2
31
+ dim_model: 1024
32
+ activation: gelu
33
+ mask_value: -1
34
+ pe_max_len: 25000
35
+ norm_scheme: pre
36
+ decoder_head: false
37
+ dim_gene_embs: 1024
38
+ input_encoding: rank_encoding
39
+ projection_dim: null
40
+ flash_attention: true
41
+ mlm_loss_weight: 0
42
+ cont_loss_weight: 1
43
+ contrastive_loss: multiclass
44
+ loss_switch_step: 20000
45
+ dim_pretrained_vocab: 640
46
+ logit_scale_init_value: 3
47
+ values_only_sanity_check: false
48
+ data_loading_speed_sanity_check: false
49
+ profiler:
50
+ enabled: false
51
+ datamodule:
52
+ name: DataModule
53
+ dataset:
54
+ train:
55
+ max_tokens: 20000
56
+ qc_threshold: 1.0e-05
57
+ panel_overlap: false
58
+ panel_size_max: 60000
59
+ panel_size_min: 400
60
+ panel_selection: mixed
61
+ panel_filter_regex: .*
62
+ panel_max_drop_rate: 0.5
63
+ max_total_seq_length: 80000
64
+ feature_max_drop_rate: null
65
+ panel_selection_mixed_prob: 0.25
66
+ species:
67
+ - hsapiens
68
+ - mmusculus
69
+ - cjacchus
70
+ - mmulatta
71
+ - ptroglodytes
72
+ - btaurus
73
+ - celegans
74
+ - drerio
75
+ - dmelanogaster
76
+ - ecaballus
77
+ - ggallus
78
+ - ggorilla
79
+ - hgfemale
80
+ - ocuniculus
81
+ - oaries
82
+ - sscrofa
83
+ obs_keys:
84
+ - dataset
85
+ - donor_id
86
+ - tissue
87
+ - cell_type
88
+ dataloader:
89
+ train:
90
+ shuffle: true
91
+ drop_last: true
92
+ batch_size: 512
93
+ num_groups: null
94
+ pin_memory: true
95
+ num_samples: null
96
+ num_workers: 2
97
+ within_group_sampling: dataset
98
+ normalization: raw
99
+ precomp_embs_key: null
100
+ gene_sampling_strategy: top-nonzero
101
+ model_speed_sanity_check: false
102
+ initialize:
103
+ resume: true
104
+ run_id: s3z512hv
105
+ checkpoint: latest/last.ckpt
106
+ create_new_run: true