mojtababahrami committed on
Commit
6a9f517
·
verified ·
1 Parent(s): 7276f21

Upload Corpus-30M/config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. Corpus-30M/config.yaml +0 -142
Corpus-30M/config.yaml CHANGED
@@ -38,81 +38,12 @@ model:
38
  per_view_normalization: false
39
  values_only_sanity_check: false
40
  data_loading_speed_sanity_check: false
41
- wandb:
42
- entity: theislab-transformer
43
- run_id: null
44
- enabled: true
45
- project: contrastive-transformer
46
- run_name: cont/PE/padding_mask_True/flash_True/PDR_0.5/B512/P400-P60000/mixed-0.25/panel-.*
47
  datamodule:
48
  name: GeneToken
49
  columns:
50
  - cell_type
51
  - donor_id
52
  dataset:
53
- val:
54
- same:
55
- max_tokens: 1000
56
- variable_size: false
57
- panel_size_max: 60000
58
- panel_size_min: 400
59
- panel_selection: mixed
60
- panel_filter_regex: .*
61
- panel_selection_mixed_prob: 0.25
62
- within_P200_B256:
63
- max_tokens: 200
64
- variable_size: false
65
- panel_size_max: 200
66
- panel_size_min: 200
67
- panel_selection: random
68
- within_P800_B256:
69
- max_tokens: 800
70
- variable_size: false
71
- panel_size_max: 800
72
- panel_size_min: 800
73
- panel_selection: random
74
- within_P6400_B256:
75
- max_tokens: 6400
76
- variable_size: false
77
- panel_size_max: 6400
78
- panel_size_min: 6400
79
- panel_selection: random
80
- within_P_SEAAD_B256:
81
- max_tokens: 800
82
- variable_size: false
83
- panel_size_max: 800
84
- panel_size_min: 800
85
- panel_selection: preselected
86
- panel_filter_regex: Merfish-SEA-AD
87
- within_Preselected_B256:
88
- max_tokens: 800
89
- variable_size: false
90
- panel_size_max: 800
91
- panel_size_min: 800
92
- panel_selection: preselected
93
- panel_filter_regex: .*
94
- within_P_Xenium_hIO_B256:
95
- max_tokens: 800
96
- variable_size: false
97
- panel_size_max: 800
98
- panel_size_min: 800
99
- panel_selection: preselected
100
- panel_filter_regex: Xenium_hIO_v1
101
- within_P_Xenium_hIO_vs_all_B256:
102
- max_tokens: 1000
103
- panel_overlap: true
104
- variable_size: false
105
- panel_size_max: 100000
106
- panel_size_min: 100000
107
- panel_selection: preselected
108
- panel_filter_regex: Xenium_hIO_v1
109
- within_P_Xenium_Human_Prime_5K_B256:
110
- max_tokens: 800
111
- variable_size: false
112
- panel_size_max: 800
113
- panel_size_min: 800
114
- panel_selection: preselected
115
- panel_filter_regex: Xenium_Human_Prime_5K
116
  train:
117
  max_tokens: 1000
118
  variable_size: false
@@ -125,79 +56,6 @@ datamodule:
125
  panel_max_drop_rate: 0.5
126
  panel_selection_mixed_prob: 0.25
127
  dataloader:
128
- val:
129
- same:
130
- shuffle: true
131
- drop_last: true
132
- batch_size: 512
133
- pin_memory: false
134
- num_samples: 50000
135
- num_workers: 2
136
- within_group_sampling: dataset
137
- within_P200_B256:
138
- shuffle: true
139
- drop_last: true
140
- batch_size: 256
141
- pin_memory: false
142
- num_samples: 50000
143
- num_workers: 2
144
- within_group_sampling: dataset
145
- within_P800_B256:
146
- shuffle: true
147
- drop_last: true
148
- batch_size: 256
149
- pin_memory: false
150
- num_samples: 50000
151
- num_workers: 2
152
- within_group_sampling: dataset
153
- within_P6400_B256:
154
- shuffle: true
155
- drop_last: true
156
- batch_size: 256
157
- pin_memory: false
158
- num_samples: 50000
159
- num_workers: 2
160
- within_group_sampling: dataset
161
- within_P_SEAAD_B256:
162
- shuffle: true
163
- drop_last: true
164
- batch_size: 256
165
- pin_memory: false
166
- num_samples: 50000
167
- num_workers: 2
168
- within_group_sampling: dataset
169
- within_Preselected_B256:
170
- shuffle: true
171
- drop_last: true
172
- batch_size: 256
173
- pin_memory: false
174
- num_samples: 50000
175
- num_workers: 2
176
- within_group_sampling: dataset
177
- within_P_Xenium_hIO_B256:
178
- shuffle: true
179
- drop_last: true
180
- batch_size: 256
181
- pin_memory: false
182
- num_samples: 50000
183
- num_workers: 2
184
- within_group_sampling: dataset
185
- within_P_Xenium_hIO_vs_all_B256:
186
- shuffle: true
187
- drop_last: true
188
- batch_size: 256
189
- pin_memory: false
190
- num_samples: 50000
191
- num_workers: 2
192
- within_group_sampling: dataset
193
- within_P_Xenium_Human_Prime_5K_B256:
194
- shuffle: true
195
- drop_last: true
196
- batch_size: 256
197
- pin_memory: false
198
- num_samples: 50000
199
- num_workers: 2
200
- within_group_sampling: dataset
201
  train:
202
  shuffle: true
203
  drop_last: true
 
38
  per_view_normalization: false
39
  values_only_sanity_check: false
40
  data_loading_speed_sanity_check: false
 
 
 
 
 
 
41
  datamodule:
42
  name: GeneToken
43
  columns:
44
  - cell_type
45
  - donor_id
46
  dataset:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  train:
48
  max_tokens: 1000
49
  variable_size: false
 
56
  panel_max_drop_rate: 0.5
57
  panel_selection_mixed_prob: 0.25
58
  dataloader:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  train:
60
  shuffle: true
61
  drop_last: true