judewells committed on
Commit
fe4084f
·
verified ·
1 Parent(s): 2cc62d2

Fix module paths: src.* -> profam.* (.hydra/gym_config.yaml)

Browse files
Files changed (1) hide show
  1. .hydra/gym_config.yaml +16 -16
.hydra/gym_config.yaml CHANGED
@@ -8,7 +8,7 @@ ckpt_path: null
8
  seed: 12345
9
  float32_matmul_precision: high
10
  model:
11
- _target_: src.models.llama.LlamaLitModule
12
  scheduler_name: constant_with_warmup
13
  num_warmup_steps: 200
14
  num_training_steps: 1000000
@@ -49,7 +49,7 @@ model:
49
  rope_type: llama3
50
  callbacks:
51
  throughput:
52
- _target_: src.utils.callbacks.TokenThroughputMonitor
53
  model_checkpoint:
54
  _target_: lightning.pytorch.callbacks.ModelCheckpoint
55
  dirpath: ${paths.output_dir}/checkpoints
@@ -71,15 +71,15 @@ callbacks:
71
  rich_progress_bar:
72
  _target_: lightning.pytorch.callbacks.RichProgressBar
73
  timer:
74
- _target_: src.utils.callbacks.EpochTimerCallback
75
  print:
76
- _target_: src.utils.callbacks.PrintCallback
77
  sample_counter:
78
- _target_: src.utils.callbacks.SampleCounter
79
  logger: null
80
 
81
  trainer:
82
- _target_: src.utils.trainer.ProFamTrainer
83
  default_root_dir: ${paths.output_dir}
84
  max_epochs: 10000
85
  max_steps: -1
@@ -130,7 +130,7 @@ extras:
130
  enforce_tags: true
131
  print_config: true
132
  tokenizer:
133
- _target_: src.data.tokenizers.ProFamTokenizer
134
  tokenizer_file: data/profam_tokenizer.json
135
  unk_token: '[UNK]'
136
  pad_token: '[PAD]'
@@ -181,7 +181,7 @@ extra_callbacks:
181
  prompt_builder:
182
  preprocessor:
183
  cfg:
184
- _target_: src.data.processors.PreprocessingConfig
185
  document_token: '[RAW]'
186
  drop_first_protein: false
187
  keep_first_protein: false
@@ -190,27 +190,27 @@ extra_callbacks:
190
  shuffle_proteins_in_document: true
191
  padding: do_not_pad
192
  transform_fns:
193
- - _target_: src.data.processors.transforms.replace_nans_in_coords
194
  _partial_: true
195
  fill_value: 0.0
196
- _target_: src.data.processors.ProteinDocumentPreprocessor
197
- _target_: src.models.inference.PromptBuilder
198
- _target_: src.pipelines.callback.SamplingEvaluationPipelineCallback
199
  pipeline:
200
- _target_: src.pipelines.unconditional_sequence.UnconditionalSequenceEvaluationPipeline
201
  num_generations: 5
202
  max_tokens: 20000
203
  max_generated_length: 300
204
  pipeline_id: unconditional_sampling
205
  save_results_to_file: false
206
  evaluators:
207
- _target_: src.evaluators.esmfold.ESMFoldSamplingEvaluator
208
  name: esmfold_example
209
  data:
210
- _target_: src.data.datamodule.ProteinDataMixture
211
  dataset_builders:
212
  proteingym:
213
- _target_: src.data.builders.proteingym.ProteinGymDataset
214
  name: proteingym
215
  dms_ids: ${constants.gym_val_assay_list}
216
  seed: 42
 
8
  seed: 12345
9
  float32_matmul_precision: high
10
  model:
11
+ _target_: profam.models.llama.LlamaLitModule
12
  scheduler_name: constant_with_warmup
13
  num_warmup_steps: 200
14
  num_training_steps: 1000000
 
49
  rope_type: llama3
50
  callbacks:
51
  throughput:
52
+ _target_: profam.utils.callbacks.TokenThroughputMonitor
53
  model_checkpoint:
54
  _target_: lightning.pytorch.callbacks.ModelCheckpoint
55
  dirpath: ${paths.output_dir}/checkpoints
 
71
  rich_progress_bar:
72
  _target_: lightning.pytorch.callbacks.RichProgressBar
73
  timer:
74
+ _target_: profam.utils.callbacks.EpochTimerCallback
75
  print:
76
+ _target_: profam.utils.callbacks.PrintCallback
77
  sample_counter:
78
+ _target_: profam.utils.callbacks.SampleCounter
79
  logger: null
80
 
81
  trainer:
82
+ _target_: profam.utils.trainer.ProFamTrainer
83
  default_root_dir: ${paths.output_dir}
84
  max_epochs: 10000
85
  max_steps: -1
 
130
  enforce_tags: true
131
  print_config: true
132
  tokenizer:
133
+ _target_: profam.data.tokenizers.ProFamTokenizer
134
  tokenizer_file: data/profam_tokenizer.json
135
  unk_token: '[UNK]'
136
  pad_token: '[PAD]'
 
181
  prompt_builder:
182
  preprocessor:
183
  cfg:
184
+ _target_: profam.data.processors.PreprocessingConfig
185
  document_token: '[RAW]'
186
  drop_first_protein: false
187
  keep_first_protein: false
 
190
  shuffle_proteins_in_document: true
191
  padding: do_not_pad
192
  transform_fns:
193
+ - _target_: profam.data.processors.transforms.replace_nans_in_coords
194
  _partial_: true
195
  fill_value: 0.0
196
+ _target_: profam.data.processors.ProteinDocumentPreprocessor
197
+ _target_: profam.models.inference.PromptBuilder
198
+ _target_: profam.pipelines.callback.SamplingEvaluationPipelineCallback
199
  pipeline:
200
+ _target_: profam.pipelines.unconditional_sequence.UnconditionalSequenceEvaluationPipeline
201
  num_generations: 5
202
  max_tokens: 20000
203
  max_generated_length: 300
204
  pipeline_id: unconditional_sampling
205
  save_results_to_file: false
206
  evaluators:
207
+ _target_: profam.evaluators.esmfold.ESMFoldSamplingEvaluator
208
  name: esmfold_example
209
  data:
210
+ _target_: profam.data.datamodule.ProteinDataMixture
211
  dataset_builders:
212
  proteingym:
213
+ _target_: profam.data.builders.proteingym.ProteinGymDataset
214
  name: proteingym
215
  dms_ids: ${constants.gym_val_assay_list}
216
  seed: 42