Fix module paths: src.* -> profam.* (.hydra/gym_config.yaml)
Browse files- .hydra/gym_config.yaml +16 -16
.hydra/gym_config.yaml
CHANGED
|
@@ -8,7 +8,7 @@ ckpt_path: null
|
|
| 8 |
seed: 12345
|
| 9 |
float32_matmul_precision: high
|
| 10 |
model:
|
| 11 |
- _target_: src.models.llama.LlamaLitModule
|
| 12 |
scheduler_name: constant_with_warmup
|
| 13 |
num_warmup_steps: 200
|
| 14 |
num_training_steps: 1000000
|
|
@@ -49,7 +49,7 @@ model:
|
|
| 49 |
rope_type: llama3
|
| 50 |
callbacks:
|
| 51 |
throughput:
|
| 52 |
- _target_: src.utils.callbacks.TokenThroughputMonitor
|
| 53 |
model_checkpoint:
|
| 54 |
_target_: lightning.pytorch.callbacks.ModelCheckpoint
|
| 55 |
dirpath: ${paths.output_dir}/checkpoints
|
|
@@ -71,15 +71,15 @@ callbacks:
|
|
| 71 |
rich_progress_bar:
|
| 72 |
_target_: lightning.pytorch.callbacks.RichProgressBar
|
| 73 |
timer:
|
| 74 |
- _target_: src.utils.callbacks.EpochTimerCallback
|
| 75 |
print:
|
| 76 |
- _target_: src.utils.callbacks.PrintCallback
|
| 77 |
sample_counter:
|
| 78 |
- _target_: src.utils.callbacks.SampleCounter
|
| 79 |
logger: null
|
| 80 |
|
| 81 |
trainer:
|
| 82 |
- _target_: src.utils.trainer.ProFamTrainer
|
| 83 |
default_root_dir: ${paths.output_dir}
|
| 84 |
max_epochs: 10000
|
| 85 |
max_steps: -1
|
|
@@ -130,7 +130,7 @@ extras:
|
|
| 130 |
enforce_tags: true
|
| 131 |
print_config: true
|
| 132 |
tokenizer:
|
| 133 |
- _target_: src.data.tokenizers.ProFamTokenizer
|
| 134 |
tokenizer_file: data/profam_tokenizer.json
|
| 135 |
unk_token: '[UNK]'
|
| 136 |
pad_token: '[PAD]'
|
|
@@ -181,7 +181,7 @@ extra_callbacks:
|
|
| 181 |
prompt_builder:
|
| 182 |
preprocessor:
|
| 183 |
cfg:
|
| 184 |
- _target_: src.data.processors.PreprocessingConfig
|
| 185 |
document_token: '[RAW]'
|
| 186 |
drop_first_protein: false
|
| 187 |
keep_first_protein: false
|
|
@@ -190,27 +190,27 @@ extra_callbacks:
|
|
| 190 |
shuffle_proteins_in_document: true
|
| 191 |
padding: do_not_pad
|
| 192 |
transform_fns:
|
| 193 |
- - _target_: src.data.processors.transforms.replace_nans_in_coords
|
| 194 |
_partial_: true
|
| 195 |
fill_value: 0.0
|
| 196 |
- _target_: src.data.processors.ProteinDocumentPreprocessor
|
| 197 |
- _target_: src.models.inference.PromptBuilder
|
| 198 |
- _target_: src.pipelines.callback.SamplingEvaluationPipelineCallback
|
| 199 |
pipeline:
|
| 200 |
- _target_: src.pipelines.unconditional_sequence.UnconditionalSequenceEvaluationPipeline
|
| 201 |
num_generations: 5
|
| 202 |
max_tokens: 20000
|
| 203 |
max_generated_length: 300
|
| 204 |
pipeline_id: unconditional_sampling
|
| 205 |
save_results_to_file: false
|
| 206 |
evaluators:
|
| 207 |
- _target_: src.evaluators.esmfold.ESMFoldSamplingEvaluator
|
| 208 |
name: esmfold_example
|
| 209 |
data:
|
| 210 |
- _target_: src.data.datamodule.ProteinDataMixture
|
| 211 |
dataset_builders:
|
| 212 |
proteingym:
|
| 213 |
- _target_: src.data.builders.proteingym.ProteinGymDataset
|
| 214 |
name: proteingym
|
| 215 |
dms_ids: ${constants.gym_val_assay_list}
|
| 216 |
seed: 42
|
|
|
|
| 8 |
seed: 12345
|
| 9 |
float32_matmul_precision: high
|
| 10 |
model:
|
| 11 |
+ _target_: profam.models.llama.LlamaLitModule
|
| 12 |
scheduler_name: constant_with_warmup
|
| 13 |
num_warmup_steps: 200
|
| 14 |
num_training_steps: 1000000
|
|
|
|
| 49 |
rope_type: llama3
|
| 50 |
callbacks:
|
| 51 |
throughput:
|
| 52 |
+ _target_: profam.utils.callbacks.TokenThroughputMonitor
|
| 53 |
model_checkpoint:
|
| 54 |
_target_: lightning.pytorch.callbacks.ModelCheckpoint
|
| 55 |
dirpath: ${paths.output_dir}/checkpoints
|
|
|
|
| 71 |
rich_progress_bar:
|
| 72 |
_target_: lightning.pytorch.callbacks.RichProgressBar
|
| 73 |
timer:
|
| 74 |
+ _target_: profam.utils.callbacks.EpochTimerCallback
|
| 75 |
print:
|
| 76 |
+ _target_: profam.utils.callbacks.PrintCallback
|
| 77 |
sample_counter:
|
| 78 |
+ _target_: profam.utils.callbacks.SampleCounter
|
| 79 |
logger: null
|
| 80 |
|
| 81 |
trainer:
|
| 82 |
+ _target_: profam.utils.trainer.ProFamTrainer
|
| 83 |
default_root_dir: ${paths.output_dir}
|
| 84 |
max_epochs: 10000
|
| 85 |
max_steps: -1
|
|
|
|
| 130 |
enforce_tags: true
|
| 131 |
print_config: true
|
| 132 |
tokenizer:
|
| 133 |
+ _target_: profam.data.tokenizers.ProFamTokenizer
|
| 134 |
tokenizer_file: data/profam_tokenizer.json
|
| 135 |
unk_token: '[UNK]'
|
| 136 |
pad_token: '[PAD]'
|
|
|
|
| 181 |
prompt_builder:
|
| 182 |
preprocessor:
|
| 183 |
cfg:
|
| 184 |
+ _target_: profam.data.processors.PreprocessingConfig
|
| 185 |
document_token: '[RAW]'
|
| 186 |
drop_first_protein: false
|
| 187 |
keep_first_protein: false
|
|
|
|
| 190 |
shuffle_proteins_in_document: true
|
| 191 |
padding: do_not_pad
|
| 192 |
transform_fns:
|
| 193 |
+ - _target_: profam.data.processors.transforms.replace_nans_in_coords
|
| 194 |
_partial_: true
|
| 195 |
fill_value: 0.0
|
| 196 |
+ _target_: profam.data.processors.ProteinDocumentPreprocessor
|
| 197 |
+ _target_: profam.models.inference.PromptBuilder
|
| 198 |
+ _target_: profam.pipelines.callback.SamplingEvaluationPipelineCallback
|
| 199 |
pipeline:
|
| 200 |
+ _target_: profam.pipelines.unconditional_sequence.UnconditionalSequenceEvaluationPipeline
|
| 201 |
num_generations: 5
|
| 202 |
max_tokens: 20000
|
| 203 |
max_generated_length: 300
|
| 204 |
pipeline_id: unconditional_sampling
|
| 205 |
save_results_to_file: false
|
| 206 |
evaluators:
|
| 207 |
+ _target_: profam.evaluators.esmfold.ESMFoldSamplingEvaluator
|
| 208 |
name: esmfold_example
|
| 209 |
data:
|
| 210 |
+ _target_: profam.data.datamodule.ProteinDataMixture
|
| 211 |
dataset_builders:
|
| 212 |
proteingym:
|
| 213 |
+ _target_: profam.data.builders.proteingym.ProteinGymDataset
|
| 214 |
name: proteingym
|
| 215 |
dms_ids: ${constants.gym_val_assay_list}
|
| 216 |
seed: 42
|