evankomp
/

learn2therm

@@ -7,14 +7,6 @@ tags:
 __Purpose__: Classifies a protein sequence as thermophilic (> 60 °C) or mesophilic (< 40 °C) based on the growth temperature of its host organism.
-__Training__:
-ProteinBERT (Rostlab/prot_bert) was fine tuned on a class balanced version of learn2therm (see [here]()), about 250k protein amino acid sequences.
-Training parameters below:
-TODO
-See the [training repository](https://github.com/BeckResearchLab/learn2thermML) for code.
 __Usage__:
 Prepare sequences exactly as you would for the original pretrained model:
@@ -30,4 +22,118 @@ encoded_input = tokenizer(sequence_Example, return_tensors='pt')
 output = torch.argmax(model(**encoded_input), dim=1)
 ```
-1 indicates thermophilic, 0 mesophilic.

 __Purpose__: Classifies a protein sequence as thermophilic (> 60 °C) or mesophilic (< 40 °C) based on the growth temperature of its host organism.
 __Usage__:
 Prepare sequences exactly as you would for the original pretrained model:
 output = torch.argmax(model(**encoded_input), dim=1)
 ```
+A predicted label of 1 indicates thermophilic; 0 indicates mesophilic.
+__Training__:
+ProtBert (Rostlab/prot_bert) was fine-tuned on a class-balanced version of learn2therm (see [here]()), comprising about 250k protein amino acid sequences.
+Training parameters below:
+```
+TrainingArguments(
+_n_gpu=1,
+adafactor=False,
+adam_beta1=0.9,
+adam_beta2=0.999,
+adam_epsilon=1e-08,
+auto_find_batch_size=False,
+bf16=False,
+bf16_full_eval=False,
+data_seed=None,
+dataloader_drop_last=False,
+dataloader_num_workers=0,
+dataloader_pin_memory=True,
+ddp_bucket_cap_mb=None,
+ddp_find_unused_parameters=None,
+ddp_timeout=1800,
+debug=[],
+deepspeed=None,
+disable_tqdm=False,
+do_eval=True,
+do_predict=False,
+do_train=True,
+eval_accumulation_steps=25,
+eval_delay=0,
+eval_steps=6,
+evaluation_strategy=steps,
+fp16=True,
+fp16_backend=auto,
+fp16_full_eval=False,
+fp16_opt_level=O1,
+fsdp=[],
+fsdp_min_num_params=0,
+fsdp_transformer_layer_cls_to_wrap=None,
+full_determinism=False,
+gradient_accumulation_steps=25,
+gradient_checkpointing=True,
+greater_is_better=False,
+group_by_length=False,
+half_precision_backend=cuda_amp,
+hub_model_id=None,
+hub_private_repo=False,
+hub_strategy=every_save,
+hub_token=<HUB_TOKEN>,
+ignore_data_skip=False,
+include_inputs_for_metrics=False,
+jit_mode_eval=False,
+label_names=None,
+label_smoothing_factor=0.0,
+learning_rate=5e-05,
+length_column_name=length,
+load_best_model_at_end=True,
+local_rank=0,
+log_level=info,
+log_level_replica=passive,
+log_on_each_node=True,
+logging_dir=./data/ogt_protein_classifier/model/runs/Jun19_12-16-35_g3070,
+logging_first_step=False,
+logging_nan_inf_filter=True,
+logging_steps=1,
+logging_strategy=steps,
+lr_scheduler_type=linear,
+max_grad_norm=1.0,
+max_steps=-1,
+metric_for_best_model=loss,
+mp_parameters=,
+no_cuda=False,
+num_train_epochs=2,
+optim=adamw_hf,
+optim_args=None,
+output_dir=./data/ogt_protein_classifier/model,
+overwrite_output_dir=False,
+past_index=-1,
+per_device_eval_batch_size=32,
+per_device_train_batch_size=32,
+prediction_loss_only=False,
+push_to_hub=False,
+push_to_hub_model_id=None,
+push_to_hub_organization=None,
+push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
+ray_scope=last,
+remove_unused_columns=True,
+report_to=['tensorboard', 'codecarbon'],
+resume_from_checkpoint=None,
+run_name=./data/ogt_protein_classifier/model,
+save_on_each_node=False,
+save_steps=6,
+save_strategy=steps,
+save_total_limit=None,
+seed=42,
+sharded_ddp=[],
+skip_memory_metrics=True,
+tf32=None,
+torch_compile=False,
+torch_compile_backend=None,
+torch_compile_mode=None,
+torchdynamo=None,
+tpu_metrics_debug=False,
+tpu_num_cores=None,
+use_ipex=False,
+use_legacy_prediction_loop=False,
+use_mps_device=False,
+warmup_ratio=0.0,
+warmup_steps=0,
+weight_decay=0.0,
+xpu_backend=None,
+)
+```
+See the [training repository](https://github.com/BeckResearchLab/learn2thermML) for code.