ModernMolBERT-small / ape_tokenizer_metadata.json
jsture's picture
Upload trained ModernMolBERT checkpoint
d734b00 verified
Raw
History Blame Contribute Delete
3.28 kB
{
"dataset_name": "data/pretrain/chembl36_selfies",
"selfies_column": "selfies",
"train_split": "train",
"validation_split": "valid",
"use_validation_split": true,
"representation": "SELFIES",
"expected_input": "SELFIES strings only. Convert SMILES before inference using a helper such as smiles_to_selfies().",
"tokenizer_vocab_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.json",
"tokenizer_metadata_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.metadata.json",
"backend": "cuda",
"platform": "Linux-6.8.0-110-generic-x86_64-with-glibc2.35",
"torch_version": "2.11.0+cu130",
"transformers_version": "5.8.1",
"vocab_size": 631,
"special_ids": {
"pad_token": 1,
"bos_token": 0,
"eos_token": 2,
"unk_token": 3,
"mask_token": 4
},
"num_parameters": 34149495,
"tokenizer_stats": {
"sample_size": 1000.0,
"mean_len": 25.555,
"p50_len": 25.0,
"p95_len": 41.0,
"p99_len": 51.0,
"max_len": 62.0,
"truncation_rate": 0.0,
"unk_rate": 0.0,
"empty_sequence_rate": 0.0,
"mostly_unknown_rate": 0.0
},
"final_eval_metrics": {
"eval_loss": 0.3744058609008789,
"eval_masked_accuracy": 0.8805643239064613,
"eval_runtime": 2.8656,
"eval_samples_per_second": 1429.379,
"eval_steps_per_second": 5.584,
"epoch": 3.066,
"eval_perplexity": 1.454127204085947
},
"trainer_state_summary": {
"best_global_step": 30000,
"best_metric": 0.37694016098976135,
"best_model_checkpoint": "runs/chembl36_small_mask_mlm_lr_sweep/mask_standard__mlm_0p15__lr_4e-4/checkpoint-30000",
"global_step": 30000
},
"args": {
"output_dir": "runs/chembl36_small_mask_mlm_lr_sweep/mask_standard__mlm_0p15__lr_4e-4",
"tokenizer_vocab_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.json",
"tokenizer_metadata_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.metadata.json",
"dataset_name": "data/pretrain/chembl36_selfies",
"selfies_column": "selfies",
"train_split": "train",
"validation_split": "valid",
"use_validation_split": true,
"data_dir": null,
"data_files": null,
"eval_size": 4096,
"shuffle_buffer_size": 100000,
"seed": 42,
"val_split_mod": 100,
"val_split_bucket": 0,
"tokenizer_validation_samples": 1000,
"unk_rate_threshold": 0.001,
"truncation_warn_threshold": 0.05,
"model_size": "small",
"max_seq_length": 128,
"mlm_probability": 0.15,
"masking_strategy": "standard",
"span_p_geom": 0.4,
"span_max_length": 6,
"heteroatom_start_weight": 2.0,
"max_steps": 30000,
"per_device_train_batch_size": 256,
"per_device_eval_batch_size": 256,
"gradient_accumulation_steps": 1,
"learning_rate": 0.0004,
"weight_decay": 0.01,
"warmup_steps": 1500,
"max_grad_norm": 1.0,
"load_best_model_at_end": true,
"metric_for_best_model": "eval_loss",
"greater_is_better": false,
"logging_steps": 100,
"eval_steps": 5000,
"save_steps": 5000,
"save_total_limit": 2,
"device_backend": "cuda",
"bf16": true,
"fp16": false,
"num_workers": 4,
"max_eval_batches": 16,
"report_to": "tensorboard",
"compute_masked_accuracy": true,
"debug": false,
"hf_login": false
}
}