{ "dataset_name": "data/pretrain/chembl36_selfies", "selfies_column": "selfies", "train_split": "train", "validation_split": "valid", "use_validation_split": true, "representation": "SELFIES", "expected_input": "SELFIES strings only. Convert SMILES before inference using a helper such as smiles_to_selfies().", "tokenizer_vocab_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.json", "tokenizer_metadata_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.metadata.json", "backend": "cuda", "platform": "Linux-6.8.0-110-generic-x86_64-with-glibc2.35", "torch_version": "2.11.0+cu130", "transformers_version": "5.8.1", "vocab_size": 631, "special_ids": { "pad_token": 1, "bos_token": 0, "eos_token": 2, "unk_token": 3, "mask_token": 4 }, "num_parameters": 34149495, "tokenizer_stats": { "sample_size": 1000.0, "mean_len": 25.555, "p50_len": 25.0, "p95_len": 41.0, "p99_len": 51.0, "max_len": 62.0, "truncation_rate": 0.0, "unk_rate": 0.0, "empty_sequence_rate": 0.0, "mostly_unknown_rate": 0.0 }, "final_eval_metrics": { "eval_loss": 0.3744058609008789, "eval_masked_accuracy": 0.8805643239064613, "eval_runtime": 2.8656, "eval_samples_per_second": 1429.379, "eval_steps_per_second": 5.584, "epoch": 3.066, "eval_perplexity": 1.454127204085947 }, "trainer_state_summary": { "best_global_step": 30000, "best_metric": 0.37694016098976135, "best_model_checkpoint": "runs/chembl36_small_mask_mlm_lr_sweep/mask_standard__mlm_0p15__lr_4e-4/checkpoint-30000", "global_step": 30000 }, "args": { "output_dir": "runs/chembl36_small_mask_mlm_lr_sweep/mask_standard__mlm_0p15__lr_4e-4", "tokenizer_vocab_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.json", "tokenizer_metadata_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.metadata.json", "dataset_name": "data/pretrain/chembl36_selfies", "selfies_column": "selfies", "train_split": "train", "validation_split": "valid", "use_validation_split": true, "data_dir": null, "data_files": null, "eval_size": 4096, "shuffle_buffer_size": 100000, "seed": 42, "val_split_mod": 100, "val_split_bucket": 0, "tokenizer_validation_samples": 1000, "unk_rate_threshold": 0.001, "truncation_warn_threshold": 0.05, "model_size": "small", "max_seq_length": 128, "mlm_probability": 0.15, "masking_strategy": "standard", "span_p_geom": 0.4, "span_max_length": 6, "heteroatom_start_weight": 2.0, "max_steps": 30000, "per_device_train_batch_size": 256, "per_device_eval_batch_size": 256, "gradient_accumulation_steps": 1, "learning_rate": 0.0004, "weight_decay": 0.01, "warmup_steps": 1500, "max_grad_norm": 1.0, "load_best_model_at_end": true, "metric_for_best_model": "eval_loss", "greater_is_better": false, "logging_steps": 100, "eval_steps": 5000, "save_steps": 5000, "save_total_limit": 2, "device_backend": "cuda", "bf16": true, "fp16": false, "num_workers": 4, "max_eval_batches": 16, "report_to": "tensorboard", "compute_masked_accuracy": true, "debug": false, "hf_login": false } }