Fill-Mask
Transformers
Safetensors
modernbert
chemistry
molecules
selfies
ape-tokenizer
masked-language-modeling
Instructions to use HauserGroup/ModernMolBERT-small with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use HauserGroup/ModernMolBERT-small with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("fill-mask", model="HauserGroup/ModernMolBERT-small")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained("HauserGroup/ModernMolBERT-small")
model = AutoModelForMaskedLM.from_pretrained("HauserGroup/ModernMolBERT-small")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "dataset_name": "data/pretrain/chembl36_selfies", | |
| "selfies_column": "selfies", | |
| "train_split": "train", | |
| "validation_split": "valid", | |
| "use_validation_split": true, | |
| "representation": "SELFIES", | |
| "expected_input": "SELFIES strings only. Convert SMILES before inference using a helper such as smiles_to_selfies().", | |
| "tokenizer_vocab_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.json", | |
| "tokenizer_metadata_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.metadata.json", | |
| "backend": "cuda", | |
| "platform": "Linux-6.8.0-110-generic-x86_64-with-glibc2.35", | |
| "torch_version": "2.11.0+cu130", | |
| "transformers_version": "5.8.1", | |
| "vocab_size": 631, | |
| "special_ids": { | |
| "pad_token": 1, | |
| "bos_token": 0, | |
| "eos_token": 2, | |
| "unk_token": 3, | |
| "mask_token": 4 | |
| }, | |
| "num_parameters": 34149495, | |
| "tokenizer_stats": { | |
| "sample_size": 1000.0, | |
| "mean_len": 25.555, | |
| "p50_len": 25.0, | |
| "p95_len": 41.0, | |
| "p99_len": 51.0, | |
| "max_len": 62.0, | |
| "truncation_rate": 0.0, | |
| "unk_rate": 0.0, | |
| "empty_sequence_rate": 0.0, | |
| "mostly_unknown_rate": 0.0 | |
| }, | |
| "final_eval_metrics": { | |
| "eval_loss": 0.3744058609008789, | |
| "eval_masked_accuracy": 0.8805643239064613, | |
| "eval_runtime": 2.8656, | |
| "eval_samples_per_second": 1429.379, | |
| "eval_steps_per_second": 5.584, | |
| "epoch": 3.066, | |
| "eval_perplexity": 1.454127204085947 | |
| }, | |
| "trainer_state_summary": { | |
| "best_global_step": 30000, | |
| "best_metric": 0.37694016098976135, | |
| "best_model_checkpoint": "runs/chembl36_small_mask_mlm_lr_sweep/mask_standard__mlm_0p15__lr_4e-4/checkpoint-30000", | |
| "global_step": 30000 | |
| }, | |
| "args": { | |
| "output_dir": "runs/chembl36_small_mask_mlm_lr_sweep/mask_standard__mlm_0p15__lr_4e-4", | |
| "tokenizer_vocab_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.json", | |
| "tokenizer_metadata_path": "tokenizer/chembl36_selfies_2m_ape_max2_min3000.metadata.json", | |
| "dataset_name": "data/pretrain/chembl36_selfies", | |
| "selfies_column": "selfies", | |
| "train_split": "train", | |
| "validation_split": "valid", | |
| "use_validation_split": true, | |
| "data_dir": null, | |
| "data_files": null, | |
| "eval_size": 4096, | |
| "shuffle_buffer_size": 100000, | |
| "seed": 42, | |
| "val_split_mod": 100, | |
| "val_split_bucket": 0, | |
| "tokenizer_validation_samples": 1000, | |
| "unk_rate_threshold": 0.001, | |
| "truncation_warn_threshold": 0.05, | |
| "model_size": "small", | |
| "max_seq_length": 128, | |
| "mlm_probability": 0.15, | |
| "masking_strategy": "standard", | |
| "span_p_geom": 0.4, | |
| "span_max_length": 6, | |
| "heteroatom_start_weight": 2.0, | |
| "max_steps": 30000, | |
| "per_device_train_batch_size": 256, | |
| "per_device_eval_batch_size": 256, | |
| "gradient_accumulation_steps": 1, | |
| "learning_rate": 0.0004, | |
| "weight_decay": 0.01, | |
| "warmup_steps": 1500, | |
| "max_grad_norm": 1.0, | |
| "load_best_model_at_end": true, | |
| "metric_for_best_model": "eval_loss", | |
| "greater_is_better": false, | |
| "logging_steps": 100, | |
| "eval_steps": 5000, | |
| "save_steps": 5000, | |
| "save_total_limit": 2, | |
| "device_backend": "cuda", | |
| "bf16": true, | |
| "fp16": false, | |
| "num_workers": 4, | |
| "max_eval_batches": 16, | |
| "report_to": "tensorboard", | |
| "compute_masked_accuracy": true, | |
| "debug": false, | |
| "hf_login": false | |
| } | |
| } |