DeepChem
/

MoLFormer-c3-1.1B

cheminformatics

Model card Files Files and versions

updated-hf-ckpt

#6

by riya2801 - opened Jan 13

base: refs/heads/main

←

from: refs/pr/6

Discussion Files changed

Files changed (5) hide show

README.md +24 -0
config.json +2 -2
checkpoint.pt → deepchem_ckpt.pt +0 -0
model.safetensors +2 -2
tokenizer_config.json +1 -1

README.md CHANGED Viewed

@@ -1,3 +1,27 @@
 ---
 license: mit
 ---

 ---
 license: mit
+library_name: transformers
+pipeline_tag: fill-mask
+tags:
+- cheminformatics
+- ChemBERTa-3
+- masked-lm
+- c3-MoLFormer
 ---
+# MoLFormer-c3-1.1B
+MoLFormer-c3-1.1B, as described in the ChemBERTa-3 paper [1], is pretrained on a combination of 100% of ZINC20 (1B) and 100% of PubChem (100M).
+## Usage
+```python
+from transformers import AutoTokenizer, AutoModelForMaskedLM
+tokenizer = AutoTokenizer.from_pretrained("DeepChem/MoLFormer-c3-1.1B")
+model = AutoModelForMaskedLM.from_pretrained("DeepChem/MoLFormer-c3-1.1B")
+```
+## Reference
+1. Singh R, Barsainyan AA, Irfan R, Amorin CJ, He S, Davis T, et al. ChemBERTa-3: An Open Source Training Framework for Chemical Foundation Models. ChemRxiv. 2025; doi:10.26434/chemrxiv-2025-4glrl-v2 (This content is a preprint and has not been peer-reviewed.)

config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "_name_or_path": "ibm/MoLFormer-XL-both-10pct",
   "architectures": [
-    "MolformerModel"
   ],
   "auto_map": {
     "AutoConfig": "ibm/MoLFormer-XL-both-10pct--configuration_molformer.MolformerConfig",
@@ -29,6 +29,6 @@
   "pad_token_id": 2,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.46.3",
   "vocab_size": 2362
 }

 {
   "_name_or_path": "ibm/MoLFormer-XL-both-10pct",
   "architectures": [
+    "MolformerForMaskedLM"
   ],
   "auto_map": {
     "AutoConfig": "ibm/MoLFormer-XL-both-10pct--configuration_molformer.MolformerConfig",
   "pad_token_id": 2,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.39.3",
   "vocab_size": 2362
 }

checkpoint.pt → deepchem_ckpt.pt RENAMED Viewed

File without changes

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa8e2087152b342ecb17f602d7ce23105a1d1023fb97c3ff7376da110281d1e2
-size 177621600

 version https://git-lfs.github.com/spec/v1
+oid sha256:98f9244242a9e4030922994985fc6fe5732e6c95737405194609d51349f61b74
+size 187248784

tokenizer_config.json CHANGED Viewed

@@ -50,7 +50,7 @@
   "clean_up_tokenization_spaces": true,
   "cls_token": "<bos>",
   "mask_token": "<mask>",
-  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "sep_token": "<eos>",
   "tokenizer_class": "MolformerTokenizer",

   "clean_up_tokenization_spaces": true,
   "cls_token": "<bos>",
   "mask_token": "<mask>",
+  "model_max_length": 202,
   "pad_token": "<pad>",
   "sep_token": "<eos>",
   "tokenizer_class": "MolformerTokenizer",