README.md CHANGED
@@ -1,3 +1,27 @@
1
  ---
2
  license: mit
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: mit
3
+ library_name: transformers
4
+ pipeline_tag: fill-mask
5
+ tags:
6
+ - cheminformatics
7
+ - ChemBERTa-3
8
+ - masked-lm
9
+ - c3-MoLFormer
10
  ---
11
+
12
+ # MoLFormer-c3-1.1B
13
+
14
+ MoLFormer-c3-1.1B, as described in the ChemBERTa-3 paper [1], is pretrained on a combination of 100% of ZINC20 (1B) and 100% of PubChem (100M).
15
+
16
+ ## Usage
17
+
18
+ ```python
19
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
20
+
21
+ tokenizer = AutoTokenizer.from_pretrained("DeepChem/MoLFormer-c3-1.1B", trust_remote_code=True)
22
+ model = AutoModelForMaskedLM.from_pretrained("DeepChem/MoLFormer-c3-1.1B", trust_remote_code=True)
23
+ ```
24
+
25
+ ## Reference
26
+
27
+ 1. Singh R, Barsainyan AA, Irfan R, Amorin CJ, He S, Davis T, et al. ChemBERTa-3: An Open Source Training Framework for Chemical Foundation Models. ChemRxiv. 2025; doi:10.26434/chemrxiv-2025-4glrl-v2. This content is a preprint and has not been peer-reviewed.
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_name_or_path": "ibm/MoLFormer-XL-both-10pct",
3
  "architectures": [
4
- "MolformerModel"
5
  ],
6
  "auto_map": {
7
  "AutoConfig": "ibm/MoLFormer-XL-both-10pct--configuration_molformer.MolformerConfig",
@@ -29,6 +29,6 @@
29
  "pad_token_id": 2,
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "float32",
32
- "transformers_version": "4.46.3",
33
  "vocab_size": 2362
34
  }
 
1
  {
2
  "_name_or_path": "ibm/MoLFormer-XL-both-10pct",
3
  "architectures": [
4
+ "MolformerForMaskedLM"
5
  ],
6
  "auto_map": {
7
  "AutoConfig": "ibm/MoLFormer-XL-both-10pct--configuration_molformer.MolformerConfig",
 
29
  "pad_token_id": 2,
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "float32",
32
+ "transformers_version": "4.39.3",
33
  "vocab_size": 2362
34
  }
checkpoint.pt → deepchem_ckpt.pt RENAMED
File without changes
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa8e2087152b342ecb17f602d7ce23105a1d1023fb97c3ff7376da110281d1e2
3
- size 177621600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98f9244242a9e4030922994985fc6fe5732e6c95737405194609d51349f61b74
3
+ size 187248784
tokenizer_config.json CHANGED
@@ -50,7 +50,7 @@
50
  "clean_up_tokenization_spaces": true,
51
  "cls_token": "<bos>",
52
  "mask_token": "<mask>",
53
- "model_max_length": 1000000000000000019884624838656,
54
  "pad_token": "<pad>",
55
  "sep_token": "<eos>",
56
  "tokenizer_class": "MolformerTokenizer",
 
50
  "clean_up_tokenization_spaces": true,
51
  "cls_token": "<bos>",
52
  "mask_token": "<mask>",
53
+ "model_max_length": 202,
54
  "pad_token": "<pad>",
55
  "sep_token": "<eos>",
56
  "tokenizer_class": "MolformerTokenizer",