| { |
| "version": "1.0", |
| "truncation": { |
| "direction": "Right", |
| "max_length": 2048, |
| "strategy": "LongestFirst", |
| "stride": 0 |
| }, |
| "padding": null, |
| "added_tokens": [ |
| { |
| "id": 0, |
| "content": "<pad>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 1, |
| "content": "</s>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 2, |
| "content": "<unk>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 53, |
| "content": "<extra_id_0>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 54, |
| "content": "<extra_id_1>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 55, |
| "content": "<extra_id_2>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 56, |
| "content": "<extra_id_3>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 57, |
| "content": "<extra_id_4>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 58, |
| "content": "<extra_id_5>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 59, |
| "content": "<extra_id_6>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 60, |
| "content": "<extra_id_7>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 61, |
| "content": "<extra_id_8>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 62, |
| "content": "<extra_id_9>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 63, |
| "content": "<extra_id_10>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 64, |
| "content": "<extra_id_11>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 65, |
| "content": "<extra_id_12>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 66, |
| "content": "<extra_id_13>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 67, |
| "content": "<extra_id_14>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 68, |
| "content": "<extra_id_15>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 69, |
| "content": "<extra_id_16>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 70, |
| "content": "<extra_id_17>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 71, |
| "content": "<extra_id_18>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 72, |
| "content": "<extra_id_19>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 73, |
| "content": "<extra_id_20>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 74, |
| "content": "<extra_id_21>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 75, |
| "content": "<extra_id_22>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 76, |
| "content": "<extra_id_23>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 77, |
| "content": "<extra_id_24>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 78, |
| "content": "<extra_id_25>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 79, |
| "content": "<extra_id_26>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 80, |
| "content": "<extra_id_27>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 81, |
| "content": "<extra_id_28>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 82, |
| "content": "<extra_id_29>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 83, |
| "content": "<extra_id_30>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 84, |
| "content": "<extra_id_31>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 85, |
| "content": "<extra_id_32>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 86, |
| "content": "<extra_id_33>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 87, |
| "content": "<extra_id_34>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 88, |
| "content": "<extra_id_35>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 89, |
| "content": "<extra_id_36>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 90, |
| "content": "<extra_id_37>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 91, |
| "content": "<extra_id_38>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 92, |
| "content": "<extra_id_39>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 93, |
| "content": "<extra_id_40>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 94, |
| "content": "<extra_id_41>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 95, |
| "content": "<extra_id_42>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 96, |
| "content": "<extra_id_43>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 97, |
| "content": "<extra_id_44>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 98, |
| "content": "<extra_id_45>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 99, |
| "content": "<extra_id_46>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 100, |
| "content": "<extra_id_47>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 101, |
| "content": "<extra_id_48>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 102, |
| "content": "<extra_id_49>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 103, |
| "content": "<extra_id_50>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 104, |
| "content": "<extra_id_51>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 105, |
| "content": "<extra_id_52>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 106, |
| "content": "<extra_id_53>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 107, |
| "content": "<extra_id_54>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 108, |
| "content": "<extra_id_55>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 109, |
| "content": "<extra_id_56>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 110, |
| "content": "<extra_id_57>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 111, |
| "content": "<extra_id_58>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 112, |
| "content": "<extra_id_59>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 113, |
| "content": "<extra_id_60>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 114, |
| "content": "<extra_id_61>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 115, |
| "content": "<extra_id_62>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 116, |
| "content": "<extra_id_63>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 117, |
| "content": "<extra_id_64>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 118, |
| "content": "<extra_id_65>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 119, |
| "content": "<extra_id_66>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 120, |
| "content": "<extra_id_67>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 121, |
| "content": "<extra_id_68>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 122, |
| "content": "<extra_id_69>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 123, |
| "content": "<extra_id_70>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 124, |
| "content": "<extra_id_71>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 125, |
| "content": "<extra_id_72>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 126, |
| "content": "<extra_id_73>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 127, |
| "content": "<extra_id_74>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 128, |
| "content": "<extra_id_75>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 129, |
| "content": "<extra_id_76>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 130, |
| "content": "<extra_id_77>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 131, |
| "content": "<extra_id_78>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 132, |
| "content": "<extra_id_79>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 133, |
| "content": "<extra_id_80>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 134, |
| "content": "<extra_id_81>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 135, |
| "content": "<extra_id_82>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 136, |
| "content": "<extra_id_83>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 137, |
| "content": "<extra_id_84>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 138, |
| "content": "<extra_id_85>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 139, |
| "content": "<extra_id_86>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 140, |
| "content": "<extra_id_87>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 141, |
| "content": "<extra_id_88>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 142, |
| "content": "<extra_id_89>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 143, |
| "content": "<extra_id_90>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 144, |
| "content": "<extra_id_91>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 145, |
| "content": "<extra_id_92>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 146, |
| "content": "<extra_id_93>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 147, |
| "content": "<extra_id_94>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 148, |
| "content": "<extra_id_95>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 149, |
| "content": "<extra_id_96>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 150, |
| "content": "<extra_id_97>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 151, |
| "content": "<extra_id_98>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 152, |
| "content": "<extra_id_99>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 153, |
| "content": "fr_C_O", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 154, |
| "content": "fr_C_O_noCOO", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 155, |
| "content": "fr_Al_OH", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 156, |
| "content": "fr_Ar_OH", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 157, |
| "content": "fr_methoxy", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 158, |
| "content": "fr_oxime", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 159, |
| "content": "fr_ester", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 160, |
| "content": "fr_Al_COO", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 161, |
| "content": "fr_Ar_COO", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 162, |
| "content": "fr_COO", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 163, |
| "content": "fr_COO2", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 164, |
| "content": "fr_ketone", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 165, |
| "content": "fr_ether", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 166, |
| "content": "fr_phenol", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 167, |
| "content": "fr_aldehyde", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 168, |
| "content": "fr_quatN", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 169, |
| "content": "fr_NH2", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 170, |
| "content": "fr_NH1", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 171, |
| "content": "fr_NH0", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 172, |
| "content": "fr_Ar_N", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 173, |
| "content": "fr_Ar_NH", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 174, |
| "content": "fr_aniline", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 175, |
| "content": "fr_Imine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 176, |
| "content": "fr_nitrile", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 177, |
| "content": "fr_hdrzine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 178, |
| "content": "fr_hdrzone", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 179, |
| "content": "fr_nitroso", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 180, |
| "content": "fr_N_O", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 181, |
| "content": "fr_nitro", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 182, |
| "content": "fr_azo", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 183, |
| "content": "fr_diazo", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 184, |
| "content": "fr_azide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 185, |
| "content": "fr_amide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 186, |
| "content": "fr_priamide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 187, |
| "content": "fr_amidine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 188, |
| "content": "fr_guanido", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 189, |
| "content": "fr_Nhpyrrole", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 190, |
| "content": "fr_imide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 191, |
| "content": "fr_isocyan", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 192, |
| "content": "fr_isothiocyan", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 193, |
| "content": "fr_thiocyan", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 194, |
| "content": "fr_halogen", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 195, |
| "content": "fr_alkyl_halide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 196, |
| "content": "fr_sulfide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 197, |
| "content": "fr_SH", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 198, |
| "content": "fr_C_S", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 199, |
| "content": "fr_sulfone", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 200, |
| "content": "fr_sulfonamd", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 201, |
| "content": "fr_prisulfonamd", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 202, |
| "content": "fr_barbitur", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 203, |
| "content": "fr_urea", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 204, |
| "content": "fr_term_acetylene", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 205, |
| "content": "fr_imidazole", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 206, |
| "content": "fr_furan", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 207, |
| "content": "fr_thiophene", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 208, |
| "content": "fr_thiazole", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 209, |
| "content": "fr_oxazole", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 210, |
| "content": "fr_pyridine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 211, |
| "content": "fr_piperdine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 212, |
| "content": "fr_piperzine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 213, |
| "content": "fr_morpholine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 214, |
| "content": "fr_lactam", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 215, |
| "content": "fr_lactone", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 216, |
| "content": "fr_tetrazole", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 217, |
| "content": "fr_epoxide", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 218, |
| "content": "fr_unbrch_alkane", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 219, |
| "content": "fr_bicyclic", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 220, |
| "content": "fr_benzene", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 221, |
| "content": "fr_phos_acid", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 222, |
| "content": "fr_phos_ester", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 223, |
| "content": "fr_nitro_arom", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 224, |
| "content": "fr_nitro_arom_nonortho", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 225, |
| "content": "fr_dihydropyridine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 226, |
| "content": "fr_phenol_noOrthoHbond", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 227, |
| "content": "fr_Al_OH_noTert", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 228, |
| "content": "fr_benzodiazepine", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 229, |
| "content": "fr_para_hydroxylation", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 230, |
| "content": "fr_allylic_oxid", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 231, |
| "content": "fr_aryl_methyl", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 232, |
| "content": "fr_Ndealkylation1", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 233, |
| "content": "fr_Ndealkylation2", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 234, |
| "content": "fr_alkyl_carbamate", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 235, |
| "content": "fr_ketone_Topliss", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 236, |
| "content": "fr_ArN", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 237, |
| "content": "fr_HOCCN", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 238, |
| "content": "scaffold:", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| }, |
| { |
| "id": 239, |
| "content": "fragments:", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": true, |
| "special": false |
| } |
| ], |
| "normalizer": null, |
| "pre_tokenizer": { |
| "type": "Split", |
| "pattern": { |
| "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" |
| }, |
| "behavior": "Isolated", |
| "invert": false |
| }, |
| "post_processor": { |
| "type": "TemplateProcessing", |
| "single": [ |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "</s>", |
| "type_id": 0 |
| } |
| } |
| ], |
| "pair": [ |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "</s>", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "B", |
| "type_id": 1 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "</s>", |
| "type_id": 1 |
| } |
| } |
| ], |
| "special_tokens": { |
| "</s>": { |
| "id": "</s>", |
| "ids": [ |
| 1 |
| ], |
| "tokens": [ |
| "</s>" |
| ] |
| } |
| } |
| }, |
| "decoder": null, |
| "model": { |
| "type": "WordLevel", |
| "vocab": { |
| "<pad>": 0, |
| "</s>": 1, |
| "<unk>": 2, |
| "C": 3, |
| "c": 4, |
| "(": 5, |
| ")": 6, |
| "1": 7, |
| "O": 8, |
| "N": 9, |
| "=": 10, |
| "2": 11, |
| "n": 12, |
| "\n": 13, |
| "[C@H]": 14, |
| "[C@@H]": 15, |
| "3": 16, |
| "F": 17, |
| "o": 18, |
| "[nH]": 19, |
| "S": 20, |
| "s": 21, |
| "#": 22, |
| "Cl": 23, |
| "-": 24, |
| "4": 25, |
| "/": 26, |
| "[C@]": 27, |
| "[C@@]": 28, |
| "[O-]": 29, |
| "[N+]": 30, |
| "\\": 31, |
| "Br": 32, |
| "[S@]": 33, |
| "[S@@]": 34, |
| "5": 35, |
| "[n+]": 36, |
| "I": 37, |
| "[Si]": 38, |
| "P": 39, |
| "B": 40, |
| "[CH]": 41, |
| "6": 42, |
| "[C]": 43, |
| "[N-]": 44, |
| "7": 45, |
| "[C-]": 46, |
| "[CH2]": 47, |
| "[P@]": 48, |
| "[S+]": 49, |
| "[N@+]": 50, |
| "[S@+]": 51, |
| "[s+]": 52 |
| }, |
| "unk_token": "<unk>" |
| } |
| } |