Upload tokenizer
Browse files- README.md +201 -0
- tokenizer.json +364 -3
- tokenizer_config.json +27 -0
README.md
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
tags: []
|
| 4 |
+
---
|
| 5 |
+
|
| 6 |
+
# Model Card for Model ID
|
| 7 |
+
|
| 8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
## Model Details
|
| 13 |
+
|
| 14 |
+
### Model Description
|
| 15 |
+
|
| 16 |
+
<!-- Provide a longer summary of what this model is. -->
|
| 17 |
+
|
| 18 |
+
This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
|
| 19 |
+
|
| 20 |
+
- **Developed by:** [More Information Needed]
|
| 21 |
+
- **Funded by [optional]:** [More Information Needed]
|
| 22 |
+
- **Shared by [optional]:** [More Information Needed]
|
| 23 |
+
- **Model type:** [More Information Needed]
|
| 24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
| 25 |
+
- **License:** [More Information Needed]
|
| 26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
| 27 |
+
|
| 28 |
+
### Model Sources [optional]
|
| 29 |
+
|
| 30 |
+
<!-- Provide the basic links for the model. -->
|
| 31 |
+
|
| 32 |
+
- **Repository:** [More Information Needed]
|
| 33 |
+
- **Paper [optional]:** [More Information Needed]
|
| 34 |
+
- **Demo [optional]:** [More Information Needed]
|
| 35 |
+
|
| 36 |
+
## Uses
|
| 37 |
+
|
| 38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
| 39 |
+
|
| 40 |
+
### Direct Use
|
| 41 |
+
|
| 42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
| 43 |
+
|
| 44 |
+
[More Information Needed]
|
| 45 |
+
|
| 46 |
+
### Downstream Use [optional]
|
| 47 |
+
|
| 48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
| 49 |
+
|
| 50 |
+
[More Information Needed]
|
| 51 |
+
|
| 52 |
+
### Out-of-Scope Use
|
| 53 |
+
|
| 54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
| 55 |
+
|
| 56 |
+
[More Information Needed]
|
| 57 |
+
|
| 58 |
+
## Bias, Risks, and Limitations
|
| 59 |
+
|
| 60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
| 61 |
+
|
| 62 |
+
[More Information Needed]
|
| 63 |
+
|
| 64 |
+
### Recommendations
|
| 65 |
+
|
| 66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
| 67 |
+
|
| 68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
| 69 |
+
|
| 70 |
+
## How to Get Started with the Model
|
| 71 |
+
|
| 72 |
+
Use the code below to get started with the model.
|
| 73 |
+
|
| 74 |
+
[More Information Needed]
|
| 75 |
+
|
| 76 |
+
## Training Details
|
| 77 |
+
|
| 78 |
+
### Training Data
|
| 79 |
+
|
| 80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
| 81 |
+
|
| 82 |
+
[More Information Needed]
|
| 83 |
+
|
| 84 |
+
### Training Procedure
|
| 85 |
+
|
| 86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
| 87 |
+
|
| 88 |
+
#### Preprocessing [optional]
|
| 89 |
+
|
| 90 |
+
[More Information Needed]
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
#### Training Hyperparameters
|
| 94 |
+
|
| 95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
| 96 |
+
|
| 97 |
+
#### Speeds, Sizes, Times [optional]
|
| 98 |
+
|
| 99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
| 100 |
+
|
| 101 |
+
[More Information Needed]
|
| 102 |
+
|
| 103 |
+
## Evaluation
|
| 104 |
+
|
| 105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
| 106 |
+
|
| 107 |
+
### Testing Data, Factors & Metrics
|
| 108 |
+
|
| 109 |
+
#### Testing Data
|
| 110 |
+
|
| 111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
| 112 |
+
|
| 113 |
+
[More Information Needed]
|
| 114 |
+
|
| 115 |
+
#### Factors
|
| 116 |
+
|
| 117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
| 118 |
+
|
| 119 |
+
[More Information Needed]
|
| 120 |
+
|
| 121 |
+
#### Metrics
|
| 122 |
+
|
| 123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
| 124 |
+
|
| 125 |
+
[More Information Needed]
|
| 126 |
+
|
| 127 |
+
### Results
|
| 128 |
+
|
| 129 |
+
[More Information Needed]
|
| 130 |
+
|
| 131 |
+
#### Summary
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
## Model Examination [optional]
|
| 136 |
+
|
| 137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
| 138 |
+
|
| 139 |
+
[More Information Needed]
|
| 140 |
+
|
| 141 |
+
## Environmental Impact
|
| 142 |
+
|
| 143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
| 144 |
+
|
| 145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
| 146 |
+
|
| 147 |
+
- **Hardware Type:** [More Information Needed]
|
| 148 |
+
- **Hours used:** [More Information Needed]
|
| 149 |
+
- **Cloud Provider:** [More Information Needed]
|
| 150 |
+
- **Compute Region:** [More Information Needed]
|
| 151 |
+
- **Carbon Emitted:** [More Information Needed]
|
| 152 |
+
|
| 153 |
+
## Technical Specifications [optional]
|
| 154 |
+
|
| 155 |
+
### Model Architecture and Objective
|
| 156 |
+
|
| 157 |
+
[More Information Needed]
|
| 158 |
+
|
| 159 |
+
### Compute Infrastructure
|
| 160 |
+
|
| 161 |
+
[More Information Needed]
|
| 162 |
+
|
| 163 |
+
#### Hardware
|
| 164 |
+
|
| 165 |
+
[More Information Needed]
|
| 166 |
+
|
| 167 |
+
#### Software
|
| 168 |
+
|
| 169 |
+
[More Information Needed]
|
| 170 |
+
|
| 171 |
+
## Citation [optional]
|
| 172 |
+
|
| 173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
| 174 |
+
|
| 175 |
+
**BibTeX:**
|
| 176 |
+
|
| 177 |
+
[More Information Needed]
|
| 178 |
+
|
| 179 |
+
**APA:**
|
| 180 |
+
|
| 181 |
+
[More Information Needed]
|
| 182 |
+
|
| 183 |
+
## Glossary [optional]
|
| 184 |
+
|
| 185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
| 186 |
+
|
| 187 |
+
[More Information Needed]
|
| 188 |
+
|
| 189 |
+
## More Information [optional]
|
| 190 |
+
|
| 191 |
+
[More Information Needed]
|
| 192 |
+
|
| 193 |
+
## Model Card Authors [optional]
|
| 194 |
+
|
| 195 |
+
[More Information Needed]
|
| 196 |
+
|
| 197 |
+
## Model Card Contact
|
| 198 |
+
|
| 199 |
+
[More Information Needed]
|
| 200 |
+
|
| 201 |
+
|
tokenizer.json
CHANGED
|
@@ -302,7 +302,6 @@
|
|
| 302 |
"ॽ": 164,
|
| 303 |
"ॾ": 165,
|
| 304 |
"“": 166,
|
| 305 |
-
"▁": 167,
|
| 306 |
"▁क": 168,
|
| 307 |
"े▁": 169,
|
| 308 |
"ा▁": 170,
|
|
@@ -32134,7 +32133,278 @@
|
|
| 32134 |
"रेक": 31996,
|
| 32135 |
"वरन▁": 31997,
|
| 32136 |
"उपयोगकर्ता▁को▁": 31998,
|
| 32137 |
-
"से▁से▁": 31999
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32138 |
},
|
| 32139 |
"merges": [
|
| 32140 |
"▁ क",
|
|
@@ -63968,7 +64238,98 @@
|
|
| 63968 |
"रे क",
|
| 63969 |
"वर न▁",
|
| 63970 |
"उपयोगकर् ता▁को▁",
|
| 63971 |
-
"से▁ से▁"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63972 |
]
|
| 63973 |
}
|
| 63974 |
}
|
|
|
|
| 302 |
"ॽ": 164,
|
| 303 |
"ॾ": 165,
|
| 304 |
"“": 166,
|
|
|
|
| 305 |
"▁क": 168,
|
| 306 |
"े▁": 169,
|
| 307 |
"ा▁": 170,
|
|
|
|
| 32133 |
"रेक": 31996,
|
| 32134 |
"वरन▁": 31997,
|
| 32135 |
"उपयोगकर्ता▁को▁": 31998,
|
| 32136 |
+
"से▁से▁": 31999,
|
| 32137 |
+
"<0x00>": 32000,
|
| 32138 |
+
"<0x01>": 32001,
|
| 32139 |
+
"<0x02>": 32002,
|
| 32140 |
+
"<0x03>": 32003,
|
| 32141 |
+
"<0x04>": 32004,
|
| 32142 |
+
"<0x05>": 32005,
|
| 32143 |
+
"<0x06>": 32006,
|
| 32144 |
+
"<0x07>": 32007,
|
| 32145 |
+
"<0x08>": 32008,
|
| 32146 |
+
"<0x09>": 32009,
|
| 32147 |
+
"<0x0a>": 32010,
|
| 32148 |
+
"<0x0b>": 32011,
|
| 32149 |
+
"<0x0c>": 32012,
|
| 32150 |
+
"<0x0d>": 32013,
|
| 32151 |
+
"<0x0e>": 32014,
|
| 32152 |
+
"<0x0f>": 32015,
|
| 32153 |
+
"<0x10>": 32016,
|
| 32154 |
+
"<0x11>": 32017,
|
| 32155 |
+
"<0x12>": 32018,
|
| 32156 |
+
"<0x13>": 32019,
|
| 32157 |
+
"<0x14>": 32020,
|
| 32158 |
+
"<0x15>": 32021,
|
| 32159 |
+
"<0x16>": 32022,
|
| 32160 |
+
"<0x17>": 32023,
|
| 32161 |
+
"<0x18>": 32024,
|
| 32162 |
+
"<0x19>": 32025,
|
| 32163 |
+
"<0x1a>": 32026,
|
| 32164 |
+
"<0x1b>": 32027,
|
| 32165 |
+
"<0x1c>": 32028,
|
| 32166 |
+
"<0x1d>": 32029,
|
| 32167 |
+
"<0x1e>": 32030,
|
| 32168 |
+
"<0x1f>": 32031,
|
| 32169 |
+
"<0x20>": 32032,
|
| 32170 |
+
"<0x21>": 32033,
|
| 32171 |
+
"<0x22>": 32034,
|
| 32172 |
+
"<0x23>": 32035,
|
| 32173 |
+
"<0x24>": 32036,
|
| 32174 |
+
"<0x25>": 32037,
|
| 32175 |
+
"<0x26>": 32038,
|
| 32176 |
+
"<0x27>": 32039,
|
| 32177 |
+
"<0x28>": 32040,
|
| 32178 |
+
"<0x29>": 32041,
|
| 32179 |
+
"<0x2a>": 32042,
|
| 32180 |
+
"<0x2b>": 32043,
|
| 32181 |
+
"<0x2c>": 32044,
|
| 32182 |
+
"<0x2d>": 32045,
|
| 32183 |
+
"<0x2e>": 32046,
|
| 32184 |
+
"<0x2f>": 32047,
|
| 32185 |
+
"<0x30>": 32048,
|
| 32186 |
+
"<0x31>": 32049,
|
| 32187 |
+
"<0x32>": 32050,
|
| 32188 |
+
"<0x33>": 32051,
|
| 32189 |
+
"<0x34>": 32052,
|
| 32190 |
+
"<0x35>": 32053,
|
| 32191 |
+
"<0x36>": 32054,
|
| 32192 |
+
"<0x37>": 32055,
|
| 32193 |
+
"<0x38>": 32056,
|
| 32194 |
+
"<0x39>": 32057,
|
| 32195 |
+
"<0x3a>": 32058,
|
| 32196 |
+
"<0x3b>": 32059,
|
| 32197 |
+
"<0x3c>": 32060,
|
| 32198 |
+
"<0x3d>": 32061,
|
| 32199 |
+
"<0x3e>": 32062,
|
| 32200 |
+
"<0x3f>": 32063,
|
| 32201 |
+
"<0x40>": 32064,
|
| 32202 |
+
"<0x41>": 32065,
|
| 32203 |
+
"<0x42>": 32066,
|
| 32204 |
+
"<0x43>": 32067,
|
| 32205 |
+
"<0x44>": 32068,
|
| 32206 |
+
"<0x45>": 32069,
|
| 32207 |
+
"<0x46>": 32070,
|
| 32208 |
+
"<0x47>": 32071,
|
| 32209 |
+
"<0x48>": 32072,
|
| 32210 |
+
"<0x49>": 32073,
|
| 32211 |
+
"<0x4a>": 32074,
|
| 32212 |
+
"<0x4b>": 32075,
|
| 32213 |
+
"<0x4c>": 32076,
|
| 32214 |
+
"<0x4d>": 32077,
|
| 32215 |
+
"<0x4e>": 32078,
|
| 32216 |
+
"<0x4f>": 32079,
|
| 32217 |
+
"<0x50>": 32080,
|
| 32218 |
+
"<0x51>": 32081,
|
| 32219 |
+
"<0x52>": 32082,
|
| 32220 |
+
"<0x53>": 32083,
|
| 32221 |
+
"<0x54>": 32084,
|
| 32222 |
+
"<0x55>": 32085,
|
| 32223 |
+
"<0x56>": 32086,
|
| 32224 |
+
"<0x57>": 32087,
|
| 32225 |
+
"<0x58>": 32088,
|
| 32226 |
+
"<0x59>": 32089,
|
| 32227 |
+
"<0x5a>": 32090,
|
| 32228 |
+
"<0x5b>": 32091,
|
| 32229 |
+
"<0x5c>": 32092,
|
| 32230 |
+
"<0x5d>": 32093,
|
| 32231 |
+
"<0x5e>": 32094,
|
| 32232 |
+
"<0x5f>": 32095,
|
| 32233 |
+
"<0x60>": 32096,
|
| 32234 |
+
"<0x61>": 32097,
|
| 32235 |
+
"<0x62>": 32098,
|
| 32236 |
+
"<0x63>": 32099,
|
| 32237 |
+
"<0x64>": 32100,
|
| 32238 |
+
"<0x65>": 32101,
|
| 32239 |
+
"<0x66>": 32102,
|
| 32240 |
+
"<0x67>": 32103,
|
| 32241 |
+
"<0x68>": 32104,
|
| 32242 |
+
"<0x69>": 32105,
|
| 32243 |
+
"<0x6a>": 32106,
|
| 32244 |
+
"<0x6b>": 32107,
|
| 32245 |
+
"<0x6c>": 32108,
|
| 32246 |
+
"<0x6d>": 32109,
|
| 32247 |
+
"<0x6e>": 32110,
|
| 32248 |
+
"<0x6f>": 32111,
|
| 32249 |
+
"<0x70>": 32112,
|
| 32250 |
+
"<0x71>": 32113,
|
| 32251 |
+
"<0x72>": 32114,
|
| 32252 |
+
"<0x73>": 32115,
|
| 32253 |
+
"<0x74>": 32116,
|
| 32254 |
+
"<0x75>": 32117,
|
| 32255 |
+
"<0x76>": 32118,
|
| 32256 |
+
"<0x77>": 32119,
|
| 32257 |
+
"<0x78>": 32120,
|
| 32258 |
+
"<0x79>": 32121,
|
| 32259 |
+
"<0x7a>": 32122,
|
| 32260 |
+
"<0x7b>": 32123,
|
| 32261 |
+
"<0x7c>": 32124,
|
| 32262 |
+
"<0x7d>": 32125,
|
| 32263 |
+
"<0x7e>": 32126,
|
| 32264 |
+
"<0x7f>": 32127,
|
| 32265 |
+
"<0x80>": 32128,
|
| 32266 |
+
"<0x81>": 32129,
|
| 32267 |
+
"<0x82>": 32130,
|
| 32268 |
+
"<0x83>": 32131,
|
| 32269 |
+
"<0x84>": 32132,
|
| 32270 |
+
"<0x85>": 32133,
|
| 32271 |
+
"<0x86>": 32134,
|
| 32272 |
+
"<0x87>": 32135,
|
| 32273 |
+
"<0x88>": 32136,
|
| 32274 |
+
"<0x89>": 32137,
|
| 32275 |
+
"<0x8a>": 32138,
|
| 32276 |
+
"<0x8b>": 32139,
|
| 32277 |
+
"<0x8c>": 32140,
|
| 32278 |
+
"<0x8d>": 32141,
|
| 32279 |
+
"<0x8e>": 32142,
|
| 32280 |
+
"<0x8f>": 32143,
|
| 32281 |
+
"<0x90>": 32144,
|
| 32282 |
+
"<0x91>": 32145,
|
| 32283 |
+
"<0x92>": 32146,
|
| 32284 |
+
"<0x93>": 32147,
|
| 32285 |
+
"<0x94>": 32148,
|
| 32286 |
+
"<0x95>": 32149,
|
| 32287 |
+
"<0x96>": 32150,
|
| 32288 |
+
"<0x97>": 32151,
|
| 32289 |
+
"<0x98>": 32152,
|
| 32290 |
+
"<0x99>": 32153,
|
| 32291 |
+
"<0x9a>": 32154,
|
| 32292 |
+
"<0x9b>": 32155,
|
| 32293 |
+
"<0x9c>": 32156,
|
| 32294 |
+
"<0x9d>": 32157,
|
| 32295 |
+
"<0x9e>": 32158,
|
| 32296 |
+
"<0x9f>": 32159,
|
| 32297 |
+
"<0xa0>": 32160,
|
| 32298 |
+
"<0xa1>": 32161,
|
| 32299 |
+
"<0xa2>": 32162,
|
| 32300 |
+
"<0xa3>": 32163,
|
| 32301 |
+
"<0xa4>": 32164,
|
| 32302 |
+
"<0xa5>": 32165,
|
| 32303 |
+
"<0xa6>": 32166,
|
| 32304 |
+
"<0xa7>": 32167,
|
| 32305 |
+
"<0xa8>": 32168,
|
| 32306 |
+
"<0xa9>": 32169,
|
| 32307 |
+
"<0xaa>": 32170,
|
| 32308 |
+
"<0xab>": 32171,
|
| 32309 |
+
"<0xac>": 32172,
|
| 32310 |
+
"<0xad>": 32173,
|
| 32311 |
+
"<0xae>": 32174,
|
| 32312 |
+
"<0xaf>": 32175,
|
| 32313 |
+
"<0xb0>": 32176,
|
| 32314 |
+
"<0xb1>": 32177,
|
| 32315 |
+
"<0xb2>": 32178,
|
| 32316 |
+
"<0xb3>": 32179,
|
| 32317 |
+
"<0xb4>": 32180,
|
| 32318 |
+
"<0xb5>": 32181,
|
| 32319 |
+
"<0xb6>": 32182,
|
| 32320 |
+
"<0xb7>": 32183,
|
| 32321 |
+
"<0xb8>": 32184,
|
| 32322 |
+
"<0xb9>": 32185,
|
| 32323 |
+
"<0xba>": 32186,
|
| 32324 |
+
"<0xbb>": 32187,
|
| 32325 |
+
"<0xbc>": 32188,
|
| 32326 |
+
"<0xbd>": 32189,
|
| 32327 |
+
"<0xbe>": 32190,
|
| 32328 |
+
"<0xbf>": 32191,
|
| 32329 |
+
"<0xc0>": 32192,
|
| 32330 |
+
"<0xc1>": 32193,
|
| 32331 |
+
"<0xc2>": 32194,
|
| 32332 |
+
"<0xc3>": 32195,
|
| 32333 |
+
"<0xc4>": 32196,
|
| 32334 |
+
"<0xc5>": 32197,
|
| 32335 |
+
"<0xc6>": 32198,
|
| 32336 |
+
"<0xc7>": 32199,
|
| 32337 |
+
"<0xc8>": 32200,
|
| 32338 |
+
"<0xc9>": 32201,
|
| 32339 |
+
"<0xca>": 32202,
|
| 32340 |
+
"<0xcb>": 32203,
|
| 32341 |
+
"<0xcc>": 32204,
|
| 32342 |
+
"<0xcd>": 32205,
|
| 32343 |
+
"<0xce>": 32206,
|
| 32344 |
+
"<0xcf>": 32207,
|
| 32345 |
+
"<0xd0>": 32208,
|
| 32346 |
+
"<0xd1>": 32209,
|
| 32347 |
+
"<0xd2>": 32210,
|
| 32348 |
+
"<0xd3>": 32211,
|
| 32349 |
+
"<0xd4>": 32212,
|
| 32350 |
+
"<0xd5>": 32213,
|
| 32351 |
+
"<0xd6>": 32214,
|
| 32352 |
+
"<0xd7>": 32215,
|
| 32353 |
+
"<0xd8>": 32216,
|
| 32354 |
+
"<0xd9>": 32217,
|
| 32355 |
+
"<0xda>": 32218,
|
| 32356 |
+
"<0xdb>": 32219,
|
| 32357 |
+
"<0xdc>": 32220,
|
| 32358 |
+
"<0xdd>": 32221,
|
| 32359 |
+
"<0xde>": 32222,
|
| 32360 |
+
"<0xdf>": 32223,
|
| 32361 |
+
"<0xe0>": 32224,
|
| 32362 |
+
"<0xe1>": 32225,
|
| 32363 |
+
"<0xe2>": 32226,
|
| 32364 |
+
"<0xe3>": 32227,
|
| 32365 |
+
"<0xe4>": 32228,
|
| 32366 |
+
"<0xe5>": 32229,
|
| 32367 |
+
"<0xe6>": 32230,
|
| 32368 |
+
"<0xe7>": 32231,
|
| 32369 |
+
"<0xe8>": 32232,
|
| 32370 |
+
"<0xe9>": 32233,
|
| 32371 |
+
"<0xea>": 32234,
|
| 32372 |
+
"<0xeb>": 32235,
|
| 32373 |
+
"<0xec>": 32236,
|
| 32374 |
+
"<0xed>": 32237,
|
| 32375 |
+
"<0xee>": 32238,
|
| 32376 |
+
"<0xef>": 32239,
|
| 32377 |
+
"<0xf0>": 32240,
|
| 32378 |
+
"<0xf1>": 32241,
|
| 32379 |
+
"<0xf2>": 32242,
|
| 32380 |
+
"<0xf3>": 32243,
|
| 32381 |
+
"<0xf4>": 32244,
|
| 32382 |
+
"<0xf5>": 32245,
|
| 32383 |
+
"<0xf6>": 32246,
|
| 32384 |
+
"<0xf7>": 32247,
|
| 32385 |
+
"<0xf8>": 32248,
|
| 32386 |
+
"<0xf9>": 32249,
|
| 32387 |
+
"<0xfa>": 32250,
|
| 32388 |
+
"<0xfb>": 32251,
|
| 32389 |
+
"<0xfc>": 32252,
|
| 32390 |
+
"<0xfd>": 32253,
|
| 32391 |
+
"<0xfe>": 32254,
|
| 32392 |
+
"<0xff>": 32255,
|
| 32393 |
+
"▁": 32256,
|
| 32394 |
+
"▁▁": 32257,
|
| 32395 |
+
"▁▁▁": 32258,
|
| 32396 |
+
"▁▁▁▁": 32259,
|
| 32397 |
+
"▁▁▁▁▁": 32260,
|
| 32398 |
+
"▁▁▁▁▁▁": 32261,
|
| 32399 |
+
"▁▁▁▁▁▁▁": 32262,
|
| 32400 |
+
"▁▁▁▁▁▁▁▁": 32263,
|
| 32401 |
+
"▁▁▁▁▁▁▁▁▁": 32264,
|
| 32402 |
+
"▁▁▁▁▁▁▁▁▁▁": 32265,
|
| 32403 |
+
"▁▁▁▁▁▁▁▁▁▁▁": 32266,
|
| 32404 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁": 32267,
|
| 32405 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁": 32268,
|
| 32406 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 32269,
|
| 32407 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 32270
|
| 32408 |
},
|
| 32409 |
"merges": [
|
| 32410 |
"▁ क",
|
|
|
|
| 64238 |
"रे क",
|
| 64239 |
"वर न▁",
|
| 64240 |
"उपयोगकर् ता▁को▁",
|
| 64241 |
+
"से▁ से▁",
|
| 64242 |
+
"▁ ▁",
|
| 64243 |
+
"▁ ▁▁",
|
| 64244 |
+
"▁ ▁▁▁",
|
| 64245 |
+
"▁ ▁▁▁▁",
|
| 64246 |
+
"▁ ▁▁▁▁▁",
|
| 64247 |
+
"▁ ▁▁▁▁▁▁",
|
| 64248 |
+
"▁ ▁▁▁▁▁▁▁",
|
| 64249 |
+
"▁ ▁▁▁▁▁▁▁▁",
|
| 64250 |
+
"▁ ▁▁▁▁▁▁▁▁▁",
|
| 64251 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁",
|
| 64252 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁",
|
| 64253 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 64254 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 64255 |
+
"▁▁ ▁",
|
| 64256 |
+
"▁▁ ▁▁",
|
| 64257 |
+
"▁▁ ▁▁▁",
|
| 64258 |
+
"▁▁ ▁▁▁▁",
|
| 64259 |
+
"▁▁ ▁▁▁▁▁",
|
| 64260 |
+
"▁▁ ▁▁▁▁▁▁",
|
| 64261 |
+
"▁▁ ▁▁▁▁▁▁▁",
|
| 64262 |
+
"▁▁ ▁▁▁▁▁▁▁▁",
|
| 64263 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁",
|
| 64264 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁▁",
|
| 64265 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁▁▁",
|
| 64266 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 64267 |
+
"▁▁▁ ▁",
|
| 64268 |
+
"▁▁▁ ▁▁",
|
| 64269 |
+
"▁▁▁ ▁▁▁",
|
| 64270 |
+
"▁▁▁ ▁▁▁▁",
|
| 64271 |
+
"▁▁▁ ▁▁▁▁▁",
|
| 64272 |
+
"▁▁▁ ▁▁▁▁▁▁",
|
| 64273 |
+
"▁▁▁ ▁▁▁▁▁▁▁",
|
| 64274 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁",
|
| 64275 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁▁",
|
| 64276 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁▁▁",
|
| 64277 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁▁▁▁",
|
| 64278 |
+
"▁▁▁▁ ▁",
|
| 64279 |
+
"▁▁▁▁ ▁▁",
|
| 64280 |
+
"▁▁▁▁ ▁▁▁",
|
| 64281 |
+
"▁▁▁▁ ▁▁▁▁",
|
| 64282 |
+
"▁▁▁▁ ▁▁▁▁▁",
|
| 64283 |
+
"▁▁▁▁ ▁▁▁▁▁▁",
|
| 64284 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁",
|
| 64285 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
| 64286 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁▁▁",
|
| 64287 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁▁▁▁",
|
| 64288 |
+
"▁▁▁▁▁ ▁",
|
| 64289 |
+
"▁▁▁▁▁ ▁▁",
|
| 64290 |
+
"▁▁▁▁▁ ▁▁▁",
|
| 64291 |
+
"▁▁▁▁▁ ▁▁▁▁",
|
| 64292 |
+
"▁▁▁▁▁ ▁▁▁▁▁",
|
| 64293 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁",
|
| 64294 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁▁",
|
| 64295 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
| 64296 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁▁▁▁",
|
| 64297 |
+
"▁▁▁▁▁▁ ▁",
|
| 64298 |
+
"▁▁▁▁▁▁ ▁▁",
|
| 64299 |
+
"▁▁▁▁▁▁ ▁▁▁",
|
| 64300 |
+
"▁▁▁▁▁▁ ▁▁▁▁",
|
| 64301 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁",
|
| 64302 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
| 64303 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁▁▁",
|
| 64304 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
| 64305 |
+
"▁▁▁▁▁▁▁ ▁",
|
| 64306 |
+
"▁▁▁▁▁▁▁ ▁▁",
|
| 64307 |
+
"▁▁▁▁▁▁▁ ▁▁▁",
|
| 64308 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁",
|
| 64309 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁▁",
|
| 64310 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
| 64311 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁",
|
| 64312 |
+
"▁▁▁▁▁▁▁▁ ▁",
|
| 64313 |
+
"▁▁▁▁▁▁▁▁ ▁▁",
|
| 64314 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁",
|
| 64315 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
| 64316 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁▁▁",
|
| 64317 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
| 64318 |
+
"▁▁▁▁▁▁▁▁▁ ▁",
|
| 64319 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁",
|
| 64320 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁▁",
|
| 64321 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
| 64322 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁",
|
| 64323 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁",
|
| 64324 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁▁",
|
| 64325 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁▁▁",
|
| 64326 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
| 64327 |
+
"▁▁▁▁▁▁▁▁▁▁▁ ▁",
|
| 64328 |
+
"▁▁▁▁▁▁▁▁▁▁▁ ▁▁",
|
| 64329 |
+
"▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁",
|
| 64330 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁ ▁",
|
| 64331 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁",
|
| 64332 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁"
|
| 64333 |
]
|
| 64334 |
}
|
| 64335 |
}
|
tokenizer_config.json
CHANGED
|
@@ -1,5 +1,32 @@
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"bos_token": "<s>",
|
|
|
|
| 3 |
"clean_up_tokenization_spaces": true,
|
| 4 |
"eos_token": "</s>",
|
| 5 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
|
| 1 |
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<unk>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<s>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
"bos_token": "<s>",
|
| 29 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|उपयोगकर्ता|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|प्रणाली|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|सहायक|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|सहायक|>' }}\n{% endif %}\n{% endfor %}\n",
|
| 30 |
"clean_up_tokenization_spaces": true,
|
| 31 |
"eos_token": "</s>",
|
| 32 |
"model_max_length": 1000000000000000019884624838656,
|