BespinGlobal, FalconsAI, NPleshkanov, Wanyu, Yanjie
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +45 -35
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/.gitattributes +28 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/README.md +82 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/config.json +46 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/model.safetensors +3 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/pytorch_model.bin +3 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/special_tokens_map.json +1 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/tf_model.h5 +3 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer.json +0 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer_config.json +1 -0
- BespinGlobal/klue-roberta-small-3i4k-intent-classification/vocab.txt +0 -0
- Falconsai/DATASETS/customer_intents/.gitattributes +55 -0
- Falconsai/DATASETS/customer_intents/dataset_dict.json +23 -0
- Falconsai/DATASETS/customer_intents/train/data-00000-of-00001.arrow +3 -0
- Falconsai/DATASETS/customer_intents/train/dataset_info.json +16 -0
- Falconsai/DATASETS/customer_intents/train/state.json +13 -0
- Falconsai/arc_of_conversation/.gitattributes +35 -0
- Falconsai/arc_of_conversation/README.md +196 -0
- Falconsai/arc_of_conversation/config.json +61 -0
- Falconsai/arc_of_conversation/generation_config.json +6 -0
- Falconsai/arc_of_conversation/model.safetensors +3 -0
- Falconsai/arc_of_conversation/special_tokens_map.json +125 -0
- Falconsai/arc_of_conversation/spiece.model +3 -0
- Falconsai/arc_of_conversation/tokenizer.json +0 -0
- Falconsai/arc_of_conversation/tokenizer_config.json +942 -0
- Falconsai/fear_mongering_detection/.gitattributes +35 -0
- Falconsai/fear_mongering_detection/README.md +104 -0
- Falconsai/fear_mongering_detection/config.json +29 -0
- Falconsai/fear_mongering_detection/model.safetensors +3 -0
- Falconsai/fear_mongering_detection/special_tokens_map.json +7 -0
- Falconsai/fear_mongering_detection/tokenizer.json +0 -0
- Falconsai/fear_mongering_detection/tokenizer_config.json +55 -0
- Falconsai/fear_mongering_detection/vocab.txt +0 -0
- Falconsai/intent_classification/.gitattributes +35 -0
- Falconsai/intent_classification/README.md +70 -0
- Falconsai/intent_classification/config.json +43 -0
- Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Manifest.json +18 -0
- Falconsai/intent_classification/model.safetensors +3 -0
- Falconsai/intent_classification/pytorch_model.bin +3 -0
- Falconsai/intent_classification/special_tokens_map.json +7 -0
- Falconsai/intent_classification/tokenizer.json +0 -0
- Falconsai/intent_classification/tokenizer_config.json +13 -0
- Falconsai/intent_classification/vocab.txt +0 -0
- Falconsai/offensive_speech_detection/.gitattributes +35 -0
- Falconsai/offensive_speech_detection/README.md +83 -0
- Falconsai/offensive_speech_detection/config.json +33 -0
- Falconsai/offensive_speech_detection/model.safetensors +3 -0
- Falconsai/offensive_speech_detection/optimizer.pt +3 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,45 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
NPleshkanov/adapter_labse_intent_classifier/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
NPleshkanov/ru-labse-toxic/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
Wanyu/DATASETS/IteraTeR_full_doc/train.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
Wanyu/DATASETS/IteraTeR_full_sent/dev.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
Wanyu/DATASETS/IteraTeR_full_sent/test.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
Wanyu/DATASETS/IteraTeR_full_sent/train.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
Wanyu/DATASETS/IteraTeR_v2/r3_demo_snapshot.jpg filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
Wanyu/DATASETS/IteraTeR_v2/sent-level.dev.intents.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
Wanyu/DATASETS/IteraTeR_v2/sent-level.test.intents.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
Wanyu/DATASETS/IteraTeR_v2/sent-level.train.intents.json filter=lfs diff=lfs merge=lfs -text
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/.gitattributes
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
model.safetensors filter=lfs diff=lfs merge=lfs -text
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: ko
|
| 3 |
+
tags:
|
| 4 |
+
- intent-classification
|
| 5 |
+
datasets:
|
| 6 |
+
- kor_3i4k
|
| 7 |
+
license: cc-by-nc-4.0
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## Finetuning
|
| 11 |
+
- Pretrain Model : [klue/roberta-small](https://github.com/KLUE-benchmark/KLUE)
|
| 12 |
+
- Dataset for fine-tuning : [3i4k](https://github.com/warnikchow/3i4k)
|
| 13 |
+
- Train : 46,863
|
| 14 |
+
- Validation : 8,271 (15% of Train)
|
| 15 |
+
- Test : 6,121
|
| 16 |
+
- Label info
|
| 17 |
+
- 0: "fragment",
|
| 18 |
+
- 1: "statement",
|
| 19 |
+
- 2: "question",
|
| 20 |
+
- 3: "command",
|
| 21 |
+
- 4: "rhetorical question",
|
| 22 |
+
- 5: "rhetorical command",
|
| 23 |
+
- 6: "intonation-dependent utterance"
|
| 24 |
+
- Parameters of Training
|
| 25 |
+
```
|
| 26 |
+
{
|
| 27 |
+
"epochs": 3 (setting 10 but early stopped),
|
| 28 |
+
"batch_size":32,
|
| 29 |
+
"optimizer_class": "<class 'keras.optimizer_v2.adam.Adam'>",
|
| 30 |
+
"optimizer_params": {
|
| 31 |
+
"lr": 5e-05
|
| 32 |
+
},
|
| 33 |
+
"min_delta": 0.01
|
| 34 |
+
}
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## Usage
|
| 38 |
+
``` python
|
| 39 |
+
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification, TextClassificationPipeline
|
| 40 |
+
|
| 41 |
+
# Load fine-tuned model by HuggingFace Model Hub
|
| 42 |
+
HUGGINGFACE_MODEL_PATH = "bespin-global/klue-roberta-small-3i4k-intent-classification"
|
| 43 |
+
loaded_tokenizer = RobertaTokenizerFast.from_pretrained(HUGGINGFACE_MODEL_PATH )
|
| 44 |
+
loaded_model = RobertaForSequenceClassification.from_pretrained(HUGGINGFACE_MODEL_PATH )
|
| 45 |
+
|
| 46 |
+
# using Pipeline
|
| 47 |
+
text_classifier = TextClassificationPipeline(
|
| 48 |
+
tokenizer=loaded_tokenizer,
|
| 49 |
+
model=loaded_model,
|
| 50 |
+
return_all_scores=True
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
# predict
|
| 54 |
+
text = "your text"
|
| 55 |
+
|
| 56 |
+
preds_list = text_classifier(text)
|
| 57 |
+
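best_pred = max(preds_list[0], key=lambda pred: pred["score"])  # return_all_scores=True yields one dict per label; pick the top-scoring one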
|
| 58 |
+
print(f"Label of Best Intention: {best_pred['label']}")
|
| 59 |
+
print(f"Score of Best Intention: {best_pred['score']}")
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
## Evaluation
|
| 63 |
+
```
|
| 64 |
+
precision recall f1-score support
|
| 65 |
+
|
| 66 |
+
command 0.89 0.92 0.90 1296
|
| 67 |
+
fragment 0.98 0.96 0.97 600
|
| 68 |
+
intonation-dependent utterance 0.71 0.69 0.70 327
|
| 69 |
+
question 0.95 0.97 0.96 1786
|
| 70 |
+
rhetorical command 0.87 0.64 0.74 108
|
| 71 |
+
rhetorical question 0.61 0.63 0.62 174
|
| 72 |
+
statement 0.91 0.89 0.90 1830
|
| 73 |
+
|
| 74 |
+
accuracy 0.90 6121
|
| 75 |
+
macro avg 0.85 0.81 0.83 6121
|
| 76 |
+
weighted avg 0.90 0.90 0.90 6121
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
## Citing & Authors
|
| 81 |
+
<!--- Describe where people can find more information -->
|
| 82 |
+
[Jaehyeong](https://huggingface.co/jaehyeong) at [Bespin Global](https://www.bespinglobal.com/)
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "klue/roberta-small",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"RobertaForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"eos_token_id": 2,
|
| 10 |
+
"gradient_checkpointing": false,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "fragment",
|
| 16 |
+
"1": "statement",
|
| 17 |
+
"2": "question",
|
| 18 |
+
"3": "command",
|
| 19 |
+
"4": "rhetorical question",
|
| 20 |
+
"5": "rhetorical command",
|
| 21 |
+
"6": "intonation-dependent utterance"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"intermediate_size": 3072,
|
| 25 |
+
"label2id": {
|
| 26 |
+
"command": 3,
|
| 27 |
+
"fragment": 0,
|
| 28 |
+
"intonation-dependent utterance": 6,
|
| 29 |
+
"question": 2,
|
| 30 |
+
"rhetorical command": 5,
|
| 31 |
+
"rhetorical question": 4,
|
| 32 |
+
"statement": 1
|
| 33 |
+
},
|
| 34 |
+
"layer_norm_eps": 1e-05,
|
| 35 |
+
"max_position_embeddings": 514,
|
| 36 |
+
"model_type": "roberta",
|
| 37 |
+
"num_attention_heads": 12,
|
| 38 |
+
"num_hidden_layers": 6,
|
| 39 |
+
"pad_token_id": 1,
|
| 40 |
+
"position_embedding_type": "absolute",
|
| 41 |
+
"tokenizer_class": "BertTokenizer",
|
| 42 |
+
"transformers_version": "4.11.3",
|
| 43 |
+
"type_vocab_size": 1,
|
| 44 |
+
"use_cache": true,
|
| 45 |
+
"vocab_size": 32000
|
| 46 |
+
}
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1e42cbafd51dbfed362c9748fc2aaecd581afd93dc26f54b34c9320baa5eccf
|
| 3 |
+
size 272401784
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d20aa33599de15a3a0003242d2eacadcb477b2e6d119a3ea11e0bf412a3234c
|
| 3 |
+
size 272431767
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/tf_model.h5
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e56a64376750165a98d5f370632e7c329bb5169e7604b810c5fbf729ad764a0
|
| 3 |
+
size 272534680
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "bos_token": "[CLS]", "eos_token": "[SEP]", "add_prefix_space": false, "errors": "replace", "sep_token": "[SEP]", "cls_token": "[CLS]", "pad_token": "[PAD]", "mask_token": "[MASK]", "do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "/home/jupyter/.cache/huggingface/transformers/9ce71a5afff600bb47488785ec31125c4a485302e21d660291b10925f8bfcb67.70c17d6e4d492c8f24f5bb97ab56c7f272e947112c6faf9dd846da42ba13eb23", "name_or_path": "klue/roberta-small", "tokenizer_class": "RobertaTokenizer"}
|
BespinGlobal/klue-roberta-small-3i4k-intent-classification/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Falconsai/DATASETS/customer_intents/.gitattributes
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
# Audio files - uncompressed
|
| 38 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
# Audio files - compressed
|
| 42 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
# Image files - uncompressed
|
| 48 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
# Image files - compressed
|
| 53 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
Falconsai/DATASETS/customer_intents/dataset_dict.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"default": {
|
| 2 |
+
"description": "Dataset of labeled customer comments or questions",
|
| 3 |
+
"citation": "",
|
| 4 |
+
"homepage": "https://huggingface.co/datasets/Falconsai/customer_intents",
|
| 5 |
+
"license": "",
|
| 6 |
+
"features": {
|
| 7 |
+
"label": {
|
| 8 |
+
"dtype": "string",
|
| 9 |
+
"_type": "Value"
|
| 10 |
+
},
|
| 11 |
+
"text": {
|
| 12 |
+
"dtype": "string",
|
| 13 |
+
"_type": "Value"
|
| 14 |
+
}
|
| 15 |
+
},
|
| 16 |
+
"splits": {
|
| 17 |
+
"train": {
|
| 18 |
+
"name": "train",
|
| 19 |
+
"num_examples": 27856,
|
| 20 |
+
"dataset_name": null
|
| 21 |
+
}
|
| 22 |
+
}
|
| 23 |
+
}}
|
Falconsai/DATASETS/customer_intents/train/data-00000-of-00001.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8bbefd2f15df40746ab0a133b805e455b3376431c90b1956f932d0b431fab1c
|
| 3 |
+
size 1950648
|
Falconsai/DATASETS/customer_intents/train/dataset_info.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"citation": "",
|
| 3 |
+
"description": "",
|
| 4 |
+
"features": {
|
| 5 |
+
"label": {
|
| 6 |
+
"dtype": "string",
|
| 7 |
+
"_type": "Value"
|
| 8 |
+
},
|
| 9 |
+
"text": {
|
| 10 |
+
"dtype": "string",
|
| 11 |
+
"_type": "Value"
|
| 12 |
+
}
|
| 13 |
+
},
|
| 14 |
+
"homepage": "",
|
| 15 |
+
"license": ""
|
| 16 |
+
}
|
Falconsai/DATASETS/customer_intents/train/state.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_data_files": [
|
| 3 |
+
{
|
| 4 |
+
"filename": "data-00000-of-00001.arrow"
|
| 5 |
+
}
|
| 6 |
+
],
|
| 7 |
+
"_fingerprint": "d73dee637232af5e",
|
| 8 |
+
"_format_columns": null,
|
| 9 |
+
"_format_kwargs": {},
|
| 10 |
+
"_format_type": null,
|
| 11 |
+
"_output_all_columns": false,
|
| 12 |
+
"_split": null
|
| 13 |
+
}
|
Falconsai/arc_of_conversation/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Falconsai/arc_of_conversation/README.md
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: apache-2.0
|
| 5 |
+
tags:
|
| 6 |
+
- NLP
|
| 7 |
+
pipeline_tag: summarization
|
| 8 |
+
widget:
|
| 9 |
+
- text: ' Moderator: Welcome, everyone, to this exciting panel discussion. Today,
|
| 10 |
+
we have Elon Musk and Sam Altman, two of the most influential figures in the tech
|
| 11 |
+
industry. We’re here to discuss the future of artificial intelligence and its
|
| 12 |
+
impact on society. Elon, Sam, thank you for joining us. Elon Musk: Happy to be
|
| 13 |
+
here. Sam Altman: Looking forward to the discussion. Moderator: Let’s dive right
|
| 14 |
+
in. Elon, you’ve been very vocal about your concerns regarding AI. Could you elaborate
|
| 15 |
+
on why you believe AI poses such a significant risk to humanity? Elon Musk: Certainly.
|
| 16 |
+
AI has the potential to become more intelligent than humans, which could be extremely
|
| 17 |
+
dangerous if it goes unchecked. The existential threat is real. If we don’t implement
|
| 18 |
+
strict regulations and oversight, we risk creating something that could outsmart
|
| 19 |
+
us and act against our interests. It’s a ticking time bomb. Sam Altman: I respect
|
| 20 |
+
Elon’s concerns, but I think he’s overestimating the threat. The focus should
|
| 21 |
+
be on leveraging AI to solve some of humanity’s biggest problems. With proper
|
| 22 |
+
ethical frameworks and robust safety measures, we can ensure AI benefits everyone.
|
| 23 |
+
The fear-mongering is unproductive and could hinder technological progress. Elon
|
| 24 |
+
Musk: It’s not fear-mongering, Sam. It’s being cautious. We need to ensure that
|
| 25 |
+
we have control mechanisms in place. Without these, we’re playing with fire. You
|
| 26 |
+
can’t possibly believe that AI will always remain benevolent or under our control.
|
| 27 |
+
Sam Altman: Control mechanisms are essential, I agree, but what you’re suggesting
|
| 28 |
+
sounds like stifling innovation out of fear. We need a balanced approach. Overregulation
|
| 29 |
+
could slow down advancements that could otherwise save lives and improve quality
|
| 30 |
+
of life globally. We must foster innovation while ensuring safety, not let fear
|
| 31 |
+
dictate our actions. Elon Musk: Balancing innovation and safety is easier said
|
| 32 |
+
than done. When you’re dealing with something as unpredictable and powerful as
|
| 33 |
+
AI, the risks far outweigh the potential benefits if we don’t tread carefully.
|
| 34 |
+
History has shown us the dangers of underestimating new technologies. Sam Altman:
|
| 35 |
+
And history has also shown us the incredible benefits of technological advancement.
|
| 36 |
+
If we had been overly cautious, we might not have the medical, communication,
|
| 37 |
+
or energy technologies we have today. It’s about finding that middle ground where
|
| 38 |
+
innovation thrives safely. We can’t just halt progress because of hypothetical
|
| 39 |
+
risks. Elon Musk: It’s not hypothetical, Sam. Look at how quickly AI capabilities
|
| 40 |
+
are advancing. We’re already seeing issues with bias, decision-making, and unintended
|
| 41 |
+
consequences. Imagine this on a larger scale. We can’t afford to be complacent.
|
| 42 |
+
Sam Altman: Bias and unintended consequences are exactly why we need to invest
|
| 43 |
+
in research and development to address these issues head-on. By building AI responsibly
|
| 44 |
+
and learning from each iteration, we can mitigate these risks. Shutting down or
|
| 45 |
+
heavily regulating AI development out of fear isn’t the solution. Moderator: Both
|
| 46 |
+
of you make compelling points. Let’s fast forward a bit. Say, ten years from now,
|
| 47 |
+
we have stringent regulations in place, as Elon suggests, or a more flexible framework,
|
| 48 |
+
as Sam proposes. What does the world look like? Elon Musk: With stringent regulations,
|
| 49 |
+
we would have a more controlled and safer AI development environment. This would
|
| 50 |
+
prevent any catastrophic events and ensure that AI works for us, not against us.
|
| 51 |
+
We’d be able to avoid many potential disasters that an unchecked AI might cause.
|
| 52 |
+
Sam Altman: On the other hand, with a more flexible framework, we’d see rapid
|
| 53 |
+
advancements in AI applications across various sectors, from healthcare to education,
|
| 54 |
+
bringing significant improvements to quality of life and solving problems that
|
| 55 |
+
seem insurmountable today. The world would be a much better place with these innovations.
|
| 56 |
+
Moderator: And what if both of you are wrong? Elon Musk: Wrong? Sam Altman: How
|
| 57 |
+
so? Moderator: Suppose the future shows that neither stringent regulations nor
|
| 58 |
+
a flexible framework were the key factors. Instead, what if the major breakthroughs
|
| 59 |
+
and safety measures came from unexpected areas like quantum computing advancements
|
| 60 |
+
or new forms of human-computer symbiosis, rendering this entire debate moot? Elon
|
| 61 |
+
Musk: Well, that’s a possibility. If breakthroughs in quantum computing or other
|
| 62 |
+
technologies overshadow our current AI concerns, it could change the entire landscape.
|
| 63 |
+
It’s difficult to predict all variables. Sam Altman: Agreed. Technology often
|
| 64 |
+
takes unexpected turns. If future advancements make our current debate irrelevant,
|
| 65 |
+
it just goes to show how unpredictable and fast-moving the tech world is. The
|
| 66 |
+
key takeaway would be the importance of adaptability and continuous learning.
|
| 67 |
+
Moderator: Fascinating. It appears that the only certainty in the tech world is
|
| 68 |
+
uncertainty itself. Thank you both for this engaging discussion.'
|
| 69 |
+
example_title: Sample 1
|
| 70 |
+
---
|
| 71 |
+
# Arc of the Conversation Model
|
| 72 |
+
## Model Details
|
| 73 |
+
|
| 74 |
+
- **Model Name:** arc_of_conversation
|
| 75 |
+
- **Model Type:** Fine-tuned `google/t5-small`
|
| 76 |
+
- **Language:** English
|
| 77 |
+
- **License:** Apache-2.0
|
| 78 |
+
|
| 79 |
+
## Overview
|
| 80 |
+
|
| 81 |
+
The Conversation Arc Predictor model is designed to predict the arc of a conversation given its text. It is based on the `google/t5-small` model, fine-tuned on a custom dataset of conversations and their corresponding arcs. This model can be used to analyze and categorize conversation texts into predefined arcs.
|
| 82 |
+
|
| 83 |
+
## Model Description
|
| 84 |
+
|
| 85 |
+
### Model Architecture
|
| 86 |
+
|
| 87 |
+
The base model architecture is T5 (Text-To-Text Transfer Transformer), which treats every NLP problem as a text-to-text problem. The specific version used here is `google/t5-small`, which has been fine-tuned to understand and predict conversation arcs.
|
| 88 |
+
|
| 89 |
+
### Fine-Tuning Data
|
| 90 |
+
|
| 91 |
+
The model was fine-tuned on a dataset consisting of conversation texts and their corresponding arcs. The dataset should be formatted in a CSV file with two columns: `conversation` and `arc`.
|
| 92 |
+
|
| 93 |
+
### Intended Use
|
| 94 |
+
|
| 95 |
+
The model is intended for categorizing the arc of conversation texts. It can be useful for applications in customer service, chatbots, conversational analysis, and other areas where understanding the flow of a conversation is important.
|
| 96 |
+
|
| 97 |
+
## How to Use
|
| 98 |
+
|
| 99 |
+
### Inference
|
| 100 |
+
|
| 101 |
+
To use this model for inference, you need to load the fine-tuned model and tokenizer. Here is an example of how to do this using the `transformers` library:
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
Running Pipeline
|
| 105 |
+
```python
|
| 106 |
+
# Use a pipeline as a high-level helper
|
| 107 |
+
from transformers import pipeline
|
| 108 |
+
|
| 109 |
+
convo1 = 'Your conversation text here.'
|
| 110 |
+
pipe = pipeline("summarization", model="Falconsai/arc_of_conversation")
|
| 111 |
+
res1 = pipe(convo1, max_length=1024, min_length=512, do_sample=False)
|
| 112 |
+
print(res1)
|
| 113 |
+
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
Running on CPU
|
| 119 |
+
```python
|
| 120 |
+
# Load model directly
|
| 121 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 122 |
+
|
| 123 |
+
tokenizer = AutoTokenizer.from_pretrained("Falconsai/arc_of_conversation")
|
| 124 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/arc_of_conversation")
|
| 125 |
+
|
| 126 |
+
input_text = "Your conversation Here"
|
| 127 |
+
input_ids = tokenizer(input_text, return_tensors="pt").input_ids
|
| 128 |
+
|
| 129 |
+
outputs = model.generate(input_ids)
|
| 130 |
+
print(tokenizer.decode(outputs[0]))
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
Running on GPU
|
| 134 |
+
```python
|
| 135 |
+
# pip install accelerate
|
| 136 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 137 |
+
|
| 138 |
+
tokenizer = AutoTokenizer.from_pretrained("Falconsai/arc_of_conversation")
|
| 139 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/arc_of_conversation", device_map="auto")
|
| 140 |
+
|
| 141 |
+
input_text = "Your conversation Here"
|
| 142 |
+
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
|
| 143 |
+
|
| 144 |
+
outputs = model.generate(input_ids)
|
| 145 |
+
print(tokenizer.decode(outputs[0]))
|
| 146 |
+
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
## Training
|
| 154 |
+
|
| 155 |
+
The training process involves the following steps:
|
| 156 |
+
|
| 157 |
+
1. **Load and Explore Data:** Load the dataset and perform initial exploration to understand the data distribution.
|
| 158 |
+
2. **Preprocess Data:** Tokenize the conversations and prepare them for the T5 model.
|
| 159 |
+
3. **Fine-Tune Model:** Fine-tune the `google-t5/t5-small` model using the preprocessed data.
|
| 160 |
+
4. **Evaluate Model:** Evaluate the model's performance on a validation set to ensure it's learning correctly.
|
| 161 |
+
5. **Save Model:** Save the fine-tuned model for future use.
|
| 162 |
+
|
| 163 |
+
## Evaluation
|
| 164 |
+
|
| 165 |
+
The model's performance should be evaluated on a separate validation set to ensure it accurately predicts the conversation arcs. Metrics such as accuracy, precision, recall, and F1 score can be used to assess its performance.
|
| 166 |
+
|
| 167 |
+
## Limitations
|
| 168 |
+
|
| 169 |
+
- **Data Dependency:** The model's performance is highly dependent on the quality and representativeness of the training data.
|
| 170 |
+
- **Generalization:** The model may not generalize well to conversation texts that are significantly different from the training data.
|
| 171 |
+
|
| 172 |
+
## Ethical Considerations
|
| 173 |
+
|
| 174 |
+
When deploying the model, be mindful of the ethical implications, including but not limited to:
|
| 175 |
+
|
| 176 |
+
- **Privacy:** Ensure that conversation data used for training and inference does not contain sensitive or personally identifiable information.
|
| 177 |
+
- **Bias:** Be aware of potential biases in the training data that could affect the model's predictions.
|
| 178 |
+
|
| 179 |
+
## License
|
| 180 |
+
|
| 181 |
+
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
| 182 |
+
|
| 183 |
+
## Citation
|
| 184 |
+
|
| 185 |
+
If you use this model in your research, please cite it as follows:
|
| 186 |
+
|
| 187 |
+
```
|
| 188 |
+
@misc{conversation_arc_predictor,
|
| 189 |
+
author = {Michael Stattelman},
|
| 190 |
+
title = {Arc of the Conversation Generator},
|
| 191 |
+
year = {2024},
|
| 192 |
+
publisher = {Falcons.ai},
|
| 193 |
+
}
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
---
|
Falconsai/arc_of_conversation/config.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "Falconsai/arc_of_conversation",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"classifier_dropout": 0.0,
|
| 7 |
+
"d_ff": 2048,
|
| 8 |
+
"d_kv": 64,
|
| 9 |
+
"d_model": 512,
|
| 10 |
+
"decoder_start_token_id": 0,
|
| 11 |
+
"dense_act_fn": "relu",
|
| 12 |
+
"dropout_rate": 0.1,
|
| 13 |
+
"eos_token_id": 1,
|
| 14 |
+
"feed_forward_proj": "relu",
|
| 15 |
+
"initializer_factor": 1.0,
|
| 16 |
+
"is_encoder_decoder": true,
|
| 17 |
+
"is_gated_act": false,
|
| 18 |
+
"layer_norm_epsilon": 1e-06,
|
| 19 |
+
"model_type": "t5",
|
| 20 |
+
"n_positions": 512,
|
| 21 |
+
"num_decoder_layers": 6,
|
| 22 |
+
"num_heads": 8,
|
| 23 |
+
"num_layers": 6,
|
| 24 |
+
"output_past": true,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"relative_attention_max_distance": 128,
|
| 27 |
+
"relative_attention_num_buckets": 32,
|
| 28 |
+
"task_specific_params": {
|
| 29 |
+
"summarization": {
|
| 30 |
+
"early_stopping": true,
|
| 31 |
+
"length_penalty": 2.0,
|
| 32 |
+
"max_length": 200,
|
| 33 |
+
"min_length": 30,
|
| 34 |
+
"no_repeat_ngram_size": 3,
|
| 35 |
+
"num_beams": 4,
|
| 36 |
+
"prefix": "summarize: "
|
| 37 |
+
},
|
| 38 |
+
"translation_en_to_de": {
|
| 39 |
+
"early_stopping": true,
|
| 40 |
+
"max_length": 300,
|
| 41 |
+
"num_beams": 4,
|
| 42 |
+
"prefix": "translate English to German: "
|
| 43 |
+
},
|
| 44 |
+
"translation_en_to_fr": {
|
| 45 |
+
"early_stopping": true,
|
| 46 |
+
"max_length": 300,
|
| 47 |
+
"num_beams": 4,
|
| 48 |
+
"prefix": "translate English to French: "
|
| 49 |
+
},
|
| 50 |
+
"translation_en_to_ro": {
|
| 51 |
+
"early_stopping": true,
|
| 52 |
+
"max_length": 300,
|
| 53 |
+
"num_beams": 4,
|
| 54 |
+
"prefix": "translate English to Romanian: "
|
| 55 |
+
}
|
| 56 |
+
},
|
| 57 |
+
"torch_dtype": "float32",
|
| 58 |
+
"transformers_version": "4.41.2",
|
| 59 |
+
"use_cache": true,
|
| 60 |
+
"vocab_size": 32128
|
| 61 |
+
}
|
Falconsai/arc_of_conversation/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_start_token_id": 0,
|
| 3 |
+
"eos_token_id": 1,
|
| 4 |
+
"pad_token_id": 0,
|
| 5 |
+
"transformers_version": "4.41.2"
|
| 6 |
+
}
|
Falconsai/arc_of_conversation/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0535b99dfbe93161243de8981dd28720918d83d9ad00182d0ed052943594a6b9
|
| 3 |
+
size 242041896
|
Falconsai/arc_of_conversation/special_tokens_map.json
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<extra_id_0>",
|
| 4 |
+
"<extra_id_1>",
|
| 5 |
+
"<extra_id_2>",
|
| 6 |
+
"<extra_id_3>",
|
| 7 |
+
"<extra_id_4>",
|
| 8 |
+
"<extra_id_5>",
|
| 9 |
+
"<extra_id_6>",
|
| 10 |
+
"<extra_id_7>",
|
| 11 |
+
"<extra_id_8>",
|
| 12 |
+
"<extra_id_9>",
|
| 13 |
+
"<extra_id_10>",
|
| 14 |
+
"<extra_id_11>",
|
| 15 |
+
"<extra_id_12>",
|
| 16 |
+
"<extra_id_13>",
|
| 17 |
+
"<extra_id_14>",
|
| 18 |
+
"<extra_id_15>",
|
| 19 |
+
"<extra_id_16>",
|
| 20 |
+
"<extra_id_17>",
|
| 21 |
+
"<extra_id_18>",
|
| 22 |
+
"<extra_id_19>",
|
| 23 |
+
"<extra_id_20>",
|
| 24 |
+
"<extra_id_21>",
|
| 25 |
+
"<extra_id_22>",
|
| 26 |
+
"<extra_id_23>",
|
| 27 |
+
"<extra_id_24>",
|
| 28 |
+
"<extra_id_25>",
|
| 29 |
+
"<extra_id_26>",
|
| 30 |
+
"<extra_id_27>",
|
| 31 |
+
"<extra_id_28>",
|
| 32 |
+
"<extra_id_29>",
|
| 33 |
+
"<extra_id_30>",
|
| 34 |
+
"<extra_id_31>",
|
| 35 |
+
"<extra_id_32>",
|
| 36 |
+
"<extra_id_33>",
|
| 37 |
+
"<extra_id_34>",
|
| 38 |
+
"<extra_id_35>",
|
| 39 |
+
"<extra_id_36>",
|
| 40 |
+
"<extra_id_37>",
|
| 41 |
+
"<extra_id_38>",
|
| 42 |
+
"<extra_id_39>",
|
| 43 |
+
"<extra_id_40>",
|
| 44 |
+
"<extra_id_41>",
|
| 45 |
+
"<extra_id_42>",
|
| 46 |
+
"<extra_id_43>",
|
| 47 |
+
"<extra_id_44>",
|
| 48 |
+
"<extra_id_45>",
|
| 49 |
+
"<extra_id_46>",
|
| 50 |
+
"<extra_id_47>",
|
| 51 |
+
"<extra_id_48>",
|
| 52 |
+
"<extra_id_49>",
|
| 53 |
+
"<extra_id_50>",
|
| 54 |
+
"<extra_id_51>",
|
| 55 |
+
"<extra_id_52>",
|
| 56 |
+
"<extra_id_53>",
|
| 57 |
+
"<extra_id_54>",
|
| 58 |
+
"<extra_id_55>",
|
| 59 |
+
"<extra_id_56>",
|
| 60 |
+
"<extra_id_57>",
|
| 61 |
+
"<extra_id_58>",
|
| 62 |
+
"<extra_id_59>",
|
| 63 |
+
"<extra_id_60>",
|
| 64 |
+
"<extra_id_61>",
|
| 65 |
+
"<extra_id_62>",
|
| 66 |
+
"<extra_id_63>",
|
| 67 |
+
"<extra_id_64>",
|
| 68 |
+
"<extra_id_65>",
|
| 69 |
+
"<extra_id_66>",
|
| 70 |
+
"<extra_id_67>",
|
| 71 |
+
"<extra_id_68>",
|
| 72 |
+
"<extra_id_69>",
|
| 73 |
+
"<extra_id_70>",
|
| 74 |
+
"<extra_id_71>",
|
| 75 |
+
"<extra_id_72>",
|
| 76 |
+
"<extra_id_73>",
|
| 77 |
+
"<extra_id_74>",
|
| 78 |
+
"<extra_id_75>",
|
| 79 |
+
"<extra_id_76>",
|
| 80 |
+
"<extra_id_77>",
|
| 81 |
+
"<extra_id_78>",
|
| 82 |
+
"<extra_id_79>",
|
| 83 |
+
"<extra_id_80>",
|
| 84 |
+
"<extra_id_81>",
|
| 85 |
+
"<extra_id_82>",
|
| 86 |
+
"<extra_id_83>",
|
| 87 |
+
"<extra_id_84>",
|
| 88 |
+
"<extra_id_85>",
|
| 89 |
+
"<extra_id_86>",
|
| 90 |
+
"<extra_id_87>",
|
| 91 |
+
"<extra_id_88>",
|
| 92 |
+
"<extra_id_89>",
|
| 93 |
+
"<extra_id_90>",
|
| 94 |
+
"<extra_id_91>",
|
| 95 |
+
"<extra_id_92>",
|
| 96 |
+
"<extra_id_93>",
|
| 97 |
+
"<extra_id_94>",
|
| 98 |
+
"<extra_id_95>",
|
| 99 |
+
"<extra_id_96>",
|
| 100 |
+
"<extra_id_97>",
|
| 101 |
+
"<extra_id_98>",
|
| 102 |
+
"<extra_id_99>"
|
| 103 |
+
],
|
| 104 |
+
"eos_token": {
|
| 105 |
+
"content": "</s>",
|
| 106 |
+
"lstrip": false,
|
| 107 |
+
"normalized": false,
|
| 108 |
+
"rstrip": false,
|
| 109 |
+
"single_word": false
|
| 110 |
+
},
|
| 111 |
+
"pad_token": {
|
| 112 |
+
"content": "<pad>",
|
| 113 |
+
"lstrip": false,
|
| 114 |
+
"normalized": false,
|
| 115 |
+
"rstrip": false,
|
| 116 |
+
"single_word": false
|
| 117 |
+
},
|
| 118 |
+
"unk_token": {
|
| 119 |
+
"content": "<unk>",
|
| 120 |
+
"lstrip": false,
|
| 121 |
+
"normalized": false,
|
| 122 |
+
"rstrip": false,
|
| 123 |
+
"single_word": false
|
| 124 |
+
}
|
| 125 |
+
}
|
Falconsai/arc_of_conversation/spiece.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
|
| 3 |
+
size 791656
|
Falconsai/arc_of_conversation/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Falconsai/arc_of_conversation/tokenizer_config.json
ADDED
|
@@ -0,0 +1,942 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "</s>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<unk>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"32000": {
|
| 28 |
+
"content": "<extra_id_99>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"32001": {
|
| 36 |
+
"content": "<extra_id_98>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"32002": {
|
| 44 |
+
"content": "<extra_id_97>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
},
|
| 51 |
+
"32003": {
|
| 52 |
+
"content": "<extra_id_96>",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": false,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": true
|
| 58 |
+
},
|
| 59 |
+
"32004": {
|
| 60 |
+
"content": "<extra_id_95>",
|
| 61 |
+
"lstrip": false,
|
| 62 |
+
"normalized": false,
|
| 63 |
+
"rstrip": false,
|
| 64 |
+
"single_word": false,
|
| 65 |
+
"special": true
|
| 66 |
+
},
|
| 67 |
+
"32005": {
|
| 68 |
+
"content": "<extra_id_94>",
|
| 69 |
+
"lstrip": false,
|
| 70 |
+
"normalized": false,
|
| 71 |
+
"rstrip": false,
|
| 72 |
+
"single_word": false,
|
| 73 |
+
"special": true
|
| 74 |
+
},
|
| 75 |
+
"32006": {
|
| 76 |
+
"content": "<extra_id_93>",
|
| 77 |
+
"lstrip": false,
|
| 78 |
+
"normalized": false,
|
| 79 |
+
"rstrip": false,
|
| 80 |
+
"single_word": false,
|
| 81 |
+
"special": true
|
| 82 |
+
},
|
| 83 |
+
"32007": {
|
| 84 |
+
"content": "<extra_id_92>",
|
| 85 |
+
"lstrip": false,
|
| 86 |
+
"normalized": false,
|
| 87 |
+
"rstrip": false,
|
| 88 |
+
"single_word": false,
|
| 89 |
+
"special": true
|
| 90 |
+
},
|
| 91 |
+
"32008": {
|
| 92 |
+
"content": "<extra_id_91>",
|
| 93 |
+
"lstrip": false,
|
| 94 |
+
"normalized": false,
|
| 95 |
+
"rstrip": false,
|
| 96 |
+
"single_word": false,
|
| 97 |
+
"special": true
|
| 98 |
+
},
|
| 99 |
+
"32009": {
|
| 100 |
+
"content": "<extra_id_90>",
|
| 101 |
+
"lstrip": false,
|
| 102 |
+
"normalized": false,
|
| 103 |
+
"rstrip": false,
|
| 104 |
+
"single_word": false,
|
| 105 |
+
"special": true
|
| 106 |
+
},
|
| 107 |
+
"32010": {
|
| 108 |
+
"content": "<extra_id_89>",
|
| 109 |
+
"lstrip": false,
|
| 110 |
+
"normalized": false,
|
| 111 |
+
"rstrip": false,
|
| 112 |
+
"single_word": false,
|
| 113 |
+
"special": true
|
| 114 |
+
},
|
| 115 |
+
"32011": {
|
| 116 |
+
"content": "<extra_id_88>",
|
| 117 |
+
"lstrip": false,
|
| 118 |
+
"normalized": false,
|
| 119 |
+
"rstrip": false,
|
| 120 |
+
"single_word": false,
|
| 121 |
+
"special": true
|
| 122 |
+
},
|
| 123 |
+
"32012": {
|
| 124 |
+
"content": "<extra_id_87>",
|
| 125 |
+
"lstrip": false,
|
| 126 |
+
"normalized": false,
|
| 127 |
+
"rstrip": false,
|
| 128 |
+
"single_word": false,
|
| 129 |
+
"special": true
|
| 130 |
+
},
|
| 131 |
+
"32013": {
|
| 132 |
+
"content": "<extra_id_86>",
|
| 133 |
+
"lstrip": false,
|
| 134 |
+
"normalized": false,
|
| 135 |
+
"rstrip": false,
|
| 136 |
+
"single_word": false,
|
| 137 |
+
"special": true
|
| 138 |
+
},
|
| 139 |
+
"32014": {
|
| 140 |
+
"content": "<extra_id_85>",
|
| 141 |
+
"lstrip": false,
|
| 142 |
+
"normalized": false,
|
| 143 |
+
"rstrip": false,
|
| 144 |
+
"single_word": false,
|
| 145 |
+
"special": true
|
| 146 |
+
},
|
| 147 |
+
"32015": {
|
| 148 |
+
"content": "<extra_id_84>",
|
| 149 |
+
"lstrip": false,
|
| 150 |
+
"normalized": false,
|
| 151 |
+
"rstrip": false,
|
| 152 |
+
"single_word": false,
|
| 153 |
+
"special": true
|
| 154 |
+
},
|
| 155 |
+
"32016": {
|
| 156 |
+
"content": "<extra_id_83>",
|
| 157 |
+
"lstrip": false,
|
| 158 |
+
"normalized": false,
|
| 159 |
+
"rstrip": false,
|
| 160 |
+
"single_word": false,
|
| 161 |
+
"special": true
|
| 162 |
+
},
|
| 163 |
+
"32017": {
|
| 164 |
+
"content": "<extra_id_82>",
|
| 165 |
+
"lstrip": false,
|
| 166 |
+
"normalized": false,
|
| 167 |
+
"rstrip": false,
|
| 168 |
+
"single_word": false,
|
| 169 |
+
"special": true
|
| 170 |
+
},
|
| 171 |
+
"32018": {
|
| 172 |
+
"content": "<extra_id_81>",
|
| 173 |
+
"lstrip": false,
|
| 174 |
+
"normalized": false,
|
| 175 |
+
"rstrip": false,
|
| 176 |
+
"single_word": false,
|
| 177 |
+
"special": true
|
| 178 |
+
},
|
| 179 |
+
"32019": {
|
| 180 |
+
"content": "<extra_id_80>",
|
| 181 |
+
"lstrip": false,
|
| 182 |
+
"normalized": false,
|
| 183 |
+
"rstrip": false,
|
| 184 |
+
"single_word": false,
|
| 185 |
+
"special": true
|
| 186 |
+
},
|
| 187 |
+
"32020": {
|
| 188 |
+
"content": "<extra_id_79>",
|
| 189 |
+
"lstrip": false,
|
| 190 |
+
"normalized": false,
|
| 191 |
+
"rstrip": false,
|
| 192 |
+
"single_word": false,
|
| 193 |
+
"special": true
|
| 194 |
+
},
|
| 195 |
+
"32021": {
|
| 196 |
+
"content": "<extra_id_78>",
|
| 197 |
+
"lstrip": false,
|
| 198 |
+
"normalized": false,
|
| 199 |
+
"rstrip": false,
|
| 200 |
+
"single_word": false,
|
| 201 |
+
"special": true
|
| 202 |
+
},
|
| 203 |
+
"32022": {
|
| 204 |
+
"content": "<extra_id_77>",
|
| 205 |
+
"lstrip": false,
|
| 206 |
+
"normalized": false,
|
| 207 |
+
"rstrip": false,
|
| 208 |
+
"single_word": false,
|
| 209 |
+
"special": true
|
| 210 |
+
},
|
| 211 |
+
"32023": {
|
| 212 |
+
"content": "<extra_id_76>",
|
| 213 |
+
"lstrip": false,
|
| 214 |
+
"normalized": false,
|
| 215 |
+
"rstrip": false,
|
| 216 |
+
"single_word": false,
|
| 217 |
+
"special": true
|
| 218 |
+
},
|
| 219 |
+
"32024": {
|
| 220 |
+
"content": "<extra_id_75>",
|
| 221 |
+
"lstrip": false,
|
| 222 |
+
"normalized": false,
|
| 223 |
+
"rstrip": false,
|
| 224 |
+
"single_word": false,
|
| 225 |
+
"special": true
|
| 226 |
+
},
|
| 227 |
+
"32025": {
|
| 228 |
+
"content": "<extra_id_74>",
|
| 229 |
+
"lstrip": false,
|
| 230 |
+
"normalized": false,
|
| 231 |
+
"rstrip": false,
|
| 232 |
+
"single_word": false,
|
| 233 |
+
"special": true
|
| 234 |
+
},
|
| 235 |
+
"32026": {
|
| 236 |
+
"content": "<extra_id_73>",
|
| 237 |
+
"lstrip": false,
|
| 238 |
+
"normalized": false,
|
| 239 |
+
"rstrip": false,
|
| 240 |
+
"single_word": false,
|
| 241 |
+
"special": true
|
| 242 |
+
},
|
| 243 |
+
"32027": {
|
| 244 |
+
"content": "<extra_id_72>",
|
| 245 |
+
"lstrip": false,
|
| 246 |
+
"normalized": false,
|
| 247 |
+
"rstrip": false,
|
| 248 |
+
"single_word": false,
|
| 249 |
+
"special": true
|
| 250 |
+
},
|
| 251 |
+
"32028": {
|
| 252 |
+
"content": "<extra_id_71>",
|
| 253 |
+
"lstrip": false,
|
| 254 |
+
"normalized": false,
|
| 255 |
+
"rstrip": false,
|
| 256 |
+
"single_word": false,
|
| 257 |
+
"special": true
|
| 258 |
+
},
|
| 259 |
+
"32029": {
|
| 260 |
+
"content": "<extra_id_70>",
|
| 261 |
+
"lstrip": false,
|
| 262 |
+
"normalized": false,
|
| 263 |
+
"rstrip": false,
|
| 264 |
+
"single_word": false,
|
| 265 |
+
"special": true
|
| 266 |
+
},
|
| 267 |
+
"32030": {
|
| 268 |
+
"content": "<extra_id_69>",
|
| 269 |
+
"lstrip": false,
|
| 270 |
+
"normalized": false,
|
| 271 |
+
"rstrip": false,
|
| 272 |
+
"single_word": false,
|
| 273 |
+
"special": true
|
| 274 |
+
},
|
| 275 |
+
"32031": {
|
| 276 |
+
"content": "<extra_id_68>",
|
| 277 |
+
"lstrip": false,
|
| 278 |
+
"normalized": false,
|
| 279 |
+
"rstrip": false,
|
| 280 |
+
"single_word": false,
|
| 281 |
+
"special": true
|
| 282 |
+
},
|
| 283 |
+
"32032": {
|
| 284 |
+
"content": "<extra_id_67>",
|
| 285 |
+
"lstrip": false,
|
| 286 |
+
"normalized": false,
|
| 287 |
+
"rstrip": false,
|
| 288 |
+
"single_word": false,
|
| 289 |
+
"special": true
|
| 290 |
+
},
|
| 291 |
+
"32033": {
|
| 292 |
+
"content": "<extra_id_66>",
|
| 293 |
+
"lstrip": false,
|
| 294 |
+
"normalized": false,
|
| 295 |
+
"rstrip": false,
|
| 296 |
+
"single_word": false,
|
| 297 |
+
"special": true
|
| 298 |
+
},
|
| 299 |
+
"32034": {
|
| 300 |
+
"content": "<extra_id_65>",
|
| 301 |
+
"lstrip": false,
|
| 302 |
+
"normalized": false,
|
| 303 |
+
"rstrip": false,
|
| 304 |
+
"single_word": false,
|
| 305 |
+
"special": true
|
| 306 |
+
},
|
| 307 |
+
"32035": {
|
| 308 |
+
"content": "<extra_id_64>",
|
| 309 |
+
"lstrip": false,
|
| 310 |
+
"normalized": false,
|
| 311 |
+
"rstrip": false,
|
| 312 |
+
"single_word": false,
|
| 313 |
+
"special": true
|
| 314 |
+
},
|
| 315 |
+
"32036": {
|
| 316 |
+
"content": "<extra_id_63>",
|
| 317 |
+
"lstrip": false,
|
| 318 |
+
"normalized": false,
|
| 319 |
+
"rstrip": false,
|
| 320 |
+
"single_word": false,
|
| 321 |
+
"special": true
|
| 322 |
+
},
|
| 323 |
+
"32037": {
|
| 324 |
+
"content": "<extra_id_62>",
|
| 325 |
+
"lstrip": false,
|
| 326 |
+
"normalized": false,
|
| 327 |
+
"rstrip": false,
|
| 328 |
+
"single_word": false,
|
| 329 |
+
"special": true
|
| 330 |
+
},
|
| 331 |
+
"32038": {
|
| 332 |
+
"content": "<extra_id_61>",
|
| 333 |
+
"lstrip": false,
|
| 334 |
+
"normalized": false,
|
| 335 |
+
"rstrip": false,
|
| 336 |
+
"single_word": false,
|
| 337 |
+
"special": true
|
| 338 |
+
},
|
| 339 |
+
"32039": {
|
| 340 |
+
"content": "<extra_id_60>",
|
| 341 |
+
"lstrip": false,
|
| 342 |
+
"normalized": false,
|
| 343 |
+
"rstrip": false,
|
| 344 |
+
"single_word": false,
|
| 345 |
+
"special": true
|
| 346 |
+
},
|
| 347 |
+
"32040": {
|
| 348 |
+
"content": "<extra_id_59>",
|
| 349 |
+
"lstrip": false,
|
| 350 |
+
"normalized": false,
|
| 351 |
+
"rstrip": false,
|
| 352 |
+
"single_word": false,
|
| 353 |
+
"special": true
|
| 354 |
+
},
|
| 355 |
+
"32041": {
|
| 356 |
+
"content": "<extra_id_58>",
|
| 357 |
+
"lstrip": false,
|
| 358 |
+
"normalized": false,
|
| 359 |
+
"rstrip": false,
|
| 360 |
+
"single_word": false,
|
| 361 |
+
"special": true
|
| 362 |
+
},
|
| 363 |
+
"32042": {
|
| 364 |
+
"content": "<extra_id_57>",
|
| 365 |
+
"lstrip": false,
|
| 366 |
+
"normalized": false,
|
| 367 |
+
"rstrip": false,
|
| 368 |
+
"single_word": false,
|
| 369 |
+
"special": true
|
| 370 |
+
},
|
| 371 |
+
"32043": {
|
| 372 |
+
"content": "<extra_id_56>",
|
| 373 |
+
"lstrip": false,
|
| 374 |
+
"normalized": false,
|
| 375 |
+
"rstrip": false,
|
| 376 |
+
"single_word": false,
|
| 377 |
+
"special": true
|
| 378 |
+
},
|
| 379 |
+
"32044": {
|
| 380 |
+
"content": "<extra_id_55>",
|
| 381 |
+
"lstrip": false,
|
| 382 |
+
"normalized": false,
|
| 383 |
+
"rstrip": false,
|
| 384 |
+
"single_word": false,
|
| 385 |
+
"special": true
|
| 386 |
+
},
|
| 387 |
+
"32045": {
|
| 388 |
+
"content": "<extra_id_54>",
|
| 389 |
+
"lstrip": false,
|
| 390 |
+
"normalized": false,
|
| 391 |
+
"rstrip": false,
|
| 392 |
+
"single_word": false,
|
| 393 |
+
"special": true
|
| 394 |
+
},
|
| 395 |
+
"32046": {
|
| 396 |
+
"content": "<extra_id_53>",
|
| 397 |
+
"lstrip": false,
|
| 398 |
+
"normalized": false,
|
| 399 |
+
"rstrip": false,
|
| 400 |
+
"single_word": false,
|
| 401 |
+
"special": true
|
| 402 |
+
},
|
| 403 |
+
"32047": {
|
| 404 |
+
"content": "<extra_id_52>",
|
| 405 |
+
"lstrip": false,
|
| 406 |
+
"normalized": false,
|
| 407 |
+
"rstrip": false,
|
| 408 |
+
"single_word": false,
|
| 409 |
+
"special": true
|
| 410 |
+
},
|
| 411 |
+
"32048": {
|
| 412 |
+
"content": "<extra_id_51>",
|
| 413 |
+
"lstrip": false,
|
| 414 |
+
"normalized": false,
|
| 415 |
+
"rstrip": false,
|
| 416 |
+
"single_word": false,
|
| 417 |
+
"special": true
|
| 418 |
+
},
|
| 419 |
+
"32049": {
|
| 420 |
+
"content": "<extra_id_50>",
|
| 421 |
+
"lstrip": false,
|
| 422 |
+
"normalized": false,
|
| 423 |
+
"rstrip": false,
|
| 424 |
+
"single_word": false,
|
| 425 |
+
"special": true
|
| 426 |
+
},
|
| 427 |
+
"32050": {
|
| 428 |
+
"content": "<extra_id_49>",
|
| 429 |
+
"lstrip": false,
|
| 430 |
+
"normalized": false,
|
| 431 |
+
"rstrip": false,
|
| 432 |
+
"single_word": false,
|
| 433 |
+
"special": true
|
| 434 |
+
},
|
| 435 |
+
"32051": {
|
| 436 |
+
"content": "<extra_id_48>",
|
| 437 |
+
"lstrip": false,
|
| 438 |
+
"normalized": false,
|
| 439 |
+
"rstrip": false,
|
| 440 |
+
"single_word": false,
|
| 441 |
+
"special": true
|
| 442 |
+
},
|
| 443 |
+
"32052": {
|
| 444 |
+
"content": "<extra_id_47>",
|
| 445 |
+
"lstrip": false,
|
| 446 |
+
"normalized": false,
|
| 447 |
+
"rstrip": false,
|
| 448 |
+
"single_word": false,
|
| 449 |
+
"special": true
|
| 450 |
+
},
|
| 451 |
+
"32053": {
|
| 452 |
+
"content": "<extra_id_46>",
|
| 453 |
+
"lstrip": false,
|
| 454 |
+
"normalized": false,
|
| 455 |
+
"rstrip": false,
|
| 456 |
+
"single_word": false,
|
| 457 |
+
"special": true
|
| 458 |
+
},
|
| 459 |
+
"32054": {
|
| 460 |
+
"content": "<extra_id_45>",
|
| 461 |
+
"lstrip": false,
|
| 462 |
+
"normalized": false,
|
| 463 |
+
"rstrip": false,
|
| 464 |
+
"single_word": false,
|
| 465 |
+
"special": true
|
| 466 |
+
},
|
| 467 |
+
"32055": {
|
| 468 |
+
"content": "<extra_id_44>",
|
| 469 |
+
"lstrip": false,
|
| 470 |
+
"normalized": false,
|
| 471 |
+
"rstrip": false,
|
| 472 |
+
"single_word": false,
|
| 473 |
+
"special": true
|
| 474 |
+
},
|
| 475 |
+
"32056": {
|
| 476 |
+
"content": "<extra_id_43>",
|
| 477 |
+
"lstrip": false,
|
| 478 |
+
"normalized": false,
|
| 479 |
+
"rstrip": false,
|
| 480 |
+
"single_word": false,
|
| 481 |
+
"special": true
|
| 482 |
+
},
|
| 483 |
+
"32057": {
|
| 484 |
+
"content": "<extra_id_42>",
|
| 485 |
+
"lstrip": false,
|
| 486 |
+
"normalized": false,
|
| 487 |
+
"rstrip": false,
|
| 488 |
+
"single_word": false,
|
| 489 |
+
"special": true
|
| 490 |
+
},
|
| 491 |
+
"32058": {
|
| 492 |
+
"content": "<extra_id_41>",
|
| 493 |
+
"lstrip": false,
|
| 494 |
+
"normalized": false,
|
| 495 |
+
"rstrip": false,
|
| 496 |
+
"single_word": false,
|
| 497 |
+
"special": true
|
| 498 |
+
},
|
| 499 |
+
"32059": {
|
| 500 |
+
"content": "<extra_id_40>",
|
| 501 |
+
"lstrip": false,
|
| 502 |
+
"normalized": false,
|
| 503 |
+
"rstrip": false,
|
| 504 |
+
"single_word": false,
|
| 505 |
+
"special": true
|
| 506 |
+
},
|
| 507 |
+
"32060": {
|
| 508 |
+
"content": "<extra_id_39>",
|
| 509 |
+
"lstrip": false,
|
| 510 |
+
"normalized": false,
|
| 511 |
+
"rstrip": false,
|
| 512 |
+
"single_word": false,
|
| 513 |
+
"special": true
|
| 514 |
+
},
|
| 515 |
+
"32061": {
|
| 516 |
+
"content": "<extra_id_38>",
|
| 517 |
+
"lstrip": false,
|
| 518 |
+
"normalized": false,
|
| 519 |
+
"rstrip": false,
|
| 520 |
+
"single_word": false,
|
| 521 |
+
"special": true
|
| 522 |
+
},
|
| 523 |
+
"32062": {
|
| 524 |
+
"content": "<extra_id_37>",
|
| 525 |
+
"lstrip": false,
|
| 526 |
+
"normalized": false,
|
| 527 |
+
"rstrip": false,
|
| 528 |
+
"single_word": false,
|
| 529 |
+
"special": true
|
| 530 |
+
},
|
| 531 |
+
"32063": {
|
| 532 |
+
"content": "<extra_id_36>",
|
| 533 |
+
"lstrip": false,
|
| 534 |
+
"normalized": false,
|
| 535 |
+
"rstrip": false,
|
| 536 |
+
"single_word": false,
|
| 537 |
+
"special": true
|
| 538 |
+
},
|
| 539 |
+
"32064": {
|
| 540 |
+
"content": "<extra_id_35>",
|
| 541 |
+
"lstrip": false,
|
| 542 |
+
"normalized": false,
|
| 543 |
+
"rstrip": false,
|
| 544 |
+
"single_word": false,
|
| 545 |
+
"special": true
|
| 546 |
+
},
|
| 547 |
+
"32065": {
|
| 548 |
+
"content": "<extra_id_34>",
|
| 549 |
+
"lstrip": false,
|
| 550 |
+
"normalized": false,
|
| 551 |
+
"rstrip": false,
|
| 552 |
+
"single_word": false,
|
| 553 |
+
"special": true
|
| 554 |
+
},
|
| 555 |
+
"32066": {
|
| 556 |
+
"content": "<extra_id_33>",
|
| 557 |
+
"lstrip": false,
|
| 558 |
+
"normalized": false,
|
| 559 |
+
"rstrip": false,
|
| 560 |
+
"single_word": false,
|
| 561 |
+
"special": true
|
| 562 |
+
},
|
| 563 |
+
"32067": {
|
| 564 |
+
"content": "<extra_id_32>",
|
| 565 |
+
"lstrip": false,
|
| 566 |
+
"normalized": false,
|
| 567 |
+
"rstrip": false,
|
| 568 |
+
"single_word": false,
|
| 569 |
+
"special": true
|
| 570 |
+
},
|
| 571 |
+
"32068": {
|
| 572 |
+
"content": "<extra_id_31>",
|
| 573 |
+
"lstrip": false,
|
| 574 |
+
"normalized": false,
|
| 575 |
+
"rstrip": false,
|
| 576 |
+
"single_word": false,
|
| 577 |
+
"special": true
|
| 578 |
+
},
|
| 579 |
+
"32069": {
|
| 580 |
+
"content": "<extra_id_30>",
|
| 581 |
+
"lstrip": false,
|
| 582 |
+
"normalized": false,
|
| 583 |
+
"rstrip": false,
|
| 584 |
+
"single_word": false,
|
| 585 |
+
"special": true
|
| 586 |
+
},
|
| 587 |
+
"32070": {
|
| 588 |
+
"content": "<extra_id_29>",
|
| 589 |
+
"lstrip": false,
|
| 590 |
+
"normalized": false,
|
| 591 |
+
"rstrip": false,
|
| 592 |
+
"single_word": false,
|
| 593 |
+
"special": true
|
| 594 |
+
},
|
| 595 |
+
"32071": {
|
| 596 |
+
"content": "<extra_id_28>",
|
| 597 |
+
"lstrip": false,
|
| 598 |
+
"normalized": false,
|
| 599 |
+
"rstrip": false,
|
| 600 |
+
"single_word": false,
|
| 601 |
+
"special": true
|
| 602 |
+
},
|
| 603 |
+
"32072": {
|
| 604 |
+
"content": "<extra_id_27>",
|
| 605 |
+
"lstrip": false,
|
| 606 |
+
"normalized": false,
|
| 607 |
+
"rstrip": false,
|
| 608 |
+
"single_word": false,
|
| 609 |
+
"special": true
|
| 610 |
+
},
|
| 611 |
+
"32073": {
|
| 612 |
+
"content": "<extra_id_26>",
|
| 613 |
+
"lstrip": false,
|
| 614 |
+
"normalized": false,
|
| 615 |
+
"rstrip": false,
|
| 616 |
+
"single_word": false,
|
| 617 |
+
"special": true
|
| 618 |
+
},
|
| 619 |
+
"32074": {
|
| 620 |
+
"content": "<extra_id_25>",
|
| 621 |
+
"lstrip": false,
|
| 622 |
+
"normalized": false,
|
| 623 |
+
"rstrip": false,
|
| 624 |
+
"single_word": false,
|
| 625 |
+
"special": true
|
| 626 |
+
},
|
| 627 |
+
"32075": {
|
| 628 |
+
"content": "<extra_id_24>",
|
| 629 |
+
"lstrip": false,
|
| 630 |
+
"normalized": false,
|
| 631 |
+
"rstrip": false,
|
| 632 |
+
"single_word": false,
|
| 633 |
+
"special": true
|
| 634 |
+
},
|
| 635 |
+
"32076": {
|
| 636 |
+
"content": "<extra_id_23>",
|
| 637 |
+
"lstrip": false,
|
| 638 |
+
"normalized": false,
|
| 639 |
+
"rstrip": false,
|
| 640 |
+
"single_word": false,
|
| 641 |
+
"special": true
|
| 642 |
+
},
|
| 643 |
+
"32077": {
|
| 644 |
+
"content": "<extra_id_22>",
|
| 645 |
+
"lstrip": false,
|
| 646 |
+
"normalized": false,
|
| 647 |
+
"rstrip": false,
|
| 648 |
+
"single_word": false,
|
| 649 |
+
"special": true
|
| 650 |
+
},
|
| 651 |
+
"32078": {
|
| 652 |
+
"content": "<extra_id_21>",
|
| 653 |
+
"lstrip": false,
|
| 654 |
+
"normalized": false,
|
| 655 |
+
"rstrip": false,
|
| 656 |
+
"single_word": false,
|
| 657 |
+
"special": true
|
| 658 |
+
},
|
| 659 |
+
"32079": {
|
| 660 |
+
"content": "<extra_id_20>",
|
| 661 |
+
"lstrip": false,
|
| 662 |
+
"normalized": false,
|
| 663 |
+
"rstrip": false,
|
| 664 |
+
"single_word": false,
|
| 665 |
+
"special": true
|
| 666 |
+
},
|
| 667 |
+
"32080": {
|
| 668 |
+
"content": "<extra_id_19>",
|
| 669 |
+
"lstrip": false,
|
| 670 |
+
"normalized": false,
|
| 671 |
+
"rstrip": false,
|
| 672 |
+
"single_word": false,
|
| 673 |
+
"special": true
|
| 674 |
+
},
|
| 675 |
+
"32081": {
|
| 676 |
+
"content": "<extra_id_18>",
|
| 677 |
+
"lstrip": false,
|
| 678 |
+
"normalized": false,
|
| 679 |
+
"rstrip": false,
|
| 680 |
+
"single_word": false,
|
| 681 |
+
"special": true
|
| 682 |
+
},
|
| 683 |
+
"32082": {
|
| 684 |
+
"content": "<extra_id_17>",
|
| 685 |
+
"lstrip": false,
|
| 686 |
+
"normalized": false,
|
| 687 |
+
"rstrip": false,
|
| 688 |
+
"single_word": false,
|
| 689 |
+
"special": true
|
| 690 |
+
},
|
| 691 |
+
"32083": {
|
| 692 |
+
"content": "<extra_id_16>",
|
| 693 |
+
"lstrip": false,
|
| 694 |
+
"normalized": false,
|
| 695 |
+
"rstrip": false,
|
| 696 |
+
"single_word": false,
|
| 697 |
+
"special": true
|
| 698 |
+
},
|
| 699 |
+
"32084": {
|
| 700 |
+
"content": "<extra_id_15>",
|
| 701 |
+
"lstrip": false,
|
| 702 |
+
"normalized": false,
|
| 703 |
+
"rstrip": false,
|
| 704 |
+
"single_word": false,
|
| 705 |
+
"special": true
|
| 706 |
+
},
|
| 707 |
+
"32085": {
|
| 708 |
+
"content": "<extra_id_14>",
|
| 709 |
+
"lstrip": false,
|
| 710 |
+
"normalized": false,
|
| 711 |
+
"rstrip": false,
|
| 712 |
+
"single_word": false,
|
| 713 |
+
"special": true
|
| 714 |
+
},
|
| 715 |
+
"32086": {
|
| 716 |
+
"content": "<extra_id_13>",
|
| 717 |
+
"lstrip": false,
|
| 718 |
+
"normalized": false,
|
| 719 |
+
"rstrip": false,
|
| 720 |
+
"single_word": false,
|
| 721 |
+
"special": true
|
| 722 |
+
},
|
| 723 |
+
"32087": {
|
| 724 |
+
"content": "<extra_id_12>",
|
| 725 |
+
"lstrip": false,
|
| 726 |
+
"normalized": false,
|
| 727 |
+
"rstrip": false,
|
| 728 |
+
"single_word": false,
|
| 729 |
+
"special": true
|
| 730 |
+
},
|
| 731 |
+
"32088": {
|
| 732 |
+
"content": "<extra_id_11>",
|
| 733 |
+
"lstrip": false,
|
| 734 |
+
"normalized": false,
|
| 735 |
+
"rstrip": false,
|
| 736 |
+
"single_word": false,
|
| 737 |
+
"special": true
|
| 738 |
+
},
|
| 739 |
+
"32089": {
|
| 740 |
+
"content": "<extra_id_10>",
|
| 741 |
+
"lstrip": false,
|
| 742 |
+
"normalized": false,
|
| 743 |
+
"rstrip": false,
|
| 744 |
+
"single_word": false,
|
| 745 |
+
"special": true
|
| 746 |
+
},
|
| 747 |
+
"32090": {
|
| 748 |
+
"content": "<extra_id_9>",
|
| 749 |
+
"lstrip": false,
|
| 750 |
+
"normalized": false,
|
| 751 |
+
"rstrip": false,
|
| 752 |
+
"single_word": false,
|
| 753 |
+
"special": true
|
| 754 |
+
},
|
| 755 |
+
"32091": {
|
| 756 |
+
"content": "<extra_id_8>",
|
| 757 |
+
"lstrip": false,
|
| 758 |
+
"normalized": false,
|
| 759 |
+
"rstrip": false,
|
| 760 |
+
"single_word": false,
|
| 761 |
+
"special": true
|
| 762 |
+
},
|
| 763 |
+
"32092": {
|
| 764 |
+
"content": "<extra_id_7>",
|
| 765 |
+
"lstrip": false,
|
| 766 |
+
"normalized": false,
|
| 767 |
+
"rstrip": false,
|
| 768 |
+
"single_word": false,
|
| 769 |
+
"special": true
|
| 770 |
+
},
|
| 771 |
+
"32093": {
|
| 772 |
+
"content": "<extra_id_6>",
|
| 773 |
+
"lstrip": false,
|
| 774 |
+
"normalized": false,
|
| 775 |
+
"rstrip": false,
|
| 776 |
+
"single_word": false,
|
| 777 |
+
"special": true
|
| 778 |
+
},
|
| 779 |
+
"32094": {
|
| 780 |
+
"content": "<extra_id_5>",
|
| 781 |
+
"lstrip": false,
|
| 782 |
+
"normalized": false,
|
| 783 |
+
"rstrip": false,
|
| 784 |
+
"single_word": false,
|
| 785 |
+
"special": true
|
| 786 |
+
},
|
| 787 |
+
"32095": {
|
| 788 |
+
"content": "<extra_id_4>",
|
| 789 |
+
"lstrip": false,
|
| 790 |
+
"normalized": false,
|
| 791 |
+
"rstrip": false,
|
| 792 |
+
"single_word": false,
|
| 793 |
+
"special": true
|
| 794 |
+
},
|
| 795 |
+
"32096": {
|
| 796 |
+
"content": "<extra_id_3>",
|
| 797 |
+
"lstrip": false,
|
| 798 |
+
"normalized": false,
|
| 799 |
+
"rstrip": false,
|
| 800 |
+
"single_word": false,
|
| 801 |
+
"special": true
|
| 802 |
+
},
|
| 803 |
+
"32097": {
|
| 804 |
+
"content": "<extra_id_2>",
|
| 805 |
+
"lstrip": false,
|
| 806 |
+
"normalized": false,
|
| 807 |
+
"rstrip": false,
|
| 808 |
+
"single_word": false,
|
| 809 |
+
"special": true
|
| 810 |
+
},
|
| 811 |
+
"32098": {
|
| 812 |
+
"content": "<extra_id_1>",
|
| 813 |
+
"lstrip": false,
|
| 814 |
+
"normalized": false,
|
| 815 |
+
"rstrip": false,
|
| 816 |
+
"single_word": false,
|
| 817 |
+
"special": true
|
| 818 |
+
},
|
| 819 |
+
"32099": {
|
| 820 |
+
"content": "<extra_id_0>",
|
| 821 |
+
"lstrip": false,
|
| 822 |
+
"normalized": false,
|
| 823 |
+
"rstrip": false,
|
| 824 |
+
"single_word": false,
|
| 825 |
+
"special": true
|
| 826 |
+
}
|
| 827 |
+
},
|
| 828 |
+
"additional_special_tokens": [
|
| 829 |
+
"<extra_id_0>",
|
| 830 |
+
"<extra_id_1>",
|
| 831 |
+
"<extra_id_2>",
|
| 832 |
+
"<extra_id_3>",
|
| 833 |
+
"<extra_id_4>",
|
| 834 |
+
"<extra_id_5>",
|
| 835 |
+
"<extra_id_6>",
|
| 836 |
+
"<extra_id_7>",
|
| 837 |
+
"<extra_id_8>",
|
| 838 |
+
"<extra_id_9>",
|
| 839 |
+
"<extra_id_10>",
|
| 840 |
+
"<extra_id_11>",
|
| 841 |
+
"<extra_id_12>",
|
| 842 |
+
"<extra_id_13>",
|
| 843 |
+
"<extra_id_14>",
|
| 844 |
+
"<extra_id_15>",
|
| 845 |
+
"<extra_id_16>",
|
| 846 |
+
"<extra_id_17>",
|
| 847 |
+
"<extra_id_18>",
|
| 848 |
+
"<extra_id_19>",
|
| 849 |
+
"<extra_id_20>",
|
| 850 |
+
"<extra_id_21>",
|
| 851 |
+
"<extra_id_22>",
|
| 852 |
+
"<extra_id_23>",
|
| 853 |
+
"<extra_id_24>",
|
| 854 |
+
"<extra_id_25>",
|
| 855 |
+
"<extra_id_26>",
|
| 856 |
+
"<extra_id_27>",
|
| 857 |
+
"<extra_id_28>",
|
| 858 |
+
"<extra_id_29>",
|
| 859 |
+
"<extra_id_30>",
|
| 860 |
+
"<extra_id_31>",
|
| 861 |
+
"<extra_id_32>",
|
| 862 |
+
"<extra_id_33>",
|
| 863 |
+
"<extra_id_34>",
|
| 864 |
+
"<extra_id_35>",
|
| 865 |
+
"<extra_id_36>",
|
| 866 |
+
"<extra_id_37>",
|
| 867 |
+
"<extra_id_38>",
|
| 868 |
+
"<extra_id_39>",
|
| 869 |
+
"<extra_id_40>",
|
| 870 |
+
"<extra_id_41>",
|
| 871 |
+
"<extra_id_42>",
|
| 872 |
+
"<extra_id_43>",
|
| 873 |
+
"<extra_id_44>",
|
| 874 |
+
"<extra_id_45>",
|
| 875 |
+
"<extra_id_46>",
|
| 876 |
+
"<extra_id_47>",
|
| 877 |
+
"<extra_id_48>",
|
| 878 |
+
"<extra_id_49>",
|
| 879 |
+
"<extra_id_50>",
|
| 880 |
+
"<extra_id_51>",
|
| 881 |
+
"<extra_id_52>",
|
| 882 |
+
"<extra_id_53>",
|
| 883 |
+
"<extra_id_54>",
|
| 884 |
+
"<extra_id_55>",
|
| 885 |
+
"<extra_id_56>",
|
| 886 |
+
"<extra_id_57>",
|
| 887 |
+
"<extra_id_58>",
|
| 888 |
+
"<extra_id_59>",
|
| 889 |
+
"<extra_id_60>",
|
| 890 |
+
"<extra_id_61>",
|
| 891 |
+
"<extra_id_62>",
|
| 892 |
+
"<extra_id_63>",
|
| 893 |
+
"<extra_id_64>",
|
| 894 |
+
"<extra_id_65>",
|
| 895 |
+
"<extra_id_66>",
|
| 896 |
+
"<extra_id_67>",
|
| 897 |
+
"<extra_id_68>",
|
| 898 |
+
"<extra_id_69>",
|
| 899 |
+
"<extra_id_70>",
|
| 900 |
+
"<extra_id_71>",
|
| 901 |
+
"<extra_id_72>",
|
| 902 |
+
"<extra_id_73>",
|
| 903 |
+
"<extra_id_74>",
|
| 904 |
+
"<extra_id_75>",
|
| 905 |
+
"<extra_id_76>",
|
| 906 |
+
"<extra_id_77>",
|
| 907 |
+
"<extra_id_78>",
|
| 908 |
+
"<extra_id_79>",
|
| 909 |
+
"<extra_id_80>",
|
| 910 |
+
"<extra_id_81>",
|
| 911 |
+
"<extra_id_82>",
|
| 912 |
+
"<extra_id_83>",
|
| 913 |
+
"<extra_id_84>",
|
| 914 |
+
"<extra_id_85>",
|
| 915 |
+
"<extra_id_86>",
|
| 916 |
+
"<extra_id_87>",
|
| 917 |
+
"<extra_id_88>",
|
| 918 |
+
"<extra_id_89>",
|
| 919 |
+
"<extra_id_90>",
|
| 920 |
+
"<extra_id_91>",
|
| 921 |
+
"<extra_id_92>",
|
| 922 |
+
"<extra_id_93>",
|
| 923 |
+
"<extra_id_94>",
|
| 924 |
+
"<extra_id_95>",
|
| 925 |
+
"<extra_id_96>",
|
| 926 |
+
"<extra_id_97>",
|
| 927 |
+
"<extra_id_98>",
|
| 928 |
+
"<extra_id_99>"
|
| 929 |
+
],
|
| 930 |
+
"clean_up_tokenization_spaces": true,
|
| 931 |
+
"eos_token": "</s>",
|
| 932 |
+
"extra_ids": 100,
|
| 933 |
+
"max_length": 2048,
|
| 934 |
+
"model_max_length": 512,
|
| 935 |
+
"pad_token": "<pad>",
|
| 936 |
+
"stride": 0,
|
| 937 |
+
"tokenizer_class": "T5Tokenizer",
|
| 938 |
+
"truncation": true,
|
| 939 |
+
"truncation_side": "right",
|
| 940 |
+
"truncation_strategy": "longest_first",
|
| 941 |
+
"unk_token": "<unk>"
|
| 942 |
+
}
|
Falconsai/fear_mongering_detection/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Falconsai/fear_mongering_detection/README.md
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language: en
|
| 3 |
+
tags:
|
| 4 |
+
- text-classification
|
| 5 |
+
- distilbert
|
| 6 |
+
license: apache-2.0
|
| 7 |
+
widget:
|
| 8 |
+
- text: >-
|
| 9 |
+
A secret society is orchestrating a global experiment in emotional
|
| 10 |
+
manipulation, using mass media to incite fear and anxiety among the
|
| 11 |
+
population.
|
| 12 |
+
example_title: Fear Mongering
|
| 13 |
+
- text: >-
|
| 14 |
+
Each year, the Internal Revenue Service (IRS) determines the staffing level
|
| 15 |
+
for its toll-free telephone customer service operations. GAO found that IRS
|
| 16 |
+
lacks a long-term telephone customer service goal that reflects the needs of
|
| 17 |
+
taxpayers and the costs and benefits of meeting that goal. Rather, IRS
|
| 18 |
+
annually determines the level of funding it will seek for its customer
|
| 19 |
+
service workforce, using its judgment of how to best balance service and
|
| 20 |
+
compliance activities.
|
| 21 |
+
example_title: Normal Speech
|
| 22 |
+
---
|
| 23 |
+
# Model Card: Finetuned DistilBERT for Fear Mongering Detection
|
| 24 |
+
|
| 25 |
+
## Model Description
|
| 26 |
+
|
| 27 |
+
The **Fine-Tuned DistilBERT** is a variant of the BERT transformer model,
|
| 28 |
+
distilled for efficient performance while maintaining high accuracy.
|
| 29 |
+
It has been adapted and fine-tuned for the specific task of detecting fear mongering in text data.
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
### Definition
|
| 33 |
+
Fear Monger:
|
| 34 |
+
/ˈfɪrˌmʌŋ.ɡɚ/ to intentionally try to make people afraid of something when this is not necessary or reasonable.
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
The model, named "Falconsai/fear_mongering_detection", is pre-trained on a substantial amount of text data,
|
| 38 |
+
which allows it to capture semantic nuances and contextual information present in natural language text.
|
| 39 |
+
It has been fine-tuned with meticulous attention to hyperparameter settings, including batch size and learning rate, to ensure optimal model performance for the fear mongering detection task.
|
| 40 |
+
|
| 41 |
+
During the fine-tuning process, a batch size of 16 for efficient computation and learning was chosen.
|
| 42 |
+
Additionally, a learning rate (2e-5) was selected to strike a balance between rapid convergence and steady optimization,
|
| 43 |
+
ensuring the model not only learns quickly but also steadily refines its capabilities throughout training.
|
| 44 |
+
|
| 45 |
+
This model has been trained for 100 epochs on a rather small dataset (under 50k) specifically designed for "Fear Mongering Identification".
|
| 46 |
+
|
| 47 |
+
The goal of this meticulous training process is to equip the model with the ability to identify instances of fear mongering in text data effectively, making it ready to contribute to a wide range of applications involving human speech, text, and generated content.
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
### How to Use
|
| 51 |
+
To use this model for fear mongering classification, you can follow these steps:
|
| 52 |
+
|
| 53 |
+
```python
|
| 54 |
+
from transformers import pipeline
|
| 55 |
+
|
| 56 |
+
statement = "The rise of smart cities is part of a covert plan to create a global surveillance network, where every move and action is monitored and controlled."
|
| 57 |
+
classifier = pipeline("text-classification", model="Falconsai/fear_mongering_detection")
|
| 58 |
+
classifier(statement)
|
| 59 |
+
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
## Model Details
|
| 63 |
+
|
| 64 |
+
- **Model Name:** Falconsai/fear_mongering_detection
|
| 65 |
+
- **Model Type:** Text Classification
|
| 66 |
+
- **Architecture:** DistilBERT-base-uncased
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
## Use Cases
|
| 70 |
+
|
| 71 |
+
### 1. Social Media Monitoring
|
| 72 |
+
|
| 73 |
+
- **Description:** The model can be applied to analyze social media posts and comments to identify instances of fear mongering. This can be useful for social media platforms to monitor and moderate content that may spread fear or misinformation.
|
| 74 |
+
|
| 75 |
+
### 2. News Article Analysis
|
| 76 |
+
|
| 77 |
+
- **Description:** The model can be utilized to analyze news articles and identify sections containing fear-mongering language. This can help media outlets and fact-checking organizations to assess the tone and potential bias in news reporting.
|
| 78 |
+
|
| 79 |
+
### 3. Content Moderation in Online Platforms
|
| 80 |
+
|
| 81 |
+
- **Description:** Online platforms and forums can deploy the model to automatically flag or filter out content that may be perceived as fear-mongering. This helps maintain a more positive and constructive online environment.
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
## Limitations
|
| 85 |
+
|
| 86 |
+
- **Domain Specificity:** The model was trained solely to identify fear mongering, so its performance is limited to that task and may not generalize well to other contexts.
|
| 87 |
+
- **False Positives:** The model may occasionally misclassify non-fear-mongering text as fear-mongering. Users should be aware of this limitation.
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
## Responsible Usage
|
| 91 |
+
|
| 92 |
+
It is essential to use this model responsibly and ethically, adhering to content guidelines and applicable regulations when implementing it in real-world applications, particularly those involving potentially sensitive content.
|
| 93 |
+
|
| 94 |
+
## References
|
| 95 |
+
|
| 96 |
+
- [Hugging Face Model Hub](https://huggingface.co/models)
|
| 97 |
+
- [DistilBERT Paper](https://arxiv.org/abs/1910.01108)
|
| 98 |
+
|
| 99 |
+
**Disclaimer:** The model's performance may be influenced by the quality and representativeness of the data it was fine-tuned on. Users are encouraged to assess the model's suitability for their specific applications and datasets.
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
## Conclusion
|
| 103 |
+
|
| 104 |
+
This model card provides an overview of a fine-tuned DistilBERT model for fear mongering detection. Users are encouraged to consider the model's performance, limitations, and ethical considerations when applying it in different scenarios.
|
Falconsai/fear_mongering_detection/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "Falconsai/fear_mongering_detection",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "Fear_Mongering",
|
| 13 |
+
"1": "Non_Fear_Mongering"
|
| 14 |
+
},
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"max_position_embeddings": 512,
|
| 17 |
+
"model_type": "distilbert",
|
| 18 |
+
"n_heads": 12,
|
| 19 |
+
"n_layers": 6,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"problem_type": "single_label_classification",
|
| 22 |
+
"qa_dropout": 0.1,
|
| 23 |
+
"seq_classif_dropout": 0.2,
|
| 24 |
+
"sinusoidal_pos_embds": false,
|
| 25 |
+
"tie_weights_": true,
|
| 26 |
+
"torch_dtype": "float32",
|
| 27 |
+
"transformers_version": "4.36.0.dev0",
|
| 28 |
+
"vocab_size": 30522
|
| 29 |
+
}
|
Falconsai/fear_mongering_detection/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9445136e7577532d34e133fba40304e56836bf0237a321ec0b5d9834ebfbde67
|
| 3 |
+
size 267832560
|
Falconsai/fear_mongering_detection/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
Falconsai/fear_mongering_detection/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Falconsai/fear_mongering_detection/tokenizer_config.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": true,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_lower_case": true,
|
| 47 |
+
"mask_token": "[MASK]",
|
| 48 |
+
"model_max_length": 512,
|
| 49 |
+
"pad_token": "[PAD]",
|
| 50 |
+
"sep_token": "[SEP]",
|
| 51 |
+
"strip_accents": null,
|
| 52 |
+
"tokenize_chinese_chars": true,
|
| 53 |
+
"tokenizer_class": "DistilBertTokenizer",
|
| 54 |
+
"unk_token": "[UNK]"
|
| 55 |
+
}
|
Falconsai/fear_mongering_detection/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Falconsai/intent_classification/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Falconsai/intent_classification/README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
pipeline_tag: text-classification
|
| 4 |
+
language:
|
| 5 |
+
- en
|
| 6 |
+
widget:
|
| 7 |
+
- text: I ordered from you 2 weeks ago and its stil not here.
|
| 8 |
+
- text: I need to bring in my daughter for a checkup.
|
| 9 |
+
---
|
| 10 |
+
# Model Card: Fine-Tuned DistilBERT for User Intent Classification
|
| 11 |
+
|
| 12 |
+
## Model Description
|
| 13 |
+
|
| 14 |
+
The **Fine-Tuned DistilBERT** is a variant of the BERT transformer model,
|
| 15 |
+
distilled for efficient performance while maintaining high accuracy.
|
| 16 |
+
It has been adapted and fine-tuned for the specific task of classifying user intent in text data.
|
| 17 |
+
|
| 18 |
+
The base model, "distilbert-base-uncased," is pre-trained on a substantial amount of text data,
|
| 19 |
+
which allows it to capture semantic nuances and contextual information present in natural language text.
|
| 20 |
+
It has been fine-tuned with meticulous attention to hyperparameter settings, including batch size and learning rate, to ensure optimal model performance for the user intent classification task.
|
| 21 |
+
|
| 22 |
+
During the fine-tuning process, a batch size of 8 for efficient computation and learning was chosen.
|
| 23 |
+
Additionally, a learning rate (2e-5) was selected to strike a balance between rapid convergence and steady optimization,
|
| 24 |
+
ensuring the model not only learns quickly but also steadily refines its capabilities throughout training.
|
| 25 |
+
|
| 26 |
+
This model has been trained for 100 epochs on a rather small dataset (under 50k) specifically designed for user intent classification.
|
| 27 |
+
The dataset consists of text samples, each labeled with different user intents, such as "information seeking," "question asking," or "opinion expressing." The diversity within the dataset allowed the model to learn to identify user intent accurately. This dataset was carefully curated from a variety of sources.
|
| 28 |
+
|
| 29 |
+
The goal of this meticulous training process is to equip the model with the ability to classify user intent in text data effectively, making it ready to contribute to a wide range of applications involving user interaction analysis and personalization.
|
| 30 |
+
|
| 31 |
+
## Intended Uses & Limitations
|
| 32 |
+
|
| 33 |
+
### Intended Uses
|
| 34 |
+
- **User Intent Classification**: The primary intended use of this model is to classify user intent in text data. It is well-suited for applications that involve understanding user intentions, such as chatbots, virtual assistants, and recommendation systems.
|
| 35 |
+
|
| 36 |
+
### How to Use
|
| 37 |
+
To use this model for user intent classification, you can follow these steps:
|
| 38 |
+
|
| 39 |
+
```python
|
| 40 |
+
from transformers import pipeline
|
| 41 |
+
|
| 42 |
+
classifier = pipeline("text-classification", model="Falconsai/intent_classification")
|
| 43 |
+
text = "Your text to classify here."
|
| 44 |
+
result = classifier(text)
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
### Limitations
|
| 48 |
+
- **Specialized Task Fine-Tuning**: While the model excels at user intent classification, its performance may vary when applied to other natural language processing tasks. Users interested in employing this model for different tasks should explore fine-tuned versions available in the model hub for optimal results.
|
| 49 |
+
|
| 50 |
+
## Training Data
|
| 51 |
+
|
| 52 |
+
The model's training data includes a proprietary dataset designed for user intent classification. This dataset comprises a diverse collection of text samples, categorized into various user intent classes. The training process aimed to equip the model with the ability to classify user intent effectively.
|
| 53 |
+
|
| 54 |
+
### Training Stats
|
| 55 |
+
- Evaluation Loss: 0.011744413524866104
|
| 56 |
+
- Evaluation Accuracy: 0.9986976744186047
|
| 57 |
+
- Evaluation Runtime: 3.1136
|
| 58 |
+
- Evaluation Samples per Second: 1726.29
|
| 59 |
+
- Evaluation Steps per Second: 215.826
|
| 60 |
+
|
| 61 |
+
## Responsible Usage
|
| 62 |
+
|
| 63 |
+
It is essential to use this model responsibly and ethically, adhering to content guidelines and applicable regulations when implementing it in real-world applications, particularly those involving potentially sensitive content.
|
| 64 |
+
|
| 65 |
+
## References
|
| 66 |
+
|
| 67 |
+
- [Hugging Face Model Hub](https://huggingface.co/models)
|
| 68 |
+
- [DistilBERT Paper](https://arxiv.org/abs/1910.01108)
|
| 69 |
+
|
| 70 |
+
**Disclaimer:** The model's performance may be influenced by the quality and representativeness of the data it was fine-tuned on. Users are encouraged to assess the model's suitability for their specific applications and datasets.
|
Falconsai/intent_classification/config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "Falconsai/intent_classification",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "cancellation",
|
| 13 |
+
"1": "ordering",
|
| 14 |
+
"2": "shipping",
|
| 15 |
+
"3": "invoicing",
|
| 16 |
+
"4": "billing and payment",
|
| 17 |
+
"5": "returns and refunds",
|
| 18 |
+
"6": "complaints and feedback",
|
| 19 |
+
"7": "speak to person",
|
| 20 |
+
"8": "edit account",
|
| 21 |
+
"9": "delete account",
|
| 22 |
+
"10": "delivery information",
|
| 23 |
+
"11": "subscription",
|
| 24 |
+
"12": "recover password",
|
| 25 |
+
"13": "registration problems",
|
| 26 |
+
"14": "appointment"
|
| 27 |
+
},
|
| 28 |
+
"initializer_range": 0.02,
|
| 29 |
+
"label2id": null,
|
| 30 |
+
"max_position_embeddings": 512,
|
| 31 |
+
"model_type": "distilbert",
|
| 32 |
+
"n_heads": 12,
|
| 33 |
+
"n_layers": 6,
|
| 34 |
+
"pad_token_id": 0,
|
| 35 |
+
"problem_type": "single_label_classification",
|
| 36 |
+
"qa_dropout": 0.1,
|
| 37 |
+
"seq_classif_dropout": 0.2,
|
| 38 |
+
"sinusoidal_pos_embds": false,
|
| 39 |
+
"tie_weights_": true,
|
| 40 |
+
"torch_dtype": "float32",
|
| 41 |
+
"transformers_version": "4.31.0",
|
| 42 |
+
"vocab_size": 30522
|
| 43 |
+
}
|
Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16df93af33748a9127940878f4c720bc157da4f89a6a1756904bcfcb835d362c
|
| 3 |
+
size 66210
|
Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6dca982d7f198072ac884ebe092a40fcd65547c434bb5995dc074909524fb80
|
| 3 |
+
size 266687104
|
Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"fileFormatVersion": "1.0.0",
|
| 3 |
+
"itemInfoEntries": {
|
| 4 |
+
"AFA8F27A-567D-483E-9161-8CCF1082DB9C": {
|
| 5 |
+
"author": "com.apple.CoreML",
|
| 6 |
+
"description": "CoreML Model Specification",
|
| 7 |
+
"name": "model.mlmodel",
|
| 8 |
+
"path": "com.apple.CoreML/model.mlmodel"
|
| 9 |
+
},
|
| 10 |
+
"E8C20928-9550-43A8-8294-A3CB7A28DCB1": {
|
| 11 |
+
"author": "com.apple.CoreML",
|
| 12 |
+
"description": "CoreML Model Weights",
|
| 13 |
+
"name": "weights",
|
| 14 |
+
"path": "com.apple.CoreML/weights"
|
| 15 |
+
}
|
| 16 |
+
},
|
| 17 |
+
"rootModelIdentifier": "AFA8F27A-567D-483E-9161-8CCF1082DB9C"
|
| 18 |
+
}
|
Falconsai/intent_classification/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:292e786305afd79f04f799a7c6f7756b29f261ff6e944b23c3a540baa24741ba
|
| 3 |
+
size 267872556
|
Falconsai/intent_classification/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9827b446f7bbcaca6b56c2a8accecc97264953e9d5d9adaf7c29d6a6dad61f3e
|
| 3 |
+
size 267894125
|
Falconsai/intent_classification/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
Falconsai/intent_classification/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Falconsai/intent_classification/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"clean_up_tokenization_spaces": true,
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": true,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"model_max_length": 512,
|
| 7 |
+
"pad_token": "[PAD]",
|
| 8 |
+
"sep_token": "[SEP]",
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "DistilBertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
Falconsai/intent_classification/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Falconsai/offensive_speech_detection/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
Falconsai/offensive_speech_detection/README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
pipeline_tag: text-classification
|
| 4 |
+
language:
|
| 5 |
+
- en
|
| 6 |
+
widget:
|
| 7 |
+
- text: I think this is great.
|
| 8 |
+
example_title: Example 1
|
| 9 |
+
- text: You are awful
|
| 10 |
+
example_title: Example 2
|
| 11 |
+
---
|
| 12 |
+
# Model Card: Fine-Tuned DistilBERT for Offensive/Hate Speech Detection
|
| 13 |
+
|
| 14 |
+
## Model Description
|
| 15 |
+
|
| 16 |
+
The **Fine-Tuned DistilBERT** is a variant of the BERT transformer model,
|
| 17 |
+
distilled for efficient performance while maintaining high accuracy.
|
| 18 |
+
It has been adapted and fine-tuned for the specific task of offensive/hate speech detection in text data.
|
| 19 |
+
|
| 20 |
+
The model, named "distilbert-base-uncased," is pre-trained on a substantial amount of text data,
|
| 21 |
+
which allows it to capture semantic nuances and contextual information present in natural language text.
|
| 22 |
+
It has been fine-tuned with meticulous attention to hyperparameter settings, including batch size and learning rate, to ensure optimal model performance for the offensive/hate speech detection task.
|
| 23 |
+
|
| 24 |
+
During the fine-tuning process, a batch size of 16 for efficient computation and learning was chosen.
|
| 25 |
+
Additionally, a learning rate (2e-5) was selected to strike a balance between rapid convergence and steady optimization,
|
| 26 |
+
ensuring the model not only learns quickly but also steadily refines its capabilities throughout training.
|
| 27 |
+
|
| 28 |
+
This model has been trained on a proprietary dataset of fewer than 100k samples, specifically designed for offensive/hate speech detection.
|
| 29 |
+
The dataset consists of text samples, each labeled as "non-offensive" or "offensive."
|
| 30 |
+
The diversity within the dataset allowed the model to learn to identify offensive content accurately. This dataset was combined from research datasets on this topic as well as news headlines. All duplicates were removed and meticulous effort was taken to ensure the dataset quality.
|
| 31 |
+
|
| 32 |
+
The goal of this meticulous training process is to equip the model with the ability to detect offensive and hate speech in text data effectively. The result is a model ready to contribute significantly to content moderation and safety, while maintaining high standards of accuracy and reliability.
|
| 33 |
+
|
| 34 |
+
## Intended Uses & Limitations
|
| 35 |
+
|
| 36 |
+
### Intended Uses
|
| 37 |
+
- **Offensive/Hate Speech Detection**: The primary intended use of this model is to detect offensive or hate speech in text data. It is well-suited for filtering and identifying inappropriate content in various applications.
|
| 38 |
+
|
| 39 |
+
- **Of Special Note**: The data suggests the word "like" is most often used in derogatory comparative statements.
|
| 40 |
+
- These have numerous instances within the "Offensive Speech Dataset". "You look like X" or "He smells like X" are quite common.
|
| 41 |
+
- Also of note, the ABSENCE/LACK OF punctuation lends itself heavily to the "Offensive" dataset.
|
| 42 |
+
- Accordingly the model will identify these as well, based on their prominence in the training data.
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
### How to Use
|
| 46 |
+
To use this model for offensive/hate speech detection, you can follow these steps:
|
| 47 |
+
```python
|
| 48 |
+
from transformers import pipeline
|
| 49 |
+
|
| 50 |
+
classifier = pipeline("text-classification", model="Falconsai/offensive_speech_detection")
|
| 51 |
+
text = "Your text to classify here."
|
| 52 |
+
result = classifier(text)
|
| 53 |
+
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
### Limitations
|
| 58 |
+
- **Specialized Task Fine-Tuning**: While the model is adept at offensive/hate speech detection, its performance may vary when applied to other natural language processing tasks.
|
| 59 |
+
- Users interested in employing this model for different tasks should explore fine-tuned versions available in the model hub for optimal results.
|
| 60 |
+
|
| 61 |
+
## Training Data
|
| 62 |
+
|
| 63 |
+
The model's training data includes a proprietary dataset designed for offensive/hate speech detection. This dataset comprises a diverse collection of text samples, categorized into "non-offensive" and "offensive" classes. The training process aimed to equip the model with the ability to distinguish between offensive and non-offensive content effectively.
|
| 64 |
+
|
| 65 |
+
### Training Stats
|
| 66 |
+
- Evaluation Loss: 0.018403256312012672
|
| 67 |
+
- Evaluation Accuracy: 0.9973234886940471
|
| 68 |
+
- Evaluation Runtime: 85.0789
|
| 69 |
+
- Evaluation Samples per Second: 127.352
|
| 70 |
+
- Evaluation Steps per Second: 7.969
|
| 71 |
+
|
| 72 |
+
**Note:** Specific evaluation statistics should be provided based on the model's performance.
|
| 73 |
+
|
| 74 |
+
## Responsible Usage
|
| 75 |
+
|
| 76 |
+
It is essential to use this model responsibly and ethically, adhering to content guidelines and applicable regulations when implementing it in real-world applications, particularly those involving potentially sensitive content.
|
| 77 |
+
|
| 78 |
+
## References
|
| 79 |
+
|
| 80 |
+
- [Hugging Face Model Hub](https://huggingface.co/models)
|
| 81 |
+
- [DistilBERT Paper](https://arxiv.org/abs/1910.01108)
|
| 82 |
+
|
| 83 |
+
**Disclaimer:** The model's performance may be influenced by the quality and representativeness of the data it was fine-tuned on. Users are encouraged to assess the model's suitability for their specific applications and datasets.
|
Falconsai/offensive_speech_detection/config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "Falconsai/offensive_speech_detection",
|
| 3 |
+
"activation": "gelu",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"DistilBertForSequenceClassification"
|
| 6 |
+
],
|
| 7 |
+
"attention_dropout": 0.1,
|
| 8 |
+
"dim": 768,
|
| 9 |
+
"dropout": 0.1,
|
| 10 |
+
"hidden_dim": 3072,
|
| 11 |
+
"id2label": {
|
| 12 |
+
"0": "OFFENSIVE",
|
| 13 |
+
"1": "SAFE"
|
| 14 |
+
},
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"label2id": {
|
| 17 |
+
"OFFENSIVE": 0,
|
| 18 |
+
"SAFE": 1
|
| 19 |
+
},
|
| 20 |
+
"max_position_embeddings": 512,
|
| 21 |
+
"model_type": "distilbert",
|
| 22 |
+
"n_heads": 12,
|
| 23 |
+
"n_layers": 6,
|
| 24 |
+
"pad_token_id": 0,
|
| 25 |
+
"problem_type": "single_label_classification",
|
| 26 |
+
"qa_dropout": 0.1,
|
| 27 |
+
"seq_classif_dropout": 0.2,
|
| 28 |
+
"sinusoidal_pos_embds": false,
|
| 29 |
+
"tie_weights_": true,
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.31.0",
|
| 32 |
+
"vocab_size": 30522
|
| 33 |
+
}
|
Falconsai/offensive_speech_detection/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70666d9324c110df5cf43cd9f7b11b9a00077bfe5847bfe6302a0866c74c9611
|
| 3 |
+
size 267832560
|
Falconsai/offensive_speech_detection/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b899111ea3377e5da91f3d3a7b9a5469fe38a5d32f5d5de7ba78be8d9d05dd06
|
| 3 |
+
size 535701061
|