niobures committed on
Commit
affcd57
·
verified ·
1 Parent(s): ec246f0

BespinGlobal, FalconsAI, NPleshkanov, Wanyu, Yanjie

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +45 -35
  2. BespinGlobal/klue-roberta-small-3i4k-intent-classification/.gitattributes +28 -0
  3. BespinGlobal/klue-roberta-small-3i4k-intent-classification/README.md +82 -0
  4. BespinGlobal/klue-roberta-small-3i4k-intent-classification/config.json +46 -0
  5. BespinGlobal/klue-roberta-small-3i4k-intent-classification/model.safetensors +3 -0
  6. BespinGlobal/klue-roberta-small-3i4k-intent-classification/pytorch_model.bin +3 -0
  7. BespinGlobal/klue-roberta-small-3i4k-intent-classification/special_tokens_map.json +1 -0
  8. BespinGlobal/klue-roberta-small-3i4k-intent-classification/tf_model.h5 +3 -0
  9. BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer.json +0 -0
  10. BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer_config.json +1 -0
  11. BespinGlobal/klue-roberta-small-3i4k-intent-classification/vocab.txt +0 -0
  12. Falconsai/DATASETS/customer_intents/.gitattributes +55 -0
  13. Falconsai/DATASETS/customer_intents/dataset_dict.json +23 -0
  14. Falconsai/DATASETS/customer_intents/train/data-00000-of-00001.arrow +3 -0
  15. Falconsai/DATASETS/customer_intents/train/dataset_info.json +16 -0
  16. Falconsai/DATASETS/customer_intents/train/state.json +13 -0
  17. Falconsai/arc_of_conversation/.gitattributes +35 -0
  18. Falconsai/arc_of_conversation/README.md +196 -0
  19. Falconsai/arc_of_conversation/config.json +61 -0
  20. Falconsai/arc_of_conversation/generation_config.json +6 -0
  21. Falconsai/arc_of_conversation/model.safetensors +3 -0
  22. Falconsai/arc_of_conversation/special_tokens_map.json +125 -0
  23. Falconsai/arc_of_conversation/spiece.model +3 -0
  24. Falconsai/arc_of_conversation/tokenizer.json +0 -0
  25. Falconsai/arc_of_conversation/tokenizer_config.json +942 -0
  26. Falconsai/fear_mongering_detection/.gitattributes +35 -0
  27. Falconsai/fear_mongering_detection/README.md +104 -0
  28. Falconsai/fear_mongering_detection/config.json +29 -0
  29. Falconsai/fear_mongering_detection/model.safetensors +3 -0
  30. Falconsai/fear_mongering_detection/special_tokens_map.json +7 -0
  31. Falconsai/fear_mongering_detection/tokenizer.json +0 -0
  32. Falconsai/fear_mongering_detection/tokenizer_config.json +55 -0
  33. Falconsai/fear_mongering_detection/vocab.txt +0 -0
  34. Falconsai/intent_classification/.gitattributes +35 -0
  35. Falconsai/intent_classification/README.md +70 -0
  36. Falconsai/intent_classification/config.json +43 -0
  37. Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  38. Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  39. Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Manifest.json +18 -0
  40. Falconsai/intent_classification/model.safetensors +3 -0
  41. Falconsai/intent_classification/pytorch_model.bin +3 -0
  42. Falconsai/intent_classification/special_tokens_map.json +7 -0
  43. Falconsai/intent_classification/tokenizer.json +0 -0
  44. Falconsai/intent_classification/tokenizer_config.json +13 -0
  45. Falconsai/intent_classification/vocab.txt +0 -0
  46. Falconsai/offensive_speech_detection/.gitattributes +35 -0
  47. Falconsai/offensive_speech_detection/README.md +83 -0
  48. Falconsai/offensive_speech_detection/config.json +33 -0
  49. Falconsai/offensive_speech_detection/model.safetensors +3 -0
  50. Falconsai/offensive_speech_detection/optimizer.pt +3 -0
.gitattributes CHANGED
@@ -1,35 +1,45 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ NPleshkanov/adapter_labse_intent_classifier/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ NPleshkanov/ru-labse-toxic/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ Wanyu/DATASETS/IteraTeR_full_doc/train.json filter=lfs diff=lfs merge=lfs -text
39
+ Wanyu/DATASETS/IteraTeR_full_sent/dev.json filter=lfs diff=lfs merge=lfs -text
40
+ Wanyu/DATASETS/IteraTeR_full_sent/test.json filter=lfs diff=lfs merge=lfs -text
41
+ Wanyu/DATASETS/IteraTeR_full_sent/train.json filter=lfs diff=lfs merge=lfs -text
42
+ Wanyu/DATASETS/IteraTeR_v2/r3_demo_snapshot.jpg filter=lfs diff=lfs merge=lfs -text
43
+ Wanyu/DATASETS/IteraTeR_v2/sent-level.dev.intents.json filter=lfs diff=lfs merge=lfs -text
44
+ Wanyu/DATASETS/IteraTeR_v2/sent-level.test.intents.json filter=lfs diff=lfs merge=lfs -text
45
+ Wanyu/DATASETS/IteraTeR_v2/sent-level.train.intents.json filter=lfs diff=lfs merge=lfs -text
BespinGlobal/klue-roberta-small-3i4k-intent-classification/.gitattributes ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
BespinGlobal/klue-roberta-small-3i4k-intent-classification/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: ko
3
+ tags:
4
+ - intent-classification
5
+ datasets:
6
+ - kor_3i4k
7
+ license: cc-by-nc-4.0
8
+ ---
9
+
10
+ ## Finetuning
11
+ - Pretrained model : [klue/roberta-small](https://github.com/KLUE-benchmark/KLUE)
12
+ - Dataset for fine-tuning : [3i4k](https://github.com/warnikchow/3i4k)
13
+ - Train : 46,863
14
+ - Validation : 8,271 (15% of Train)
15
+ - Test : 6,121
16
+ - Label info
17
+ - 0: "fragment",
18
+ - 1: "statement",
19
+ - 2: "question",
20
+ - 3: "command",
21
+ - 4: "rhetorical question",
22
+ - 5: "rhetorical command",
23
+ - 6: "intonation-dependent utterance"
24
+ - Parameters of Training
25
+ ```
26
+ {
27
+ "epochs": 3 (configured for 10, but training stopped early),
28
+ "batch_size":32,
29
+ "optimizer_class": "<class 'keras.optimizer_v2.adam.Adam'>",
30
+ "optimizer_params": {
31
+ "lr": 5e-05
32
+ },
33
+ "min_delta": 0.01
34
+ }
35
+ ```
36
+
37
+ ## Usage
38
+ ``` python
39
+ from transformers import RobertaTokenizerFast, RobertaForSequenceClassification, TextClassificationPipeline
40
+
41
+ # Load the fine-tuned model from the Hugging Face Model Hub
42
+ HUGGINGFACE_MODEL_PATH = "bespin-global/klue-roberta-small-3i4k-intent-classification"
43
+ loaded_tokenizer = RobertaTokenizerFast.from_pretrained(HUGGINGFACE_MODEL_PATH )
44
+ loaded_model = RobertaForSequenceClassification.from_pretrained(HUGGINGFACE_MODEL_PATH )
45
+
46
+ # using Pipeline
47
+ text_classifier = TextClassificationPipeline(
48
+ tokenizer=loaded_tokenizer,
49
+ model=loaded_model,
50
+ return_all_scores=True
51
+ )
52
+
53
+ # predict
54
+ text = "your text"
55
+
56
+ preds_list = text_classifier(text)
57
+ best_pred = preds_list[0]
58
+ print(f"Label of Best Intentatioin: {best_pred['label']}")
59
+ print(f"Score of Best Intentatioin: {best_pred['score']}")
60
+ ```
61
+
62
+ ## Evaluation
63
+ ```
64
+ precision recall f1-score support
65
+
66
+ command 0.89 0.92 0.90 1296
67
+ fragment 0.98 0.96 0.97 600
68
+ intonation-dependent utterance 0.71 0.69 0.70 327
69
+ question 0.95 0.97 0.96 1786
70
+ rhetorical command 0.87 0.64 0.74 108
71
+ rhetorical question 0.61 0.63 0.62 174
72
+ statement 0.91 0.89 0.90 1830
73
+
74
+ accuracy 0.90 6121
75
+ macro avg 0.85 0.81 0.83 6121
76
+ weighted avg 0.90 0.90 0.90 6121
77
+ ```
78
+
79
+
80
+ ## Citing & Authors
81
+ <!--- Describe where people can find more information -->
82
+ [Jaehyeong](https://huggingface.co/jaehyeong) at [Bespin Global](https://www.bespinglobal.com/)
BespinGlobal/klue-roberta-small-3i4k-intent-classification/config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "klue/roberta-small",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "fragment",
16
+ "1": "statement",
17
+ "2": "question",
18
+ "3": "command",
19
+ "4": "rhetorical question",
20
+ "5": "rhetorical command",
21
+ "6": "intonation-dependent utterance"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "label2id": {
26
+ "command": 3,
27
+ "fragment": 0,
28
+ "intonation-dependent utterance": 6,
29
+ "question": 2,
30
+ "rhetorical command": 5,
31
+ "rhetorical question": 4,
32
+ "statement": 1
33
+ },
34
+ "layer_norm_eps": 1e-05,
35
+ "max_position_embeddings": 514,
36
+ "model_type": "roberta",
37
+ "num_attention_heads": 12,
38
+ "num_hidden_layers": 6,
39
+ "pad_token_id": 1,
40
+ "position_embedding_type": "absolute",
41
+ "tokenizer_class": "BertTokenizer",
42
+ "transformers_version": "4.11.3",
43
+ "type_vocab_size": 1,
44
+ "use_cache": true,
45
+ "vocab_size": 32000
46
+ }
BespinGlobal/klue-roberta-small-3i4k-intent-classification/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1e42cbafd51dbfed362c9748fc2aaecd581afd93dc26f54b34c9320baa5eccf
3
+ size 272401784
BespinGlobal/klue-roberta-small-3i4k-intent-classification/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d20aa33599de15a3a0003242d2eacadcb477b2e6d119a3ea11e0bf412a3234c
3
+ size 272431767
BespinGlobal/klue-roberta-small-3i4k-intent-classification/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
BespinGlobal/klue-roberta-small-3i4k-intent-classification/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e56a64376750165a98d5f370632e7c329bb5169e7604b810c5fbf729ad764a0
3
+ size 272534680
BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
BespinGlobal/klue-roberta-small-3i4k-intent-classification/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "[CLS]", "eos_token": "[SEP]", "add_prefix_space": false, "errors": "replace", "sep_token": "[SEP]", "cls_token": "[CLS]", "pad_token": "[PAD]", "mask_token": "[MASK]", "do_lower_case": false, "do_basic_tokenize": true, "never_split": null, "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": "/home/jupyter/.cache/huggingface/transformers/9ce71a5afff600bb47488785ec31125c4a485302e21d660291b10925f8bfcb67.70c17d6e4d492c8f24f5bb97ab56c7f272e947112c6faf9dd846da42ba13eb23", "name_or_path": "klue/roberta-small", "tokenizer_class": "RobertaTokenizer"}
BespinGlobal/klue-roberta-small-3i4k-intent-classification/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Falconsai/DATASETS/customer_intents/.gitattributes ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.lz4 filter=lfs diff=lfs merge=lfs -text
12
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
13
+ *.model filter=lfs diff=lfs merge=lfs -text
14
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
15
+ *.npy filter=lfs diff=lfs merge=lfs -text
16
+ *.npz filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx filter=lfs diff=lfs merge=lfs -text
18
+ *.ot filter=lfs diff=lfs merge=lfs -text
19
+ *.parquet filter=lfs diff=lfs merge=lfs -text
20
+ *.pb filter=lfs diff=lfs merge=lfs -text
21
+ *.pickle filter=lfs diff=lfs merge=lfs -text
22
+ *.pkl filter=lfs diff=lfs merge=lfs -text
23
+ *.pt filter=lfs diff=lfs merge=lfs -text
24
+ *.pth filter=lfs diff=lfs merge=lfs -text
25
+ *.rar filter=lfs diff=lfs merge=lfs -text
26
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
27
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
29
+ *.tar filter=lfs diff=lfs merge=lfs -text
30
+ *.tflite filter=lfs diff=lfs merge=lfs -text
31
+ *.tgz filter=lfs diff=lfs merge=lfs -text
32
+ *.wasm filter=lfs diff=lfs merge=lfs -text
33
+ *.xz filter=lfs diff=lfs merge=lfs -text
34
+ *.zip filter=lfs diff=lfs merge=lfs -text
35
+ *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
37
+ # Audio files - uncompressed
38
+ *.pcm filter=lfs diff=lfs merge=lfs -text
39
+ *.sam filter=lfs diff=lfs merge=lfs -text
40
+ *.raw filter=lfs diff=lfs merge=lfs -text
41
+ # Audio files - compressed
42
+ *.aac filter=lfs diff=lfs merge=lfs -text
43
+ *.flac filter=lfs diff=lfs merge=lfs -text
44
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
45
+ *.ogg filter=lfs diff=lfs merge=lfs -text
46
+ *.wav filter=lfs diff=lfs merge=lfs -text
47
+ # Image files - uncompressed
48
+ *.bmp filter=lfs diff=lfs merge=lfs -text
49
+ *.gif filter=lfs diff=lfs merge=lfs -text
50
+ *.png filter=lfs diff=lfs merge=lfs -text
51
+ *.tiff filter=lfs diff=lfs merge=lfs -text
52
+ # Image files - compressed
53
+ *.jpg filter=lfs diff=lfs merge=lfs -text
54
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
55
+ *.webp filter=lfs diff=lfs merge=lfs -text
Falconsai/DATASETS/customer_intents/dataset_dict.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"default": {
2
+ "description": "Dataset of labeled customer comments or questions",
3
+ "citation": "",
4
+ "homepage": "https://huggingface.co/datasets/Falconsai/customer_intents",
5
+ "license": "",
6
+ "features": {
7
+ "label": {
8
+ "dtype": "string",
9
+ "_type": "Value"
10
+ },
11
+ "text": {
12
+ "dtype": "string",
13
+ "_type": "Value"
14
+ }
15
+ },
16
+ "splits": {
17
+ "train": {
18
+ "name": "train",
19
+ "num_examples": 27856,
20
+ "dataset_name": null
21
+ }
22
+ }
23
+ }}
Falconsai/DATASETS/customer_intents/train/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8bbefd2f15df40746ab0a133b805e455b3376431c90b1956f932d0b431fab1c
3
+ size 1950648
Falconsai/DATASETS/customer_intents/train/dataset_info.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "label": {
6
+ "dtype": "string",
7
+ "_type": "Value"
8
+ },
9
+ "text": {
10
+ "dtype": "string",
11
+ "_type": "Value"
12
+ }
13
+ },
14
+ "homepage": "",
15
+ "license": ""
16
+ }
Falconsai/DATASETS/customer_intents/train/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "d73dee637232af5e",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
Falconsai/arc_of_conversation/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Falconsai/arc_of_conversation/README.md ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - NLP
7
+ pipeline_tag: summarization
8
+ widget:
9
+ - text: ' Moderator: Welcome, everyone, to this exciting panel discussion. Today,
10
+ we have Elon Musk and Sam Altman, two of the most influential figures in the tech
11
+ industry. We’re here to discuss the future of artificial intelligence and its
12
+ impact on society. Elon, Sam, thank you for joining us. Elon Musk: Happy to be
13
+ here. Sam Altman: Looking forward to the discussion. Moderator: Let’s dive right
14
+ in. Elon, you’ve been very vocal about your concerns regarding AI. Could you elaborate
15
+ on why you believe AI poses such a significant risk to humanity? Elon Musk: Certainly.
16
+ AI has the potential to become more intelligent than humans, which could be extremely
17
+ dangerous if it goes unchecked. The existential threat is real. If we don’t implement
18
+ strict regulations and oversight, we risk creating something that could outsmart
19
+ us and act against our interests. It’s a ticking time bomb. Sam Altman: I respect
20
+ Elon’s concerns, but I think he’s overestimating the threat. The focus should
21
+ be on leveraging AI to solve some of humanity’s biggest problems. With proper
22
+ ethical frameworks and robust safety measures, we can ensure AI benefits everyone.
23
+ The fear-mongering is unproductive and could hinder technological progress. Elon
24
+ Musk: It’s not fear-mongering, Sam. It’s being cautious. We need to ensure that
25
+ we have control mechanisms in place. Without these, we’re playing with fire. You
26
+ can’t possibly believe that AI will always remain benevolent or under our control.
27
+ Sam Altman: Control mechanisms are essential, I agree, but what you’re suggesting
28
+ sounds like stifling innovation out of fear. We need a balanced approach. Overregulation
29
+ could slow down advancements that could otherwise save lives and improve quality
30
+ of life globally. We must foster innovation while ensuring safety, not let fear
31
+ dictate our actions. Elon Musk: Balancing innovation and safety is easier said
32
+ than done. When you’re dealing with something as unpredictable and powerful as
33
+ AI, the risks far outweigh the potential benefits if we don’t tread carefully.
34
+ History has shown us the dangers of underestimating new technologies. Sam Altman:
35
+ And history has also shown us the incredible benefits of technological advancement.
36
+ If we had been overly cautious, we might not have the medical, communication,
37
+ or energy technologies we have today. It’s about finding that middle ground where
38
+ innovation thrives safely. We can’t just halt progress because of hypothetical
39
+ risks. Elon Musk: It’s not hypothetical, Sam. Look at how quickly AI capabilities
40
+ are advancing. We’re already seeing issues with bias, decision-making, and unintended
41
+ consequences. Imagine this on a larger scale. We can’t afford to be complacent.
42
+ Sam Altman: Bias and unintended consequences are exactly why we need to invest
43
+ in research and development to address these issues head-on. By building AI responsibly
44
+ and learning from each iteration, we can mitigate these risks. Shutting down or
45
+ heavily regulating AI development out of fear isn’t the solution. Moderator: Both
46
+ of you make compelling points. Let’s fast forward a bit. Say, ten years from now,
47
+ we have stringent regulations in place, as Elon suggests, or a more flexible framework,
48
+ as Sam proposes. What does the world look like? Elon Musk: With stringent regulations,
49
+ we would have a more controlled and safer AI development environment. This would
50
+ prevent any catastrophic events and ensure that AI works for us, not against us.
51
+ We’d be able to avoid many potential disasters that an unchecked AI might cause.
52
+ Sam Altman: On the other hand, with a more flexible framework, we’d see rapid
53
+ advancements in AI applications across various sectors, from healthcare to education,
54
+ bringing significant improvements to quality of life and solving problems that
55
+ seem insurmountable today. The world would be a much better place with these innovations.
56
+ Moderator: And what if both of you are wrong? Elon Musk: Wrong? Sam Altman: How
57
+ so? Moderator: Suppose the future shows that neither stringent regulations nor
58
+ a flexible framework were the key factors. Instead, what if the major breakthroughs
59
+ and safety measures came from unexpected areas like quantum computing advancements
60
+ or new forms of human-computer symbiosis, rendering this entire debate moot? Elon
61
+ Musk: Well, that’s a possibility. If breakthroughs in quantum computing or other
62
+ technologies overshadow our current AI concerns, it could change the entire landscape.
63
+ It’s difficult to predict all variables. Sam Altman: Agreed. Technology often
64
+ takes unexpected turns. If future advancements make our current debate irrelevant,
65
+ it just goes to show how unpredictable and fast-moving the tech world is. The
66
+ key takeaway would be the importance of adaptability and continuous learning.
67
+ Moderator: Fascinating. It appears that the only certainty in the tech world is
68
+ uncertainty itself. Thank you both for this engaging discussion.'
69
+ example_title: Sample 1
70
+ ---
71
+ # Arc of the Conversation Model
72
+ ## Model Details
73
+
74
+ - **Model Name:** arc_of_conversation
75
+ - **Model Type:** Fine-tuned `google/t5-small`
76
+ - **Language:** English
77
+ - **License:** MIT
78
+
79
+ ## Overview
80
+
81
+ The Conversation Arc Predictor model is designed to predict the arc of a conversation given its text. It is based on the `google/t5-small` model, fine-tuned on a custom dataset of conversations and their corresponding arcs. This model can be used to analyze and categorize conversation texts into predefined arcs.
82
+
83
+ ## Model Description
84
+
85
+ ### Model Architecture
86
+
87
+ The base model architecture is T5 (Text-To-Text Transfer Transformer), which treats every NLP problem as a text-to-text problem. The specific version used here is `google/t5-small`, which has been fine-tuned to understand and predict conversation arcs.
88
+
89
+ ### Fine-Tuning Data
90
+
91
+ The model was fine-tuned on a dataset consisting of conversation texts and their corresponding arcs. The dataset should be formatted in a CSV file with two columns: `conversation` and `arc`.
92
+
93
+ ### Intended Use
94
+
95
+ The model is intended for categorizing the arc of conversation texts. It can be useful for applications in customer service, chatbots, conversational analysis, and other areas where understanding the flow of a conversation is important.
96
+
97
+ ## How to Use
98
+
99
+ ### Inference
100
+
101
+ To use this model for inference, you need to load the fine-tuned model and tokenizer. Here is an example of how to do this using the `transformers` library:
102
+
103
+
104
+ Running Pipeline
105
+ ```python
106
+ # Use a pipeline as a high-level helper
107
+ from transformers import pipeline
108
+
109
+ convo1 = 'Your conversation text here.'
110
+ pipe = pipeline("summarization", model="Falconsai/arc_of_conversation")
111
+ res1 = pipe(convo1, max_length=1024, min_length=512, do_sample=False)
112
+ print(res1)
113
+
114
+ ```
115
+
116
+
117
+
118
+ Running on CPU
119
+ ```python
120
+ # Load model directly
121
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
122
+
123
+ tokenizer = AutoTokenizer.from_pretrained("Falconsai/arc_of_conversation")
124
+ model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/arc_of_conversation")
125
+
126
+ input_text = "Your conversation Here"
127
+ input_ids = tokenizer(input_text, return_tensors="pt").input_ids
128
+
129
+ outputs = model.generate(input_ids)
130
+ print(tokenizer.decode(outputs[0]))
131
+ ```
132
+
133
+ Running on GPU
134
+ ```python
135
+ # pip install accelerate
136
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
137
+
138
+ tokenizer = AutoTokenizer.from_pretrained("Falconsai/arc_of_conversation")
139
+ model = AutoModelForSeq2SeqLM.from_pretrained("Falconsai/arc_of_conversation", device_map="auto")
140
+
141
+ input_text = "Your conversation Here"
142
+ input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
143
+
144
+ outputs = model.generate(input_ids)
145
+ print(tokenizer.decode(outputs[0]))
146
+
147
+ ```
148
+
149
+
150
+
151
+
152
+
153
+ ## Training
154
+
155
+ The training process involves the following steps:
156
+
157
+ 1. **Load and Explore Data:** Load the dataset and perform initial exploration to understand the data distribution.
158
+ 2. **Preprocess Data:** Tokenize the conversations and prepare them for the T5 model.
159
+ 3. **Fine-Tune Model:** Fine-tune the `google/t5-small` model using the preprocessed data.
160
+ 4. **Evaluate Model:** Evaluate the model's performance on a validation set to ensure it's learning correctly.
161
+ 5. **Save Model:** Save the fine-tuned model for future use.
162
+
163
+ ## Evaluation
164
+
165
+ The model's performance should be evaluated on a separate validation set to ensure it accurately predicts the conversation arcs. Metrics such as accuracy, precision, recall, and F1 score can be used to assess its performance.
166
+
167
+ ## Limitations
168
+
169
+ - **Data Dependency:** The model's performance is highly dependent on the quality and representativeness of the training data.
170
+ - **Generalization:** The model may not generalize well to conversation texts that are significantly different from the training data.
171
+
172
+ ## Ethical Considerations
173
+
174
+ When deploying the model, be mindful of the ethical implications, including but not limited to:
175
+
176
+ - **Privacy:** Ensure that conversation data used for training and inference does not contain sensitive or personally identifiable information.
177
+ - **Bias:** Be aware of potential biases in the training data that could affect the model's predictions.
178
+
179
+ ## License
180
+
181
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
182
+
183
+ ## Citation
184
+
185
+ If you use this model in your research, please cite it as follows:
186
+
187
+ ```
188
+ @misc{conversation_arc_predictor,
189
+ author = {Michael Stattelman},
190
+ title = {Arc of the Conversation Generator},
191
+ year = {2024},
192
+ publisher = {Falcons.ai},
193
+ }
194
+ ```
195
+
196
+ ---
Falconsai/arc_of_conversation/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Falconsai/arc_of_conversation",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 512,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 6,
22
+ "num_heads": 8,
23
+ "num_layers": 6,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.41.2",
59
+ "use_cache": true,
60
+ "vocab_size": 32128
61
+ }
Falconsai/arc_of_conversation/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "eos_token_id": 1,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.41.2"
6
+ }
Falconsai/arc_of_conversation/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0535b99dfbe93161243de8981dd28720918d83d9ad00182d0ed052943594a6b9
3
+ size 242041896
Falconsai/arc_of_conversation/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
Falconsai/arc_of_conversation/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
Falconsai/arc_of_conversation/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Falconsai/arc_of_conversation/tokenizer_config.json ADDED
@@ -0,0 +1,942 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "max_length": 2048,
934
+ "model_max_length": 512,
935
+ "pad_token": "<pad>",
936
+ "stride": 0,
937
+ "tokenizer_class": "T5Tokenizer",
938
+ "truncation": true,
939
+ "truncation_side": "right",
940
+ "truncation_strategy": "longest_first",
941
+ "unk_token": "<unk>"
942
+ }
Falconsai/fear_mongering_detection/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Falconsai/fear_mongering_detection/README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - text-classification
5
+ - distilbert
6
+ license: apache-2.0
7
+ widget:
8
+ - text: >-
9
+ A secret society is orchestrating a global experiment in emotional
10
+ manipulation, using mass media to incite fear and anxiety among the
11
+ population.
12
+ example_title: Fear Mongering
13
+ - text: >-
14
+ Each year, the Internal Revenue Service (IRS) determines the staffing level
15
+ for its toll-free telephone customer service operations. GAO found that IRS
16
+ lacks a long-term telephone customer service goal that reflects the needs of
17
+ taxpayers and the costs and benefits of meeting that goal. Rather, IRS
18
+ annually determines the level of funding it will seek for its customer
19
+ service workforce, using its judgment of how to best balance service and
20
+ compliance activities.
21
+ example_title: Normal Speech
22
+ ---
23
+ # Model Card: Finetuned DistilBERT for Fear Mongering Detection
24
+
25
+ ## Model Description
26
+
27
+ The **Fine-Tuned DistilBERT** is a variant of the BERT transformer model,
28
+ distilled for efficient performance while maintaining high accuracy.
29
+ It has been adapted and fine-tuned for the specific task of detecting fear mongering in text data.
30
+
31
+
32
+ ### Definition
33
+ Fear Monger:
34
+ /ˈfɪrˌmʌŋ.ɡɚ/ to intentionally try to make people afraid of something when this is not necessary or reasonable.
35
+
36
+
37
+ The model, named "Falconsai/fear_mongering_detection", is pre-trained on a substantial amount of text data,
38
+ which allows it to capture semantic nuances and contextual information present in natural language text.
39
+ It has been fine-tuned with meticulous attention to hyperparameter settings, including batch size and learning rate, to ensure optimal model performance for the fear mongering detection task.
40
+
41
+ During the fine-tuning process, a batch size of 16 for efficient computation and learning was chosen.
42
+ Additionally, a learning rate (2e-5) was selected to strike a balance between rapid convergence and steady optimization,
43
+ ensuring the model not only learns quickly but also steadily refines its capabilities throughout training.
44
+
45
+ This model has been trained for 100 epochs on a rather small dataset of under 50k examples, specifically designed for "Fear Mongering Identification".
46
+
47
+ The goal of this meticulous training process is to equip the model with the ability to identify instances of Fear Mongering in text data effectively, making it ready to contribute to a wide range of applications involving human speech, text, and generated content.
48
+
49
+
50
+ ### How to Use
51
+ To use this model for fear mongering classification, you can follow these steps:
52
+
53
+ ```python
54
+ from transformers import pipeline
55
+
56
+ statement = "The rise of smart cities is part of a covert plan to create a global surveillance network, where every move and action is monitored and controlled."
57
+ classifier = pipeline("text-classification", model="Falconsai/fear_mongering_detection")
58
+ classifier(statement)
59
+
60
+ ```
61
+
62
+ ## Model Details
63
+
64
+ - **Model Name:** Falconsai/fear_mongering_detection
65
+ - **Model Type:** Text Classification
66
+ - **Architecture:** DistilBERT-base-uncased
67
+
68
+
69
+ ## Use Cases
70
+
71
+ ### 1. Social Media Monitoring
72
+
73
+ - **Description:** The model can be applied to analyze social media posts and comments to identify instances of fear mongering. This can be useful for social media platforms to monitor and moderate content that may spread fear or misinformation.
74
+
75
+ ### 2. News Article Analysis
76
+
77
+ - **Description:** The model can be utilized to analyze news articles and identify sections containing fear-mongering language. This can help media outlets and fact-checking organizations to assess the tone and potential bias in news reporting.
78
+
79
+ ### 3. Content Moderation in Online Platforms
80
+
81
+ - **Description:** Online platforms and forums can deploy the model to automatically flag or filter out content that may be perceived as fear-mongering. This helps maintain a more positive and constructive online environment.
82
+
83
+
84
+ ## Limitations
85
+
86
+ - **Domain Specificity:** The model was trained specifically to identify fear mongering, so its performance is limited to that task and may not generalize well to other contexts.
87
+ - **False Positives:** The model may occasionally misclassify non-fear-mongering text as fear-mongering. Users should be aware of this limitation.
88
+
89
+
90
+ ## Responsible Usage
91
+
92
+ It is essential to use this model responsibly and ethically, adhering to content guidelines and applicable regulations when implementing it in real-world applications, particularly those involving potentially sensitive content.
93
+
94
+ ## References
95
+
96
+ - [Hugging Face Model Hub](https://huggingface.co/models)
97
+ - [DistilBERT Paper](https://arxiv.org/abs/1910.01108)
98
+
99
+ **Disclaimer:** The model's performance may be influenced by the quality and representativeness of the data it was fine-tuned on. Users are encouraged to assess the model's suitability for their specific applications and datasets.
100
+
101
+
102
+ ## Conclusion
103
+
104
+ This model card provides an overview of a fine-tuned DistilBERT model for fear mongering detection. Users are encouraged to consider the model's performance, limitations, and ethical considerations when applying it in different scenarios.
Falconsai/fear_mongering_detection/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Falconsai/fear_mongering_detection",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Fear_Mongering",
13
+ "1": "Non_Fear_Mongering"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "max_position_embeddings": 512,
17
+ "model_type": "distilbert",
18
+ "n_heads": 12,
19
+ "n_layers": 6,
20
+ "pad_token_id": 0,
21
+ "problem_type": "single_label_classification",
22
+ "qa_dropout": 0.1,
23
+ "seq_classif_dropout": 0.2,
24
+ "sinusoidal_pos_embds": false,
25
+ "tie_weights_": true,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.36.0.dev0",
28
+ "vocab_size": 30522
29
+ }
Falconsai/fear_mongering_detection/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9445136e7577532d34e133fba40304e56836bf0237a321ec0b5d9834ebfbde67
3
+ size 267832560
Falconsai/fear_mongering_detection/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Falconsai/fear_mongering_detection/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Falconsai/fear_mongering_detection/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
Falconsai/fear_mongering_detection/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Falconsai/intent_classification/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Falconsai/intent_classification/README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ pipeline_tag: text-classification
4
+ language:
5
+ - en
6
+ widget:
7
+ - text: I ordered from you 2 weeks ago and its stil not here.
8
+ - text: I need to bring in my daughter for a checkup.
9
+ ---
10
+ # Model Card: Fine-Tuned DistilBERT for User Intent Classification
11
+
12
+ ## Model Description
13
+
14
+ The **Fine-Tuned DistilBERT** is a variant of the BERT transformer model,
15
+ distilled for efficient performance while maintaining high accuracy.
16
+ It has been adapted and fine-tuned for the specific task of classifying user intent in text data.
17
+
18
+ The model, named "distilbert-base-uncased," is pre-trained on a substantial amount of text data,
19
+ which allows it to capture semantic nuances and contextual information present in natural language text.
20
+ It has been fine-tuned with meticulous attention to hyperparameter settings, including batch size and learning rate, to ensure optimal model performance for the user intent classification task.
21
+
22
+ During the fine-tuning process, a batch size of 8 for efficient computation and learning was chosen.
23
+ Additionally, a learning rate (2e-5) was selected to strike a balance between rapid convergence and steady optimization,
24
+ ensuring the model not only learns quickly but also steadily refines its capabilities throughout training.
25
+
26
+ This model has been trained for 100 epochs on a rather small dataset of under 50k examples, specifically designed for user intent classification.
27
+ The dataset consists of text samples, each labeled with different user intents, such as "information seeking," "question asking," or "opinion expressing." The diversity within the dataset allowed the model to learn to identify user intent accurately. This dataset was carefully curated from a variety of sources.
28
+
29
+ The goal of this meticulous training process is to equip the model with the ability to classify user intent in text data effectively, making it ready to contribute to a wide range of applications involving user interaction analysis and personalization.
30
+
31
+ ## Intended Uses & Limitations
32
+
33
+ ### Intended Uses
34
+ - **User Intent Classification**: The primary intended use of this model is to classify user intent in text data. It is well-suited for applications that involve understanding user intentions, such as chatbots, virtual assistants, and recommendation systems.
35
+
36
+ ### How to Use
37
+ To use this model for user intent classification, you can follow these steps:
38
+
39
+ ```python
40
+ from transformers import pipeline
41
+
42
+ classifier = pipeline("text-classification", model="Falconsai/intent_classification")
43
+ text = "Your text to classify here."
44
+ result = classifier(text)
45
+ ```
46
+
47
+ ### Limitations
48
+ - **Specialized Task Fine-Tuning**: While the model excels at user intent classification, its performance may vary when applied to other natural language processing tasks. Users interested in employing this model for different tasks should explore fine-tuned versions available in the model hub for optimal results.
49
+
50
+ ## Training Data
51
+
52
+ The model's training data includes a proprietary dataset designed for user intent classification. This dataset comprises a diverse collection of text samples, categorized into various user intent classes. The training process aimed to equip the model with the ability to classify user intent effectively.
53
+
54
+ ### Training Stats
55
+ - Evaluation Loss: 0.011744413524866104
56
+ - Evaluation Accuracy: 0.9986976744186047
57
+ - Evaluation Runtime: 3.1136
58
+ - Evaluation Samples per Second: 1726.29
59
+ - Evaluation Steps per Second: 215.826
60
+
61
+ ## Responsible Usage
62
+
63
+ It is essential to use this model responsibly and ethically, adhering to content guidelines and applicable regulations when implementing it in real-world applications, particularly those involving potentially sensitive content.
64
+
65
+ ## References
66
+
67
+ - [Hugging Face Model Hub](https://huggingface.co/models)
68
+ - [DistilBERT Paper](https://arxiv.org/abs/1910.01108)
69
+
70
+ **Disclaimer:** The model's performance may be influenced by the quality and representativeness of the data it was fine-tuned on. Users are encouraged to assess the model's suitability for their specific applications and datasets.
Falconsai/intent_classification/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Falconsai/intent_classification",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "cancellation",
13
+ "1": "ordering",
14
+ "2": "shipping",
15
+ "3": "invoicing",
16
+ "4": "billing and payment",
17
+ "5": "returns and refunds",
18
+ "6": "complaints and feedback",
19
+ "7": "speak to person",
20
+ "8": "edit account",
21
+ "9": "delete account",
22
+ "10": "delivery information",
23
+ "11": "subscription",
24
+ "12": "recover password",
25
+ "13": "registration problems",
26
+ "14": "appointment"
27
+ },
28
+ "initializer_range": 0.02,
29
+ "label2id": null,
30
+ "max_position_embeddings": 512,
31
+ "model_type": "distilbert",
32
+ "n_heads": 12,
33
+ "n_layers": 6,
34
+ "pad_token_id": 0,
35
+ "problem_type": "single_label_classification",
36
+ "qa_dropout": 0.1,
37
+ "seq_classif_dropout": 0.2,
38
+ "sinusoidal_pos_embds": false,
39
+ "tie_weights_": true,
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.31.0",
42
+ "vocab_size": 30522
43
+ }
Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16df93af33748a9127940878f4c720bc157da4f89a6a1756904bcfcb835d362c
3
+ size 66210
Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6dca982d7f198072ac884ebe092a40fcd65547c434bb5995dc074909524fb80
3
+ size 266687104
Falconsai/intent_classification/coreml/text-classification/float32_model.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "AFA8F27A-567D-483E-9161-8CCF1082DB9C": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "E8C20928-9550-43A8-8294-A3CB7A28DCB1": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "AFA8F27A-567D-483E-9161-8CCF1082DB9C"
18
+ }
Falconsai/intent_classification/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:292e786305afd79f04f799a7c6f7756b29f261ff6e944b23c3a540baa24741ba
3
+ size 267872556
Falconsai/intent_classification/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9827b446f7bbcaca6b56c2a8accecc97264953e9d5d9adaf7c29d6a6dad61f3e
3
+ size 267894125
Falconsai/intent_classification/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
Falconsai/intent_classification/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
Falconsai/intent_classification/tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
Falconsai/intent_classification/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
Falconsai/offensive_speech_detection/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Falconsai/offensive_speech_detection/README.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ pipeline_tag: text-classification
4
+ language:
5
+ - en
6
+ widget:
7
+ - text: I think this is great.
8
+ example_title: Example 1
9
+ - text: You are awful
10
+ example_title: Example 2
11
+ ---
12
+ # Model Card: Fine-Tuned DistilBERT for Offensive/Hate Speech Detection
13
+
14
+ ## Model Description
15
+
16
+ The **Fine-Tuned DistilBERT** is a variant of the BERT transformer model,
17
+ distilled for efficient performance while maintaining high accuracy.
18
+ It has been adapted and fine-tuned for the specific task of offensive/hate speech detection in text data.
19
+
20
+ The model, named "distilbert-base-uncased," is pre-trained on a substantial amount of text data,
21
+ which allows it to capture semantic nuances and contextual information present in natural language text.
22
+ It has been fine-tuned with meticulous attention to hyperparameter settings, including batch size and learning rate, to ensure optimal model performance for the offensive/hate speech detection task.
23
+
24
+ During the fine-tuning process, a batch size of 16 for efficient computation and learning was chosen.
25
+ Additionally, a learning rate (2e-5) was selected to strike a balance between rapid convergence and steady optimization,
26
+ ensuring the model not only learns quickly but also steadily refines its capabilities throughout training.
27
+
28
+ This model has been trained on a proprietary dataset of fewer than 100k samples, specifically designed for offensive/hate speech detection.
29
+ The dataset consists of text samples, each labeled as "non-offensive" or "offensive."
30
+ The diversity within the dataset allowed the model to learn to identify offensive content accurately. This dataset was combined from research datasets on this topic as well as news headlines. All duplicates were removed and meticulous effort was taken to ensure the dataset quality.
31
+
32
+ The goal of this meticulous training process is to equip the model with the ability to detect offensive and hate speech in text data effectively. The result is a model ready to contribute significantly to content moderation and safety, while maintaining high standards of accuracy and reliability.
33
+
34
+ ## Intended Uses & Limitations
35
+
36
+ ### Intended Uses
37
+ - **Offensive/Hate Speech Detection**: The primary intended use of this model is to detect offensive or hate speech in text data. It is well-suited for filtering and identifying inappropriate content in various applications.
38
+
39
+ - **Of Special Note**: The data suggests the word "like" is most often used as a comparative statement in the derogatory.
40
+ - These have numerous instances within the "Offensive Speech Dataset". "You look like X" or "He smells like X" are quite common.
41
+ - Also of note, the ABSENCE/LACK OF punctuation lends itself heavily to the "Offensive" dataset.
42
+ - Accordingly the model will identify these as well, based on their prominence in the training data.
43
+
44
+
45
+ ### How to Use
46
+ To use this model for offensive/hate speech detection, you can follow these steps:
47
+ ```python
48
+ from transformers import pipeline
49
+
50
+ classifier = pipeline("text-classification", model="Falconsai/offensive_speech_detection")
51
+ text = "Your text to classify here."
52
+ result = classifier(text)
53
+
54
+ ```
55
+
56
+
57
+ ### Limitations
58
+ - **Specialized Task Fine-Tuning**: While the model is adept at offensive/hate speech detection, its performance may vary when applied to other natural language processing tasks.
59
+ - Users interested in employing this model for different tasks should explore fine-tuned versions available in the model hub for optimal results.
60
+
61
+ ## Training Data
62
+
63
+ The model's training data includes a proprietary dataset designed for offensive/hate speech detection. This dataset comprises a diverse collection of text samples, categorized into "non-offensive" and "offensive" classes. The training process aimed to equip the model with the ability to distinguish between offensive and non-offensive content effectively.
64
+
65
+ ### Training Stats
66
+ - Evaluation Loss: 0.018403256312012672
67
+ - Evaluation Accuracy: 0.9973234886940471
68
+ - Evaluation Runtime: 85.0789
69
+ - Evaluation Samples per Second: 127.352
70
+ - Evaluation Steps per Second: 7.969
71
+
72
+ **Note:** Specific evaluation statistics should be provided based on the model's performance.
73
+
74
+ ## Responsible Usage
75
+
76
+ It is essential to use this model responsibly and ethically, adhering to content guidelines and applicable regulations when implementing it in real-world applications, particularly those involving potentially sensitive content.
77
+
78
+ ## References
79
+
80
+ - [Hugging Face Model Hub](https://huggingface.co/models)
81
+ - [DistilBERT Paper](https://arxiv.org/abs/1910.01108)
82
+
83
+ **Disclaimer:** The model's performance may be influenced by the quality and representativeness of the data it was fine-tuned on. Users are encouraged to assess the model's suitability for their specific applications and datasets.
Falconsai/offensive_speech_detection/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Falconsai/offensive_speech_detection",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "OFFENSIVE",
13
+ "1": "SAFE"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "label2id": {
17
+ "OFFENSIVE": 0,
18
+ "SAFE": 1
19
+ },
20
+ "max_position_embeddings": 512,
21
+ "model_type": "distilbert",
22
+ "n_heads": 12,
23
+ "n_layers": 6,
24
+ "pad_token_id": 0,
25
+ "problem_type": "single_label_classification",
26
+ "qa_dropout": 0.1,
27
+ "seq_classif_dropout": 0.2,
28
+ "sinusoidal_pos_embds": false,
29
+ "tie_weights_": true,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.31.0",
32
+ "vocab_size": 30522
33
+ }
Falconsai/offensive_speech_detection/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70666d9324c110df5cf43cd9f7b11b9a00077bfe5847bfe6302a0866c74c9611
3
+ size 267832560
Falconsai/offensive_speech_detection/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b899111ea3377e5da91f3d3a7b9a5469fe38a5d32f5d5de7ba78be8d9d05dd06
3
+ size 535701061