Upload folder using huggingface_hub
Browse files- Time period classification/camelbert-era-classifier.zip +3 -0
- Time period classification/camelbert-era-classifier/__MACOSX/._camelbert-era-classifier +0 -0
- Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._config.json +0 -0
- Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._era_labels.txt +0 -0
- Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._model.safetensors +3 -0
- Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._tokenizer.json +0 -0
- Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._tokenizer_config.json +0 -0
- Time period classification/camelbert-era-classifier/camelbert-era-classifier/config.json +39 -0
- Time period classification/camelbert-era-classifier/camelbert-era-classifier/era_labels.txt +3 -0
- Time period classification/camelbert-era-classifier/camelbert-era-classifier/model.safetensors +3 -0
- Time period classification/camelbert-era-classifier/camelbert-era-classifier/tokenizer.json +0 -0
- Time period classification/camelbert-era-classifier/camelbert-era-classifier/tokenizer_config.json +15 -0
- Time period classification/config.json +39 -0
- Time period classification/era_labels.txt +3 -0
- Time period classification/era_model.ipynb +0 -0
- Time period classification/model.safetensors +3 -0
- Time period classification/tokenizer.json +0 -0
- Time period classification/tokenizer_config.json +15 -0
- Time_period_classification_Model_V2/BERTerav2-20260416T182356Z-3-001.zip +3 -0
- Time_period_classification_Model_V2/BERTerav2/config.json +29 -0
- Time_period_classification_Model_V2/BERTerav2/model.safetensors +3 -0
- Time_period_classification_Model_V2/BERTerav2/tokenizer.json +0 -0
- Time_period_classification_Model_V2/BERTerav2/tokenizer_config.json +21 -0
- Time_period_classification_Model_V2/BERTerav2/training_args.bin +3 -0
- Time_period_classification_Model_V2/Eraclassifer.ipynb +0 -0
- Topic_classifiication/TopicModel.ipynb +0 -0
- Topic_classifiication/best_model-20260416T175330Z-3-001.zip +3 -0
- Topic_classifiication/best_model/config.json +46 -0
- Topic_classifiication/best_model/label_info.pkl +3 -0
- Topic_classifiication/best_model/model.safetensors +3 -0
- Topic_classifiication/best_model/tokenizer.json +0 -0
- Topic_classifiication/best_model/tokenizer_config.json +21 -0
- Topic_classifiication/best_model/training_args.bin +3 -0
- config.json +61 -0
- labels.txt +14 -0
- model.safetensors +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +15 -0
Time period classification/camelbert-era-classifier.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6ffe5281862289490f47edb7da1eefa597c2d345dba213312977f06cbdf15ca
|
| 3 |
+
size 405265498
|
Time period classification/camelbert-era-classifier/__MACOSX/._camelbert-era-classifier
ADDED
|
Binary file (176 Bytes). View file
|
|
|
Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._config.json
ADDED
|
Binary file (176 Bytes). View file
|
|
|
Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._era_labels.txt
ADDED
|
Binary file (176 Bytes). View file
|
|
|
Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d4cc8aab155bd9fce77f27cba1087d98f3b5966b33be37efa313f4959855a5f
|
| 3 |
+
size 176
|
Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._tokenizer.json
ADDED
|
Binary file (176 Bytes). View file
|
|
|
Time period classification/camelbert-era-classifier/__MACOSX/camelbert-era-classifier/._tokenizer_config.json
ADDED
|
Binary file (176 Bytes). View file
|
|
|
Time period classification/camelbert-era-classifier/camelbert-era-classifier/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": null,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": null,
|
| 11 |
+
"gradient_checkpointing": false,
|
| 12 |
+
"hidden_act": "gelu",
|
| 13 |
+
"hidden_dropout_prob": 0.1,
|
| 14 |
+
"hidden_size": 768,
|
| 15 |
+
"id2label": {
|
| 16 |
+
"0": "LABEL_0",
|
| 17 |
+
"1": "LABEL_1",
|
| 18 |
+
"2": "LABEL_2"
|
| 19 |
+
},
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 3072,
|
| 22 |
+
"is_decoder": false,
|
| 23 |
+
"label2id": {
|
| 24 |
+
"LABEL_0": 0,
|
| 25 |
+
"LABEL_1": 1,
|
| 26 |
+
"LABEL_2": 2
|
| 27 |
+
},
|
| 28 |
+
"layer_norm_eps": 1e-12,
|
| 29 |
+
"max_position_embeddings": 512,
|
| 30 |
+
"model_type": "bert",
|
| 31 |
+
"num_attention_heads": 12,
|
| 32 |
+
"num_hidden_layers": 12,
|
| 33 |
+
"pad_token_id": 0,
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"transformers_version": "5.0.0",
|
| 36 |
+
"type_vocab_size": 2,
|
| 37 |
+
"use_cache": false,
|
| 38 |
+
"vocab_size": 30000
|
| 39 |
+
}
|
Time period classification/camelbert-era-classifier/camelbert-era-classifier/era_labels.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
قديم
|
| 2 |
+
وسيط
|
| 3 |
+
حديث
|
Time period classification/camelbert-era-classifier/camelbert-era-classifier/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a017935e208cd28f19e610037910fdb18ff9903ec921d0e60cfc1757d3f4bb57
|
| 3 |
+
size 436358108
|
Time period classification/camelbert-era-classifier/camelbert-era-classifier/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Time period classification/camelbert-era-classifier/camelbert-era-classifier/tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"full_tokenizer_file": null,
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"strip_accents": null,
|
| 12 |
+
"tokenize_chinese_chars": true,
|
| 13 |
+
"tokenizer_class": "BertTokenizer",
|
| 14 |
+
"unk_token": "[UNK]"
|
| 15 |
+
}
|
Time period classification/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": null,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": null,
|
| 11 |
+
"gradient_checkpointing": false,
|
| 12 |
+
"hidden_act": "gelu",
|
| 13 |
+
"hidden_dropout_prob": 0.1,
|
| 14 |
+
"hidden_size": 768,
|
| 15 |
+
"id2label": {
|
| 16 |
+
"0": "LABEL_0",
|
| 17 |
+
"1": "LABEL_1",
|
| 18 |
+
"2": "LABEL_2"
|
| 19 |
+
},
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"intermediate_size": 3072,
|
| 22 |
+
"is_decoder": false,
|
| 23 |
+
"label2id": {
|
| 24 |
+
"LABEL_0": 0,
|
| 25 |
+
"LABEL_1": 1,
|
| 26 |
+
"LABEL_2": 2
|
| 27 |
+
},
|
| 28 |
+
"layer_norm_eps": 1e-12,
|
| 29 |
+
"max_position_embeddings": 512,
|
| 30 |
+
"model_type": "bert",
|
| 31 |
+
"num_attention_heads": 12,
|
| 32 |
+
"num_hidden_layers": 12,
|
| 33 |
+
"pad_token_id": 0,
|
| 34 |
+
"tie_word_embeddings": true,
|
| 35 |
+
"transformers_version": "5.0.0",
|
| 36 |
+
"type_vocab_size": 2,
|
| 37 |
+
"use_cache": false,
|
| 38 |
+
"vocab_size": 30000
|
| 39 |
+
}
|
Time period classification/era_labels.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
قديم
|
| 2 |
+
وسيط
|
| 3 |
+
حديث
|
Time period classification/era_model.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Time period classification/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a017935e208cd28f19e610037910fdb18ff9903ec921d0e60cfc1757d3f4bb57
|
| 3 |
+
size 436358108
|
Time period classification/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Time period classification/tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"full_tokenizer_file": null,
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"strip_accents": null,
|
| 12 |
+
"tokenize_chinese_chars": true,
|
| 13 |
+
"tokenizer_class": "BertTokenizer",
|
| 14 |
+
"unk_token": "[UNK]"
|
| 15 |
+
}
|
Time_period_classification_Model_V2/BERTerav2-20260416T182356Z-3-001.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b518da705b634274a107f359f38a982de591b84fbe8e9bf835b0c160d80e22ec
|
| 3 |
+
size 502654001
|
Time_period_classification_Model_V2/BERTerav2/config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": null,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": null,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 3072,
|
| 16 |
+
"is_decoder": false,
|
| 17 |
+
"layer_norm_eps": 1e-12,
|
| 18 |
+
"max_position_embeddings": 512,
|
| 19 |
+
"model_type": "bert",
|
| 20 |
+
"num_attention_heads": 12,
|
| 21 |
+
"num_hidden_layers": 12,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"problem_type": "single_label_classification",
|
| 24 |
+
"tie_word_embeddings": true,
|
| 25 |
+
"transformers_version": "5.0.0",
|
| 26 |
+
"type_vocab_size": 2,
|
| 27 |
+
"use_cache": false,
|
| 28 |
+
"vocab_size": 64000
|
| 29 |
+
}
|
Time_period_classification_Model_V2/BERTerav2/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31d11e65f66639be4a54c46704a227de6f8acdd8bc6712331c799d558cfe4fec
|
| 3 |
+
size 540803048
|
Time_period_classification_Model_V2/BERTerav2/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Time_period_classification_Model_V2/BERTerav2/tokenizer_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_basic_tokenize": true,
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"max_len": 512,
|
| 9 |
+
"model_max_length": 512,
|
| 10 |
+
"never_split": [
|
| 11 |
+
"[بريد]",
|
| 12 |
+
"[مستخدم]",
|
| 13 |
+
"[رابط]"
|
| 14 |
+
],
|
| 15 |
+
"pad_token": "[PAD]",
|
| 16 |
+
"sep_token": "[SEP]",
|
| 17 |
+
"strip_accents": null,
|
| 18 |
+
"tokenize_chinese_chars": true,
|
| 19 |
+
"tokenizer_class": "BertTokenizer",
|
| 20 |
+
"unk_token": "[UNK]"
|
| 21 |
+
}
|
Time_period_classification_Model_V2/BERTerav2/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea817d048a3a19b567f5fc854c933124083537dd06f7827ab34f2fd8ad7e491f
|
| 3 |
+
size 5201
|
Time_period_classification_Model_V2/Eraclassifer.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Topic_classifiication/TopicModel.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Topic_classifiication/best_model-20260416T175330Z-3-001.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55916e4507e95f8c8dcdb0153a0316ff44f8f6a5a5b263d8271aed47420ee0a5
|
| 3 |
+
size 502676405
|
Topic_classifiication/best_model/config.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": null,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": null,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "\u062f\u064a\u0646\u064a\u0629",
|
| 16 |
+
"1": "\u0631\u062b\u0627\u0621",
|
| 17 |
+
"2": "\u063a\u0632\u0644_\u0631\u0648\u0645\u0627\u0646\u0633\u064a",
|
| 18 |
+
"3": "\u0645\u062f\u062d",
|
| 19 |
+
"4": "\u0647\u062c\u0627\u0621_\u0630\u0645",
|
| 20 |
+
"5": "\u0648\u062c\u062f\u0627\u0646\u064a",
|
| 21 |
+
"6": "\u0648\u0637\u0646\u064a\u0629"
|
| 22 |
+
},
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"intermediate_size": 3072,
|
| 25 |
+
"is_decoder": false,
|
| 26 |
+
"label2id": {
|
| 27 |
+
"\u062f\u064a\u0646\u064a\u0629": 0,
|
| 28 |
+
"\u0631\u062b\u0627\u0621": 1,
|
| 29 |
+
"\u063a\u0632\u0644_\u0631\u0648\u0645\u0627\u0646\u0633\u064a": 2,
|
| 30 |
+
"\u0645\u062f\u062d": 3,
|
| 31 |
+
"\u0647\u062c\u0627\u0621_\u0630\u0645": 4,
|
| 32 |
+
"\u0648\u062c\u062f\u0627\u0646\u064a": 5,
|
| 33 |
+
"\u0648\u0637\u0646\u064a\u0629": 6
|
| 34 |
+
},
|
| 35 |
+
"layer_norm_eps": 1e-12,
|
| 36 |
+
"max_position_embeddings": 512,
|
| 37 |
+
"model_type": "bert",
|
| 38 |
+
"num_attention_heads": 12,
|
| 39 |
+
"num_hidden_layers": 12,
|
| 40 |
+
"pad_token_id": 0,
|
| 41 |
+
"tie_word_embeddings": true,
|
| 42 |
+
"transformers_version": "5.0.0",
|
| 43 |
+
"type_vocab_size": 2,
|
| 44 |
+
"use_cache": false,
|
| 45 |
+
"vocab_size": 64000
|
| 46 |
+
}
|
Topic_classifiication/best_model/label_info.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f095b5dd3e52136d6965b639c79b2bd6c7da7e8be30056fee0c09a01a6f462cb
|
| 3 |
+
size 542
|
Topic_classifiication/best_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99a18427e55a3faaffe803c2acd5422cfc2be96bedd9f0bb1449de0e3b75ed67
|
| 3 |
+
size 540818452
|
Topic_classifiication/best_model/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Topic_classifiication/best_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"cls_token": "[CLS]",
|
| 4 |
+
"do_basic_tokenize": true,
|
| 5 |
+
"do_lower_case": false,
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"max_len": 512,
|
| 9 |
+
"model_max_length": 512,
|
| 10 |
+
"never_split": [
|
| 11 |
+
"[بريد]",
|
| 12 |
+
"[مستخدم]",
|
| 13 |
+
"[رابط]"
|
| 14 |
+
],
|
| 15 |
+
"pad_token": "[PAD]",
|
| 16 |
+
"sep_token": "[SEP]",
|
| 17 |
+
"strip_accents": null,
|
| 18 |
+
"tokenize_chinese_chars": true,
|
| 19 |
+
"tokenizer_class": "BertTokenizer",
|
| 20 |
+
"unk_token": "[UNK]"
|
| 21 |
+
}
|
Topic_classifiication/best_model/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73e6ac9eacf3c2617b863a907d6dd1a1be000cf0df3cf7dfd565b45d6f378eca
|
| 3 |
+
size 5201
|
config.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": null,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": null,
|
| 11 |
+
"hidden_act": "gelu",
|
| 12 |
+
"hidden_dropout_prob": 0.1,
|
| 13 |
+
"hidden_size": 768,
|
| 14 |
+
"id2label": {
|
| 15 |
+
"0": "LABEL_0",
|
| 16 |
+
"1": "LABEL_1",
|
| 17 |
+
"2": "LABEL_2",
|
| 18 |
+
"3": "LABEL_3",
|
| 19 |
+
"4": "LABEL_4",
|
| 20 |
+
"5": "LABEL_5",
|
| 21 |
+
"6": "LABEL_6",
|
| 22 |
+
"7": "LABEL_7",
|
| 23 |
+
"8": "LABEL_8",
|
| 24 |
+
"9": "LABEL_9",
|
| 25 |
+
"10": "LABEL_10",
|
| 26 |
+
"11": "LABEL_11",
|
| 27 |
+
"12": "LABEL_12",
|
| 28 |
+
"13": "LABEL_13"
|
| 29 |
+
},
|
| 30 |
+
"initializer_range": 0.02,
|
| 31 |
+
"intermediate_size": 3072,
|
| 32 |
+
"is_decoder": false,
|
| 33 |
+
"label2id": {
|
| 34 |
+
"LABEL_0": 0,
|
| 35 |
+
"LABEL_1": 1,
|
| 36 |
+
"LABEL_10": 10,
|
| 37 |
+
"LABEL_11": 11,
|
| 38 |
+
"LABEL_12": 12,
|
| 39 |
+
"LABEL_13": 13,
|
| 40 |
+
"LABEL_2": 2,
|
| 41 |
+
"LABEL_3": 3,
|
| 42 |
+
"LABEL_4": 4,
|
| 43 |
+
"LABEL_5": 5,
|
| 44 |
+
"LABEL_6": 6,
|
| 45 |
+
"LABEL_7": 7,
|
| 46 |
+
"LABEL_8": 8,
|
| 47 |
+
"LABEL_9": 9
|
| 48 |
+
},
|
| 49 |
+
"layer_norm_eps": 1e-12,
|
| 50 |
+
"max_position_embeddings": 32,
|
| 51 |
+
"model_type": "bert",
|
| 52 |
+
"num_attention_heads": 12,
|
| 53 |
+
"num_hidden_layers": 10,
|
| 54 |
+
"pad_token_id": 0,
|
| 55 |
+
"position_embedding_type": "absolute",
|
| 56 |
+
"tie_word_embeddings": true,
|
| 57 |
+
"transformers_version": "5.0.0",
|
| 58 |
+
"type_vocab_size": 2,
|
| 59 |
+
"use_cache": false,
|
| 60 |
+
"vocab_size": 50000
|
| 61 |
+
}
|
labels.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
saree
|
| 2 |
+
kamel
|
| 3 |
+
mutakareb
|
| 4 |
+
mutadarak
|
| 5 |
+
munsareh
|
| 6 |
+
madeed
|
| 7 |
+
mujtath
|
| 8 |
+
ramal
|
| 9 |
+
baseet
|
| 10 |
+
khafeef
|
| 11 |
+
taweel
|
| 12 |
+
wafer
|
| 13 |
+
hazaj
|
| 14 |
+
rajaz
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b236d99219dd8fe710a09a2807edc05b2bd2936d02424a7959e03fceb0279ea4
|
| 3 |
+
size 439650624
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_lower_case": true,
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"mask_token": "[MASK]",
|
| 8 |
+
"model_max_length": 32,
|
| 9 |
+
"pad_token": "[PAD]",
|
| 10 |
+
"sep_token": "[SEP]",
|
| 11 |
+
"strip_accents": null,
|
| 12 |
+
"tokenize_chinese_chars": true,
|
| 13 |
+
"tokenizer_class": "BertTokenizer",
|
| 14 |
+
"unk_token": "[UNK]"
|
| 15 |
+
}
|