Training in progress epoch 0

Files changed (6) hide show

README.md CHANGED Viewed

@@ -14,9 +14,9 @@ probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss: 0.6738
-- Validation Loss: 0.6447
-- Train Accuracy: 0.625
 - Epoch: 0
 ## Model description
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
 | Train Loss | Validation Loss | Train Accuracy | Epoch |
 |:----------:|:---------------:|:--------------:|:-----:|
-| 0.6738     | 0.6447          | 0.625          | 0     |
 ### Framework versions

 This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Train Loss: 0.6613
+- Validation Loss: 0.6221
+- Train Accuracy: 0.725
 - Epoch: 0
 ## Model description
 | Train Loss | Validation Loss | Train Accuracy | Epoch |
 |:----------:|:---------------:|:--------------:|:-----:|
+| 0.6613     | 0.6221          | 0.725          | 0     |
 ### Framework versions

sentencepiece.bpe.model ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:49c4ba4e495ddf31eb2fdba7fc6aef3c233091d25d35bc9d24694ccf48ae114c
+size 904693

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,20 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
 }

 {
+  "additional_special_tokens": [
+    "<s>NOTUSED",
+    "</s>NOTUSED",
+    "<_>"
+  ],
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
 }

tf_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52704b83fea4176093990308edf87b7821bdb6988c92708c77b5cdb2337b349e
-size 267951808

 version https://git-lfs.github.com/spec/v1
+oid sha256:47c7eec28498014b8432e6d4dd9a704c918d4790a5c9dc11ee682e55fb472bc9
+size 267955144

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,13 +1,25 @@
 {
   "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "DistilBertTokenizer",
-  "unk_token": "[UNK]"
 }

 {
+  "additional_special_tokens": [
+    "<s>NOTUSED",
+    "</s>NOTUSED",
+    "<_>"
+  ],
+  "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "model_max_length": 416,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "CamembertTokenizer",
+  "unk_token": "<unk>"
 }