gpt2NepaliCasualLM;6L6H:Batch 1:Epoch 1

Browse files

Files changed (4) hide show

README.md +3 -27
config.json +10 -6
generation_config.json +0 -2
tf_model.h5 +2 -2

README.md CHANGED Viewed

@@ -13,9 +13,7 @@ probably proofread and complete it, then remove this comment. -->
 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss: 6.5975
-- Validation Loss: 6.5688
-- Epoch: 19
 ## Model description
@@ -34,38 +32,16 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- optimizer: {'name': 'AdamWeightDecay', 'learning_rate': 2e-05, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False, 'weight_decay_rate': 0.01}
 - training_precision: float32
 ### Training results
-| Train Loss | Validation Loss | Epoch |
-|:----------:|:---------------:|:-----:|
-| 7.2555     | 6.3951          | 0     |
-| 7.2407     | 6.4006          | 1     |
-| 7.1960     | 6.4038          | 2     |
-| 7.1641     | 6.4023          | 3     |
-| 7.1523     | 6.4059          | 4     |
-| 7.0866     | 6.4129          | 5     |
-| 7.0504     | 6.4220          | 6     |
-| 7.0379     | 6.4316          | 7     |
-| 6.9766     | 6.4400          | 8     |
-| 6.9600     | 6.4495          | 9     |
-| 6.9436     | 6.4596          | 10    |
-| 6.8813     | 6.4681          | 11    |
-| 6.8393     | 6.4785          | 12    |
-| 6.8269     | 6.4903          | 13    |
-| 6.7888     | 6.5005          | 14    |
-| 6.7664     | 6.5136          | 15    |
-| 6.6829     | 6.5277          | 16    |
-| 6.6784     | 6.5418          | 17    |
-| 6.6056     | 6.5553          | 18    |
-| 6.5975     | 6.5688          | 19    |
 ### Framework versions
 - Transformers 4.28.1
 - TensorFlow 2.11.0
-- Datasets 2.11.0
 - Tokenizers 0.13.3

 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- optimizer: None
 - training_precision: float32
 ### Training results
 ### Framework versions
 - Transformers 4.28.1
 - TensorFlow 2.11.0
+- Datasets 2.1.0
 - Tokenizers 0.13.3

config.json CHANGED Viewed

@@ -1,20 +1,24 @@
 {
-  "_name_or_path": "./gpt2NepaliCasualLM/",
   "_num_labels": 1,
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   ],
   "attn_pdrop": 0.1,
-  "bos_token_id": 1,
   "embd_pdrop": 0.1,
-  "eos_token_id": 2,
   "id2label": {
-    "0": "LABEL_0"
   },
   "initializer_range": 0.02,
   "label2id": {
-    "LABEL_0": 0
   },
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
@@ -22,7 +26,7 @@
   "n_embd": 768,
   "n_head": 6,
   "n_inner": null,
-  "n_layer": 3,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,

 {
+  "_name_or_path": "/kaggle/input/nepali-trained-gpt2-casual-language-model/gpt2NepaliCasualLM",
   "_num_labels": 1,
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   ],
   "attn_pdrop": 0.1,
+  "bos_token_id": null,
   "embd_pdrop": 0.1,
+  "eos_token_id": null,
   "id2label": {
+    "0": "NEUTRAL",
+    "1": "POSITIVE",
+    "2": "NEGATIVE"
   },
   "initializer_range": 0.02,
   "label2id": {
+    "NEGATIVE": 2,
+    "NEUTRAL": 0,
+    "POSITIVE": 1
   },
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_embd": 768,
   "n_head": 6,
   "n_inner": null,
+  "n_layer": 6,
   "n_positions": 1024,
   "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,

generation_config.json CHANGED Viewed

@@ -1,6 +1,4 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 50256,
-  "eos_token_id": 50256,
   "transformers_version": "4.28.1"
 }

 {
   "_from_model_config": true,
   "transformers_version": "4.28.1"
 }

tf_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a195114a9cbbce6c76053024a7ef7b4426cc8c63c6b98c5587987f0c5a77446
-size 241858624

 version https://git-lfs.github.com/spec/v1
+oid sha256:25eaa1bc3d6bb27576f2dcd3e84a7f4eeb7316205d0556974cb328c53396ed19
+size 326955968