raygx committed on
Commit
474e425
·
1 Parent(s): da12fb7

gpt2NepaliCasualLM;6L6H:Batch 1:Epoch 1

Browse files
Files changed (4) hide show
  1. README.md +3 -27
  2. config.json +10 -6
  3. generation_config.json +0 -2
  4. tf_model.h5 +2 -2
README.md CHANGED
@@ -13,9 +13,7 @@ probably proofread and complete it, then remove this comment. -->
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Train Loss: 6.5975
17
- - Validation Loss: 6.5688
18
- - Epoch: 19
19
 
20
  ## Model description
21
 
@@ -34,38 +32,16 @@ More information needed
34
  ### Training hyperparameters
35
 
36
  The following hyperparameters were used during training:
37
- - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': 2e-05, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07, 'amsgrad': False, 'weight_decay_rate': 0.01}
38
  - training_precision: float32
39
 
40
  ### Training results
41
 
42
- | Train Loss | Validation Loss | Epoch |
43
- |:----------:|:---------------:|:-----:|
44
- | 7.2555 | 6.3951 | 0 |
45
- | 7.2407 | 6.4006 | 1 |
46
- | 7.1960 | 6.4038 | 2 |
47
- | 7.1641 | 6.4023 | 3 |
48
- | 7.1523 | 6.4059 | 4 |
49
- | 7.0866 | 6.4129 | 5 |
50
- | 7.0504 | 6.4220 | 6 |
51
- | 7.0379 | 6.4316 | 7 |
52
- | 6.9766 | 6.4400 | 8 |
53
- | 6.9600 | 6.4495 | 9 |
54
- | 6.9436 | 6.4596 | 10 |
55
- | 6.8813 | 6.4681 | 11 |
56
- | 6.8393 | 6.4785 | 12 |
57
- | 6.8269 | 6.4903 | 13 |
58
- | 6.7888 | 6.5005 | 14 |
59
- | 6.7664 | 6.5136 | 15 |
60
- | 6.6829 | 6.5277 | 16 |
61
- | 6.6784 | 6.5418 | 17 |
62
- | 6.6056 | 6.5553 | 18 |
63
- | 6.5975 | 6.5688 | 19 |
64
 
65
 
66
  ### Framework versions
67
 
68
  - Transformers 4.28.1
69
  - TensorFlow 2.11.0
70
- - Datasets 2.11.0
71
  - Tokenizers 0.13.3
 
13
 
14
  This model was trained from scratch on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+
 
 
17
 
18
  ## Model description
19
 
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - optimizer: None
36
  - training_precision: float32
37
 
38
  ### Training results
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
 
42
  ### Framework versions
43
 
44
  - Transformers 4.28.1
45
  - TensorFlow 2.11.0
46
+ - Datasets 2.1.0
47
  - Tokenizers 0.13.3
config.json CHANGED
@@ -1,20 +1,24 @@
1
  {
2
- "_name_or_path": "./gpt2NepaliCasualLM/",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2LMHeadModel"
7
  ],
8
  "attn_pdrop": 0.1,
9
- "bos_token_id": 1,
10
  "embd_pdrop": 0.1,
11
- "eos_token_id": 2,
12
  "id2label": {
13
- "0": "LABEL_0"
 
 
14
  },
15
  "initializer_range": 0.02,
16
  "label2id": {
17
- "LABEL_0": 0
 
 
18
  },
19
  "layer_norm_epsilon": 1e-05,
20
  "model_type": "gpt2",
@@ -22,7 +26,7 @@
22
  "n_embd": 768,
23
  "n_head": 6,
24
  "n_inner": null,
25
- "n_layer": 3,
26
  "n_positions": 1024,
27
  "reorder_and_upcast_attn": false,
28
  "resid_pdrop": 0.1,
 
1
  {
2
+ "_name_or_path": "/kaggle/input/nepali-trained-gpt2-casual-language-model/gpt2NepaliCasualLM",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
6
  "GPT2LMHeadModel"
7
  ],
8
  "attn_pdrop": 0.1,
9
+ "bos_token_id": null,
10
  "embd_pdrop": 0.1,
11
+ "eos_token_id": null,
12
  "id2label": {
13
+ "0": "NEUTRAL",
14
+ "1": "POSITIVE",
15
+ "2": "NEGATIVE"
16
  },
17
  "initializer_range": 0.02,
18
  "label2id": {
19
+ "NEGATIVE": 2,
20
+ "NEUTRAL": 0,
21
+ "POSITIVE": 1
22
  },
23
  "layer_norm_epsilon": 1e-05,
24
  "model_type": "gpt2",
 
26
  "n_embd": 768,
27
  "n_head": 6,
28
  "n_inner": null,
29
+ "n_layer": 6,
30
  "n_positions": 1024,
31
  "reorder_and_upcast_attn": false,
32
  "resid_pdrop": 0.1,
generation_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 50256,
4
- "eos_token_id": 50256,
5
  "transformers_version": "4.28.1"
6
  }
 
1
  {
2
  "_from_model_config": true,
 
 
3
  "transformers_version": "4.28.1"
4
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a195114a9cbbce6c76053024a7ef7b4426cc8c63c6b98c5587987f0c5a77446
3
- size 241858624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25eaa1bc3d6bb27576f2dcd3e84a7f4eeb7316205d0556974cb328c53396ed19
3
+ size 326955968