ninagroot committed on
Commit
6387e3f
·
verified ·
1 Parent(s): a43e481

ninagroot/babyllamatest

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 5.5855
17
 
18
  ## Model description
19
 
@@ -33,7 +33,7 @@ More information needed
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 0.00025
36
- - train_batch_size: 32
37
  - eval_batch_size: 8
38
  - seed: 42
39
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
@@ -46,46 +46,46 @@ The following hyperparameters were used during training:
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
- | No log | 1.0 | 7 | 71.5542 |
50
- | No log | 2.0 | 14 | 62.5591 |
51
- | 75.1015 | 3.0 | 21 | 52.1433 |
52
- | 75.1015 | 4.0 | 28 | 41.8504 |
53
- | 75.1015 | 5.0 | 35 | 35.2569 |
54
- | 47.6078 | 6.0 | 42 | 29.3735 |
55
- | 47.6078 | 7.0 | 49 | 25.0324 |
56
- | 47.6078 | 8.0 | 56 | 21.8563 |
57
- | 29.8451 | 9.0 | 63 | 18.5148 |
58
- | 29.8451 | 10.0 | 70 | 15.5228 |
59
- | 29.8451 | 11.0 | 77 | 13.0948 |
60
- | 17.3358 | 12.0 | 84 | 11.0128 |
61
- | 17.3358 | 13.0 | 91 | 9.2921 |
62
- | 17.3358 | 14.0 | 98 | 8.5328 |
63
- | 9.565 | 15.0 | 105 | 7.7662 |
64
- | 9.565 | 16.0 | 112 | 7.5869 |
65
- | 9.565 | 17.0 | 119 | 7.0970 |
66
- | 6.5384 | 18.0 | 126 | 6.8532 |
67
- | 6.5384 | 19.0 | 133 | 6.5811 |
68
- | 5.5048 | 20.0 | 140 | 6.4781 |
69
- | 5.5048 | 21.0 | 147 | 6.3671 |
70
- | 5.5048 | 22.0 | 154 | 6.3537 |
71
- | 5.0318 | 23.0 | 161 | 6.1682 |
72
- | 5.0318 | 24.0 | 168 | 5.9982 |
73
- | 5.0318 | 25.0 | 175 | 5.9669 |
74
- | 4.71 | 26.0 | 182 | 5.8959 |
75
- | 4.71 | 27.0 | 189 | 5.8910 |
76
- | 4.71 | 28.0 | 196 | 5.8447 |
77
- | 4.5081 | 29.0 | 203 | 5.7656 |
78
- | 4.5081 | 30.0 | 210 | 5.7277 |
79
- | 4.5081 | 31.0 | 217 | 5.7041 |
80
- | 4.3384 | 32.0 | 224 | 5.6611 |
81
- | 4.3384 | 33.0 | 231 | 5.6682 |
82
- | 4.3384 | 34.0 | 238 | 5.6317 |
83
- | 4.2903 | 35.0 | 245 | 5.6118 |
84
- | 4.2903 | 36.0 | 252 | 5.5995 |
85
- | 4.2903 | 37.0 | 259 | 5.6007 |
86
- | 4.2404 | 38.0 | 266 | 5.5819 |
87
- | 4.2404 | 39.0 | 273 | 5.5834 |
88
- | 4.1963 | 40.0 | 280 | 5.5855 |
89
 
90
 
91
  ### Framework versions
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 12.3739
17
 
18
  ## Model description
19
 
 
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 0.00025
36
+ - train_batch_size: 128
37
  - eval_batch_size: 8
38
  - seed: 42
39
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
+ | 81.6769 | 1.0 | 2 | 74.8633 |
50
+ | 81.1451 | 2.0 | 4 | 73.7956 |
51
+ | 78.7609 | 3.0 | 6 | 71.9935 |
52
+ | 79.1356 | 4.0 | 8 | 69.4148 |
53
+ | 75.726 | 5.0 | 10 | 67.8374 |
54
+ | 74.2979 | 6.0 | 12 | 64.3771 |
55
+ | 70.3903 | 7.0 | 14 | 61.1100 |
56
+ | 67.5033 | 8.0 | 16 | 58.1597 |
57
+ | 64.8955 | 9.0 | 18 | 55.2518 |
58
+ | 61.2792 | 10.0 | 20 | 52.1664 |
59
+ | 57.5665 | 11.0 | 22 | 48.9584 |
60
+ | 54.0972 | 12.0 | 24 | 45.8081 |
61
+ | 50.2098 | 13.0 | 26 | 42.8455 |
62
+ | 48.9371 | 14.0 | 28 | 40.1582 |
63
+ | 45.2235 | 15.0 | 30 | 37.7302 |
64
+ | 44.1405 | 16.0 | 32 | 35.5237 |
65
+ | 41.0789 | 17.0 | 34 | 33.5662 |
66
+ | 40.2006 | 18.0 | 36 | 31.8106 |
67
+ | 38.5898 | 19.0 | 38 | 30.1508 |
68
+ | 36.2422 | 20.0 | 40 | 28.5076 |
69
+ | 34.6463 | 21.0 | 42 | 26.5191 |
70
+ | 30.7565 | 22.0 | 44 | 24.9482 |
71
+ | 29.6666 | 23.0 | 46 | 23.8793 |
72
+ | 27.6733 | 24.0 | 48 | 22.8973 |
73
+ | 25.9126 | 25.0 | 50 | 21.6442 |
74
+ | 25.2859 | 26.0 | 52 | 20.4439 |
75
+ | 24.0265 | 27.0 | 54 | 19.7371 |
76
+ | 21.8765 | 28.0 | 56 | 18.4843 |
77
+ | 20.4426 | 29.0 | 58 | 17.2997 |
78
+ | 18.7842 | 30.0 | 60 | 16.1685 |
79
+ | 17.7504 | 31.0 | 62 | 15.4688 |
80
+ | 16.5791 | 32.0 | 64 | 15.0343 |
81
+ | 16.1571 | 33.0 | 66 | 14.1040 |
82
+ | 15.0651 | 34.0 | 68 | 13.7322 |
83
+ | 14.0418 | 35.0 | 70 | 13.2421 |
84
+ | 13.6841 | 36.0 | 72 | 12.8765 |
85
+ | 13.3316 | 37.0 | 74 | 12.5740 |
86
+ | 13.3591 | 38.0 | 76 | 12.5028 |
87
+ | 13.0756 | 39.0 | 78 | 12.4223 |
88
+ | 13.0233 | 40.0 | 80 | 12.3739 |
89
 
90
 
91
  ### Framework versions
added_tokens.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "<|endoftext|>": 4312
3
  }
 
1
  {
2
+ "<|endoftext|>": 12198
3
  }
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03acd465aafbe8e6f46318a9592cd0ae304d19d427c38a460754b2dc18cd506c
3
  size 217819016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1714b9a7d37f2a284c5fd482259328e6821ab9e76ec376a2a7644c9d1cf168
3
  size 217819016
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -25,7 +25,7 @@
25
  "single_word": false,
26
  "special": true
27
  },
28
- "4312": {
29
  "content": "<|endoftext|>",
30
  "lstrip": false,
31
  "normalized": false,
@@ -40,5 +40,6 @@
40
  "model_max_length": 128,
41
  "pad_token": "<pad>",
42
  "tokenizer_class": "GPT2Tokenizer",
 
43
  "unk_token": "<|endoftext|>"
44
  }
 
25
  "single_word": false,
26
  "special": true
27
  },
28
+ "12198": {
29
  "content": "<|endoftext|>",
30
  "lstrip": false,
31
  "normalized": false,
 
40
  "model_max_length": 128,
41
  "pad_token": "<pad>",
42
  "tokenizer_class": "GPT2Tokenizer",
43
+ "truncation": true,
44
  "unk_token": "<|endoftext|>"
45
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a404120ec632e6c40885939d6a4d0f4d7e819ebd221abbc8530bab3798a602c
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0551b202cc608f1cc3f396331916badd2c01ef6cd77ea9422de79e6fdc202a56
3
  size 4984
vocab.json CHANGED
The diff for this file is too large to render. See raw diff