SM committed on
Commit
9efeae0
·
1 Parent(s): c3eb26d

Upload 66 files

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 3.5093
21
- - Accuracy: 0.4494
22
 
23
  ## Model description
24
 
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - num_epochs: 60.0
47
 
48
  ### Training results
49
 
 
17
 
18
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 3.4268
21
+ - Accuracy: 0.3678
22
 
23
  ## Model description
24
 
 
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
+ - num_epochs: 40.0
47
 
48
  ### Training results
49
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 60.0,
3
- "eval_accuracy": 0.4494406429890301,
4
- "eval_loss": 3.5092644691467285,
5
- "eval_runtime": 0.6069,
6
- "eval_samples": 9,
7
- "eval_samples_per_second": 14.829,
8
- "eval_steps_per_second": 4.943,
9
- "perplexity": 33.423674596634115,
10
- "train_loss": 0.9941332481020972,
11
- "train_runtime": 1577.313,
12
- "train_samples": 138,
13
- "train_samples_per_second": 5.249,
14
- "train_steps_per_second": 1.331
15
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.36779081133919844,
4
+ "eval_loss": 3.4267592430114746,
5
+ "eval_runtime": 0.5326,
6
+ "eval_samples": 8,
7
+ "eval_samples_per_second": 15.021,
8
+ "eval_steps_per_second": 3.755,
9
+ "perplexity": 30.776741019953068,
10
+ "train_loss": 1.7243760996851427,
11
+ "train_runtime": 861.0101,
12
+ "train_samples": 113,
13
+ "train_samples_per_second": 5.25,
14
+ "train_steps_per_second": 1.347
15
  }
checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12f996d0719a66b10d90e36ef43b31f9eaae1c31749508f6854af308bb085736
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4539d2a083bbe3c5582d647b1ae6bc86e66a35cfc1b8f9e221aff26bbbde8195
3
  size 497774208
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9adc9e7ab852a33f9fe84c647c87587bada024b6538c02088b10c9c8717806e2
3
  size 995642298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7056e6ee4e46e8e59c098ffc9fa74eb8cf4091180be014749fc9672c6dee96fa
3
  size 995642298
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a6632d83b7bb45efc05bef8c034ed3b2854a29a949ed96a7ca5bd50bcb7d902
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d420b459d39d02b619c50d8cdf88cb444183f0ce7c2d4fb429cd2b5d34fff044
3
  size 14244
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eee3c9505132ed967b6539dd4a6fb45e2bc29520ec4ec39ac5c68d846d45dec5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76a794237642516faf4b87039234f574d68642549c3c3bc8098873f4599d9d7e
3
  size 1064
checkpoint-1000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": false,
@@ -9,24 +9,24 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 14.29,
13
- "learning_rate": 3.809523809523809e-05,
14
- "loss": 1.8052,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 28.57,
19
- "learning_rate": 2.6190476190476192e-05,
20
- "loss": 1.0819,
21
  "step": 1000
22
  }
23
  ],
24
  "logging_steps": 500,
25
- "max_steps": 2100,
26
  "num_input_tokens_seen": 0,
27
- "num_train_epochs": 60,
28
  "save_steps": 500,
29
- "total_flos": 2061071548416000.0,
30
  "train_batch_size": 4,
31
  "trial_name": null,
32
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 34.48275862068966,
5
  "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 17.24,
13
+ "learning_rate": 2.844827586206897e-05,
14
+ "loss": 2.1876,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 34.48,
19
+ "learning_rate": 6.896551724137932e-06,
20
+ "loss": 1.4227,
21
  "step": 1000
22
  }
23
  ],
24
  "logging_steps": 500,
25
+ "max_steps": 1160,
26
  "num_input_tokens_seen": 0,
27
+ "num_train_epochs": 40,
28
  "save_steps": 500,
29
+ "total_flos": 2037032681472000.0,
30
  "train_batch_size": 4,
31
  "trial_name": null,
32
  "trial_params": null
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be20547a6728ba6b1c0823eb883cb2999dcf6b825f9d82a942dc5c63262ce46
3
  size 4664
checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b4dcf13f972feb06a7bc56ddc93ab0a631fe7d88e27643cbf9c0e042041fcff
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffc6c3538ac0c892d4ba1dbc0b4658747000215b6f0344b4810f8c693469a2bd
3
  size 497774208
checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84d55291718bd821470b8b79351ab3e47b578d3bc202542399220a633dc12848
3
  size 995642298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e083993f7a062b56c3ecbf41fddef3aef72845c7a860c14dc9063f295d4cfa6
3
  size 995642298
checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e651e34957d21ef1419934c9c311ba824f956fd612f9f4b5bea9e4854d09d528
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb841abcf2e8906e340787ceb62a5bd4a7332d20f27e04e3ad3d26c6caf5856
3
  size 14244
checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d84cb0a17b808448928ddd21fdfb54eabfda0598dfe3f0b7eebb6d442d67f65
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc56f025127707558594caa06236787d82e466db8432b4081f402bb03eef7151
3
  size 1064
checkpoint-500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.285714285714286,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
@@ -9,18 +9,18 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 14.29,
13
- "learning_rate": 3.809523809523809e-05,
14
- "loss": 1.8052,
15
  "step": 500
16
  }
17
  ],
18
  "logging_steps": 500,
19
- "max_steps": 2100,
20
  "num_input_tokens_seen": 0,
21
- "num_train_epochs": 60,
22
  "save_steps": 500,
23
- "total_flos": 1030535774208000.0,
24
  "train_batch_size": 4,
25
  "trial_name": null,
26
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.24137931034483,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 17.24,
13
+ "learning_rate": 2.844827586206897e-05,
14
+ "loss": 2.1876,
15
  "step": 500
16
  }
17
  ],
18
  "logging_steps": 500,
19
+ "max_steps": 1160,
20
  "num_input_tokens_seen": 0,
21
+ "num_train_epochs": 40,
22
  "save_steps": 500,
23
+ "total_flos": 1018516340736000.0,
24
  "train_batch_size": 4,
25
  "trial_name": null,
26
  "trial_params": null
checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be20547a6728ba6b1c0823eb883cb2999dcf6b825f9d82a942dc5c63262ce46
3
  size 4664
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 60.0,
3
- "eval_accuracy": 0.4494406429890301,
4
- "eval_loss": 3.5092644691467285,
5
- "eval_runtime": 0.6069,
6
- "eval_samples": 9,
7
- "eval_samples_per_second": 14.829,
8
- "eval_steps_per_second": 4.943,
9
- "perplexity": 33.423674596634115
10
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 0.36779081133919844,
4
+ "eval_loss": 3.4267592430114746,
5
+ "eval_runtime": 0.5326,
6
+ "eval_samples": 8,
7
+ "eval_samples_per_second": 15.021,
8
+ "eval_steps_per_second": 3.755,
9
+ "perplexity": 30.776741019953068
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aaa8c132a2f82e4f3a057cd8336ab86a6452f68defee110eba5fbb71b1e662c
3
  size 497774208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85daf429e1d2edfcaf99b4727e7c9dfd52b443830c2696dbc1a1af96cd3bbedd
3
  size 497774208
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 60.0,
3
- "train_loss": 0.9941332481020972,
4
- "train_runtime": 1577.313,
5
- "train_samples": 138,
6
- "train_samples_per_second": 5.249,
7
- "train_steps_per_second": 1.331
8
  }
 
1
  {
2
+ "epoch": 40.0,
3
+ "train_loss": 1.7243760996851427,
4
+ "train_runtime": 861.0101,
5
+ "train_samples": 113,
6
+ "train_samples_per_second": 5.25,
7
+ "train_steps_per_second": 1.347
8
  }
trainer_state.json CHANGED
@@ -1,53 +1,41 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 60.0,
5
  "eval_steps": 500,
6
- "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 14.29,
13
- "learning_rate": 3.809523809523809e-05,
14
- "loss": 1.8052,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 28.57,
19
- "learning_rate": 2.6190476190476192e-05,
20
- "loss": 1.0819,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 42.86,
25
- "learning_rate": 1.4285714285714285e-05,
26
- "loss": 0.6781,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 57.14,
31
- "learning_rate": 2.3809523809523808e-06,
32
- "loss": 0.5142,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 60.0,
37
- "step": 2100,
38
- "total_flos": 4326996049920000.0,
39
- "train_loss": 0.9941332481020972,
40
- "train_runtime": 1577.313,
41
- "train_samples_per_second": 5.249,
42
- "train_steps_per_second": 1.331
43
  }
44
  ],
45
  "logging_steps": 500,
46
- "max_steps": 2100,
47
  "num_input_tokens_seen": 0,
48
- "num_train_epochs": 60,
49
  "save_steps": 500,
50
- "total_flos": 4326996049920000.0,
51
  "train_batch_size": 4,
52
  "trial_name": null,
53
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 40.0,
5
  "eval_steps": 500,
6
+ "global_step": 1160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 17.24,
13
+ "learning_rate": 2.844827586206897e-05,
14
+ "loss": 2.1876,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 34.48,
19
+ "learning_rate": 6.896551724137932e-06,
20
+ "loss": 1.4227,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 40.0,
25
+ "step": 1160,
26
+ "total_flos": 2362079969280000.0,
27
+ "train_loss": 1.7243760996851427,
28
+ "train_runtime": 861.0101,
29
+ "train_samples_per_second": 5.25,
30
+ "train_steps_per_second": 1.347
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
32
  ],
33
  "logging_steps": 500,
34
+ "max_steps": 1160,
35
  "num_input_tokens_seen": 0,
36
+ "num_train_epochs": 40,
37
  "save_steps": 500,
38
+ "total_flos": 2362079969280000.0,
39
  "train_batch_size": 4,
40
  "trial_name": null,
41
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be20547a6728ba6b1c0823eb883cb2999dcf6b825f9d82a942dc5c63262ce46
3
  size 4664