Johannes Garstenauer committed on
Commit
a1e78ad
·
1 Parent(s): e97d11b

Training in progress, step 4

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eca4bd1a0dc3b2f038f66bf50388447d18eebc41526b5232e9919ecb62a5a4b
3
- size 532524613
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af5dec527a9c814f0b0b0969f34e18dce6cc7e0ca1c062900e9ab561d71c0257
3
+ size 532524485
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caa61399b7a53227108ebc38249705c50a8fb19b1d595bb3a5d63e8831b7d1a3
3
  size 266267309
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409dfb718254049ca6895716068c3f4df968a22517ef020c66c5c26146f3ab5f
3
  size 266267309
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4b6060cf279e50135a405faf92c008144efde33dc3f94ac0484548bac6ae9e8
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16316a0b550954c0ab10d38c11e7ce2f43ad6cbf8d106a8d9c7a3e97f8b3cf6e
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb688e14e79b4182b9e5a8155b72ea71bee611c12f2cd72cad99d48f21f81233
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e38e56f6ae6eafba42fa147eb9fa2dd398045bd711083ebb7e1bfe98b50fd0
3
  size 627
last-checkpoint/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,130 +1,15 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.8535120147874307,
5
- "global_step": 6175,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.23,
12
- "learning_rate": 4.614910659272951e-05,
13
- "loss": 0.6968,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.46,
18
- "learning_rate": 4.2298213185459026e-05,
19
- "loss": 0.6951,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 0.69,
24
- "learning_rate": 3.844731977818854e-05,
25
- "loss": 0.6944,
26
- "step": 1500
27
- },
28
- {
29
- "epoch": 0.92,
30
- "learning_rate": 3.4596426370918056e-05,
31
- "loss": 0.6944,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.92,
36
- "eval_accuracy": {
37
- "accuracy": 0.5025017869907077
38
- },
39
- "eval_f1": {
40
- "f1": 0.0
41
- },
42
- "eval_loss": 0.6932790279388428,
43
- "eval_runtime": 11.9399,
44
- "eval_samples_per_second": 234.341,
45
- "eval_steps_per_second": 1.843,
46
- "step": 2000
47
- },
48
- {
49
- "epoch": 1.16,
50
- "learning_rate": 3.0745532963647564e-05,
51
- "loss": 0.6937,
52
- "step": 2500
53
- },
54
- {
55
- "epoch": 1.39,
56
- "learning_rate": 2.6894639556377083e-05,
57
- "loss": 0.6936,
58
- "step": 3000
59
- },
60
- {
61
- "epoch": 1.62,
62
- "learning_rate": 2.3043746149106595e-05,
63
- "loss": 0.6936,
64
- "step": 3500
65
- },
66
- {
67
- "epoch": 1.85,
68
- "learning_rate": 1.9192852741836106e-05,
69
- "loss": 0.6936,
70
- "step": 4000
71
- },
72
- {
73
- "epoch": 1.85,
74
- "eval_accuracy": {
75
- "accuracy": 0.49749821300929237
76
- },
77
- "eval_f1": {
78
- "f1": 0.6644391408114558
79
- },
80
- "eval_loss": 0.6938893795013428,
81
- "eval_runtime": 11.9588,
82
- "eval_samples_per_second": 233.969,
83
- "eval_steps_per_second": 1.84,
84
- "step": 4000
85
- },
86
- {
87
- "epoch": 2.08,
88
- "learning_rate": 1.534195933456562e-05,
89
- "loss": 0.6933,
90
- "step": 4500
91
- },
92
- {
93
- "epoch": 2.31,
94
- "learning_rate": 1.1491065927295133e-05,
95
- "loss": 0.6934,
96
- "step": 5000
97
- },
98
- {
99
- "epoch": 2.54,
100
- "learning_rate": 7.640172520024647e-06,
101
- "loss": 0.6935,
102
- "step": 5500
103
- },
104
- {
105
- "epoch": 2.77,
106
- "learning_rate": 3.789279112754159e-06,
107
- "loss": 0.6935,
108
- "step": 6000
109
- },
110
- {
111
- "epoch": 2.77,
112
- "eval_accuracy": {
113
- "accuracy": 0.5025017869907077
114
- },
115
- "eval_f1": {
116
- "f1": 0.0
117
- },
118
- "eval_loss": 0.6931451559066772,
119
- "eval_runtime": 11.9466,
120
- "eval_samples_per_second": 234.208,
121
- "eval_steps_per_second": 1.842,
122
- "step": 6000
123
- }
124
- ],
125
- "max_steps": 6492,
126
  "num_train_epochs": 3,
127
- "total_flos": 1.0469958254972928e+17,
128
  "trial_name": null,
129
  "trial_params": null
130
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.18181818181818182,
5
+ "global_step": 4,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
+ "log_history": [],
10
+ "max_steps": 66,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "num_train_epochs": 3,
12
+ "total_flos": 67823308111872.0,
13
  "trial_name": null,
14
  "trial_params": null
15
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4767a339eceb00e37c148ec2ef95cceaf80d8ec0242cfc80171905c16a7c8fdc
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25d14ad4fc875720ef03324b41abfb757bf175890a09e38c377c5afaad866cc
3
  size 3963
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caa61399b7a53227108ebc38249705c50a8fb19b1d595bb3a5d63e8831b7d1a3
3
  size 266267309
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409dfb718254049ca6895716068c3f4df968a22517ef020c66c5c26146f3ab5f
3
  size 266267309
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4767a339eceb00e37c148ec2ef95cceaf80d8ec0242cfc80171905c16a7c8fdc
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e25d14ad4fc875720ef03324b41abfb757bf175890a09e38c377c5afaad866cc
3
  size 3963