irishprancer commited on
Commit
073c24b
·
verified ·
1 Parent(s): a9078a6

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1de9c7983d81c83a25d9f5af8ab2f633d7e6d24eec5b255ecd10994cf1de3e68
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7088e1465bec09d936d03e684198b58b239bf23505303ab77b8fed115ae83f21
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9b36b33638ac27ad81d5b5cd5fa2e56673b3a7acb5c226ff0baa60d122dfdc7
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dec7cc35f0f25bdf81f153d3e76317918cc977ade7398352de92638c14366b
3
  size 1054135994
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.7480350136756897,
3
  "best_model_checkpoint": "./output/checkpoint-150",
4
  "epoch": 6.521739130434782,
5
  "eval_steps": 150,
@@ -10,115 +10,115 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.43478260869565216,
13
- "grad_norm": 1.5022108554840088,
14
  "learning_rate": 5e-06,
15
- "loss": 0.9063,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8695652173913043,
20
- "grad_norm": 1.679484248161316,
21
  "learning_rate": 1e-05,
22
  "loss": 0.9018,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.3043478260869565,
27
- "grad_norm": 1.7071681022644043,
28
  "learning_rate": 1.5e-05,
29
- "loss": 0.8972,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7391304347826086,
34
- "grad_norm": 1.4155817031860352,
35
  "learning_rate": 2e-05,
36
- "loss": 0.9019,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.1739130434782608,
41
- "grad_norm": 1.2699992656707764,
42
  "learning_rate": 2.5e-05,
43
- "loss": 0.8208,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.608695652173913,
48
- "grad_norm": 1.902714729309082,
49
  "learning_rate": 3e-05,
50
- "loss": 0.865,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.0434782608695654,
55
- "grad_norm": 1.344117283821106,
56
  "learning_rate": 3.5e-05,
57
- "loss": 0.853,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4782608695652173,
62
- "grad_norm": 1.6205377578735352,
63
  "learning_rate": 4e-05,
64
- "loss": 0.774,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.9130434782608696,
69
- "grad_norm": 1.364487886428833,
70
  "learning_rate": 4.5e-05,
71
- "loss": 0.8004,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.3478260869565215,
76
- "grad_norm": 1.2991905212402344,
77
  "learning_rate": 5e-05,
78
- "loss": 0.7915,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.782608695652174,
83
- "grad_norm": 2.0769360065460205,
84
  "learning_rate": 4.999999504539938e-05,
85
- "loss": 0.7423,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.217391304347826,
90
- "grad_norm": 1.0391877889633179,
91
  "learning_rate": 4.999998018159948e-05,
92
- "loss": 0.6799,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.6521739130434785,
97
- "grad_norm": 1.4947184324264526,
98
  "learning_rate": 4.999995540860619e-05,
99
- "loss": 0.6506,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 6.086956521739131,
104
- "grad_norm": 1.8294117450714111,
105
  "learning_rate": 4.999992072642933e-05,
106
- "loss": 0.6741,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 6.521739130434782,
111
- "grad_norm": 1.1097073554992676,
112
  "learning_rate": 4.9999876135082656e-05,
113
  "loss": 0.6923,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 6.521739130434782,
118
- "eval_loss": 0.7480350136756897,
119
- "eval_runtime": 0.5268,
120
- "eval_samples_per_second": 18.983,
121
- "eval_steps_per_second": 18.983,
122
  "step": 150
123
  }
124
  ],
 
1
  {
2
+ "best_metric": 0.7474737167358398,
3
  "best_model_checkpoint": "./output/checkpoint-150",
4
  "epoch": 6.521739130434782,
5
  "eval_steps": 150,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.43478260869565216,
13
+ "grad_norm": 1.5022056102752686,
14
  "learning_rate": 5e-06,
15
+ "loss": 0.9062,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.8695652173913043,
20
+ "grad_norm": 1.679456114768982,
21
  "learning_rate": 1e-05,
22
  "loss": 0.9018,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 1.3043478260869565,
27
+ "grad_norm": 1.707067608833313,
28
  "learning_rate": 1.5e-05,
29
+ "loss": 0.8969,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 1.7391304347826086,
34
+ "grad_norm": 1.4233026504516602,
35
  "learning_rate": 2e-05,
36
+ "loss": 0.9022,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 2.1739130434782608,
41
+ "grad_norm": 1.2776679992675781,
42
  "learning_rate": 2.5e-05,
43
+ "loss": 0.8206,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 2.608695652173913,
48
+ "grad_norm": 1.9025815725326538,
49
  "learning_rate": 3e-05,
50
+ "loss": 0.8648,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 3.0434782608695654,
55
+ "grad_norm": 1.3365222215652466,
56
  "learning_rate": 3.5e-05,
57
+ "loss": 0.8532,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 3.4782608695652173,
62
+ "grad_norm": 1.6204768419265747,
63
  "learning_rate": 4e-05,
64
+ "loss": 0.7741,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 3.9130434782608696,
69
+ "grad_norm": 1.356784701347351,
70
  "learning_rate": 4.5e-05,
71
+ "loss": 0.8006,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 4.3478260869565215,
76
+ "grad_norm": 1.2996201515197754,
77
  "learning_rate": 5e-05,
78
+ "loss": 0.7921,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 4.782608695652174,
83
+ "grad_norm": 2.0766441822052,
84
  "learning_rate": 4.999999504539938e-05,
85
+ "loss": 0.7419,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 5.217391304347826,
90
+ "grad_norm": 1.0354195833206177,
91
  "learning_rate": 4.999998018159948e-05,
92
+ "loss": 0.68,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 5.6521739130434785,
97
+ "grad_norm": 1.4931747913360596,
98
  "learning_rate": 4.999995540860619e-05,
99
+ "loss": 0.6505,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 6.086956521739131,
104
+ "grad_norm": 1.8289649486541748,
105
  "learning_rate": 4.999992072642933e-05,
106
+ "loss": 0.6738,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 6.521739130434782,
111
+ "grad_norm": 1.109723448753357,
112
  "learning_rate": 4.9999876135082656e-05,
113
  "loss": 0.6923,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 6.521739130434782,
118
+ "eval_loss": 0.7474737167358398,
119
+ "eval_runtime": 0.4635,
120
+ "eval_samples_per_second": 21.574,
121
+ "eval_steps_per_second": 21.574,
122
  "step": 150
123
  }
124
  ],
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c534cfc3e2dd38ea6fb24a33807c3d741b1e0c48890f1270e8024ad58bfd114d
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a70fbc4ba87acefb3623b6ac3a7214d64f45e93ed5b9ac02509ef8f9f7f72d
3
  size 5368