bimabk commited on
Commit
35e08fe
·
verified ·
1 Parent(s): 5746067

Upload task output 0fe99f84-0038-4cec-8e61-1eb9fea8dc55

Browse files
Files changed (4) hide show
  1. loss.txt +1 -1
  2. model.safetensors +1 -1
  3. trainer_state.json +39 -39
  4. training_args.bin +1 -1
loss.txt CHANGED
@@ -1 +1 @@
1
- 94,1.4254399538040161
 
1
+ 94,1.4226864576339722
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:463c535603aff4165f67f61dfa629ad8562f6de3d215a045fe038e8ec99796e6
3
  size 2471645608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab0909463ed87ed047c9689bd5c88ff5cbd6baf5236cf377f4b8b00fa277f94
3
  size 2471645608
trainer_state.json CHANGED
@@ -11,30 +11,30 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.10638297872340426,
14
- "grad_norm": 6.90625,
15
  "learning_rate": 8.8845e-06,
16
- "loss": 2.3621,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.2127659574468085,
21
  "grad_norm": 4.96875,
22
  "learning_rate": 1.9990125e-05,
23
- "loss": 2.1354,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.3191489361702128,
28
- "grad_norm": 4.71875,
29
  "learning_rate": 3.1095750000000005e-05,
30
  "loss": 1.9609,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.425531914893617,
35
- "grad_norm": 3.921875,
36
  "learning_rate": 4.2201375e-05,
37
- "loss": 1.841,
38
  "step": 20
39
  },
40
  {
@@ -46,109 +46,109 @@
46
  },
47
  {
48
  "epoch": 0.6382978723404256,
49
- "grad_norm": 3.921875,
50
  "learning_rate": 6.4412625e-05,
51
- "loss": 1.7088,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 0.7446808510638298,
56
- "grad_norm": 3.75,
57
  "learning_rate": 7.551825e-05,
58
- "loss": 1.6829,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 0.851063829787234,
63
- "grad_norm": 3.6875,
64
  "learning_rate": 7.753475804658967e-05,
65
- "loss": 1.6549,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 0.9574468085106383,
70
- "grad_norm": 3.265625,
71
  "learning_rate": 7.670842209011892e-05,
72
- "loss": 1.6349,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.0,
77
- "eval_loss": 1.5820515155792236,
78
- "eval_runtime": 4.9972,
79
- "eval_samples_per_second": 40.023,
80
- "eval_steps_per_second": 40.023,
81
  "step": 47
82
  },
83
  {
84
  "epoch": 1.0638297872340425,
85
- "grad_norm": 3.5,
86
  "learning_rate": 7.526567749442305e-05,
87
- "loss": 1.4597,
88
  "step": 50
89
  },
90
  {
91
  "epoch": 1.1702127659574468,
92
- "grad_norm": 3.15625,
93
  "learning_rate": 7.323814868052365e-05,
94
- "loss": 1.3235,
95
  "step": 55
96
  },
97
  {
98
  "epoch": 1.2765957446808511,
99
- "grad_norm": 3.53125,
100
  "learning_rate": 7.067027832041926e-05,
101
- "loss": 1.3141,
102
  "step": 60
103
  },
104
  {
105
  "epoch": 1.3829787234042552,
106
- "grad_norm": 3.1875,
107
  "learning_rate": 6.761835317036523e-05,
108
- "loss": 1.3339,
109
  "step": 65
110
  },
111
  {
112
  "epoch": 1.4893617021276595,
113
- "grad_norm": 3.109375,
114
  "learning_rate": 6.414927028626436e-05,
115
- "loss": 1.2874,
116
  "step": 70
117
  },
118
  {
119
  "epoch": 1.5957446808510638,
120
- "grad_norm": 3.140625,
121
  "learning_rate": 6.033907066526388e-05,
122
- "loss": 1.2909,
123
  "step": 75
124
  },
125
  {
126
  "epoch": 1.702127659574468,
127
  "grad_norm": 2.859375,
128
  "learning_rate": 5.627127245558645e-05,
129
- "loss": 1.2628,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 1.8085106382978724,
134
- "grad_norm": 2.9375,
135
  "learning_rate": 5.203504027001068e-05,
136
- "loss": 1.2736,
137
  "step": 85
138
  },
139
  {
140
  "epoch": 1.9148936170212765,
141
- "grad_norm": 2.921875,
142
  "learning_rate": 4.7723230730964036e-05,
143
- "loss": 1.2499,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 2.0,
148
- "eval_loss": 1.4254399538040161,
149
- "eval_runtime": 4.8558,
150
- "eval_samples_per_second": 41.188,
151
- "eval_steps_per_second": 41.188,
152
  "step": 94
153
  }
154
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.10638297872340426,
14
+ "grad_norm": 6.875,
15
  "learning_rate": 8.8845e-06,
16
+ "loss": 2.3619,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.2127659574468085,
21
  "grad_norm": 4.96875,
22
  "learning_rate": 1.9990125e-05,
23
+ "loss": 2.1355,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.3191489361702128,
28
+ "grad_norm": 4.75,
29
  "learning_rate": 3.1095750000000005e-05,
30
  "loss": 1.9609,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.425531914893617,
35
+ "grad_norm": 3.9375,
36
  "learning_rate": 4.2201375e-05,
37
+ "loss": 1.8411,
38
  "step": 20
39
  },
40
  {
 
46
  },
47
  {
48
  "epoch": 0.6382978723404256,
49
+ "grad_norm": 3.9375,
50
  "learning_rate": 6.4412625e-05,
51
+ "loss": 1.7092,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 0.7446808510638298,
56
+ "grad_norm": 3.703125,
57
  "learning_rate": 7.551825e-05,
58
+ "loss": 1.679,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 0.851063829787234,
63
+ "grad_norm": 3.75,
64
  "learning_rate": 7.753475804658967e-05,
65
+ "loss": 1.65,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 0.9574468085106383,
70
+ "grad_norm": 3.28125,
71
  "learning_rate": 7.670842209011892e-05,
72
+ "loss": 1.6324,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.0,
77
+ "eval_loss": 1.5718683004379272,
78
+ "eval_runtime": 5.0419,
79
+ "eval_samples_per_second": 39.668,
80
+ "eval_steps_per_second": 39.668,
81
  "step": 47
82
  },
83
  {
84
  "epoch": 1.0638297872340425,
85
+ "grad_norm": 3.5625,
86
  "learning_rate": 7.526567749442305e-05,
87
+ "loss": 1.4548,
88
  "step": 50
89
  },
90
  {
91
  "epoch": 1.1702127659574468,
92
+ "grad_norm": 3.203125,
93
  "learning_rate": 7.323814868052365e-05,
94
+ "loss": 1.3195,
95
  "step": 55
96
  },
97
  {
98
  "epoch": 1.2765957446808511,
99
+ "grad_norm": 3.515625,
100
  "learning_rate": 7.067027832041926e-05,
101
+ "loss": 1.3081,
102
  "step": 60
103
  },
104
  {
105
  "epoch": 1.3829787234042552,
106
+ "grad_norm": 3.28125,
107
  "learning_rate": 6.761835317036523e-05,
108
+ "loss": 1.3275,
109
  "step": 65
110
  },
111
  {
112
  "epoch": 1.4893617021276595,
113
+ "grad_norm": 3.140625,
114
  "learning_rate": 6.414927028626436e-05,
115
+ "loss": 1.2813,
116
  "step": 70
117
  },
118
  {
119
  "epoch": 1.5957446808510638,
120
+ "grad_norm": 3.265625,
121
  "learning_rate": 6.033907066526388e-05,
122
+ "loss": 1.2849,
123
  "step": 75
124
  },
125
  {
126
  "epoch": 1.702127659574468,
127
  "grad_norm": 2.859375,
128
  "learning_rate": 5.627127245558645e-05,
129
+ "loss": 1.2558,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 1.8085106382978724,
134
+ "grad_norm": 2.90625,
135
  "learning_rate": 5.203504027001068e-05,
136
+ "loss": 1.2672,
137
  "step": 85
138
  },
139
  {
140
  "epoch": 1.9148936170212765,
141
+ "grad_norm": 2.875,
142
  "learning_rate": 4.7723230730964036e-05,
143
+ "loss": 1.2442,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 2.0,
148
+ "eval_loss": 1.4226864576339722,
149
+ "eval_runtime": 4.9284,
150
+ "eval_samples_per_second": 40.581,
151
+ "eval_steps_per_second": 40.581,
152
  "step": 94
153
  }
154
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dba5456ef636428827b0247f0a75c2ea36bd40474b12587bfed5fd8edee13fb
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf331e6fd57c18254efcfbc196d1235b38b32362cd9d58a9b0e6317cde1e5ab3
3
  size 5688