bimabk committed on
Commit
6dcfb2d
·
verified ·
1 Parent(s): 35e08fe

Upload task output 0fe99f84-0038-4cec-8e61-1eb9fea8dc55

Browse files
Files changed (4) hide show
  1. loss.txt +1 -1
  2. model.safetensors +1 -1
  3. trainer_state.json +34 -34
  4. training_args.bin +1 -1
loss.txt CHANGED
@@ -1 +1 @@
1
- 94,1.4226864576339722
 
1
+ 94,1.4222824573516846
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab0909463ed87ed047c9689bd5c88ff5cbd6baf5236cf377f4b8b00fa277f94
3
  size 2471645608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91b62ec7c4aaa49c8c51325cf89a61746ace89ce3e3b07f4e01d803481c13cc
3
  size 2471645608
trainer_state.json CHANGED
@@ -13,42 +13,42 @@
13
  "epoch": 0.10638297872340426,
14
  "grad_norm": 6.875,
15
  "learning_rate": 8.8845e-06,
16
- "loss": 2.3619,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.2127659574468085,
21
  "grad_norm": 4.96875,
22
  "learning_rate": 1.9990125e-05,
23
- "loss": 2.1355,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.3191489361702128,
28
- "grad_norm": 4.75,
29
  "learning_rate": 3.1095750000000005e-05,
30
- "loss": 1.9609,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.425531914893617,
35
  "grad_norm": 3.9375,
36
  "learning_rate": 4.2201375e-05,
37
- "loss": 1.8411,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.5319148936170213,
42
  "grad_norm": 3.890625,
43
  "learning_rate": 5.3307e-05,
44
- "loss": 1.7944,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.6382978723404256,
49
- "grad_norm": 3.9375,
50
  "learning_rate": 6.4412625e-05,
51
- "loss": 1.7092,
52
  "step": 30
53
  },
54
  {
@@ -60,9 +60,9 @@
60
  },
61
  {
62
  "epoch": 0.851063829787234,
63
- "grad_norm": 3.75,
64
  "learning_rate": 7.753475804658967e-05,
65
- "loss": 1.65,
66
  "step": 40
67
  },
68
  {
@@ -74,81 +74,81 @@
74
  },
75
  {
76
  "epoch": 1.0,
77
- "eval_loss": 1.5718683004379272,
78
- "eval_runtime": 5.0419,
79
- "eval_samples_per_second": 39.668,
80
- "eval_steps_per_second": 39.668,
81
  "step": 47
82
  },
83
  {
84
  "epoch": 1.0638297872340425,
85
- "grad_norm": 3.5625,
86
  "learning_rate": 7.526567749442305e-05,
87
- "loss": 1.4548,
88
  "step": 50
89
  },
90
  {
91
  "epoch": 1.1702127659574468,
92
- "grad_norm": 3.203125,
93
  "learning_rate": 7.323814868052365e-05,
94
- "loss": 1.3195,
95
  "step": 55
96
  },
97
  {
98
  "epoch": 1.2765957446808511,
99
- "grad_norm": 3.515625,
100
  "learning_rate": 7.067027832041926e-05,
101
- "loss": 1.3081,
102
  "step": 60
103
  },
104
  {
105
  "epoch": 1.3829787234042552,
106
- "grad_norm": 3.28125,
107
  "learning_rate": 6.761835317036523e-05,
108
- "loss": 1.3275,
109
  "step": 65
110
  },
111
  {
112
  "epoch": 1.4893617021276595,
113
- "grad_norm": 3.140625,
114
  "learning_rate": 6.414927028626436e-05,
115
- "loss": 1.2813,
116
  "step": 70
117
  },
118
  {
119
  "epoch": 1.5957446808510638,
120
  "grad_norm": 3.265625,
121
  "learning_rate": 6.033907066526388e-05,
122
- "loss": 1.2849,
123
  "step": 75
124
  },
125
  {
126
  "epoch": 1.702127659574468,
127
- "grad_norm": 2.859375,
128
  "learning_rate": 5.627127245558645e-05,
129
- "loss": 1.2558,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 1.8085106382978724,
134
  "grad_norm": 2.90625,
135
  "learning_rate": 5.203504027001068e-05,
136
- "loss": 1.2672,
137
  "step": 85
138
  },
139
  {
140
  "epoch": 1.9148936170212765,
141
- "grad_norm": 2.875,
142
  "learning_rate": 4.7723230730964036e-05,
143
- "loss": 1.2442,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 2.0,
148
- "eval_loss": 1.4226864576339722,
149
- "eval_runtime": 4.9284,
150
- "eval_samples_per_second": 40.581,
151
- "eval_steps_per_second": 40.581,
152
  "step": 94
153
  }
154
  ],
 
13
  "epoch": 0.10638297872340426,
14
  "grad_norm": 6.875,
15
  "learning_rate": 8.8845e-06,
16
+ "loss": 2.3618,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.2127659574468085,
21
  "grad_norm": 4.96875,
22
  "learning_rate": 1.9990125e-05,
23
+ "loss": 2.1356,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.3191489361702128,
28
+ "grad_norm": 4.71875,
29
  "learning_rate": 3.1095750000000005e-05,
30
+ "loss": 1.9607,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.425531914893617,
35
  "grad_norm": 3.9375,
36
  "learning_rate": 4.2201375e-05,
37
+ "loss": 1.8407,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.5319148936170213,
42
  "grad_norm": 3.890625,
43
  "learning_rate": 5.3307e-05,
44
+ "loss": 1.7943,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.6382978723404256,
49
+ "grad_norm": 3.921875,
50
  "learning_rate": 6.4412625e-05,
51
+ "loss": 1.7089,
52
  "step": 30
53
  },
54
  {
 
60
  },
61
  {
62
  "epoch": 0.851063829787234,
63
+ "grad_norm": 3.765625,
64
  "learning_rate": 7.753475804658967e-05,
65
+ "loss": 1.6499,
66
  "step": 40
67
  },
68
  {
 
74
  },
75
  {
76
  "epoch": 1.0,
77
+ "eval_loss": 1.5716543197631836,
78
+ "eval_runtime": 5.0246,
79
+ "eval_samples_per_second": 39.804,
80
+ "eval_steps_per_second": 39.804,
81
  "step": 47
82
  },
83
  {
84
  "epoch": 1.0638297872340425,
85
+ "grad_norm": 3.59375,
86
  "learning_rate": 7.526567749442305e-05,
87
+ "loss": 1.4545,
88
  "step": 50
89
  },
90
  {
91
  "epoch": 1.1702127659574468,
92
+ "grad_norm": 3.171875,
93
  "learning_rate": 7.323814868052365e-05,
94
+ "loss": 1.319,
95
  "step": 55
96
  },
97
  {
98
  "epoch": 1.2765957446808511,
99
+ "grad_norm": 3.5,
100
  "learning_rate": 7.067027832041926e-05,
101
+ "loss": 1.3079,
102
  "step": 60
103
  },
104
  {
105
  "epoch": 1.3829787234042552,
106
+ "grad_norm": 3.25,
107
  "learning_rate": 6.761835317036523e-05,
108
+ "loss": 1.3272,
109
  "step": 65
110
  },
111
  {
112
  "epoch": 1.4893617021276595,
113
+ "grad_norm": 3.125,
114
  "learning_rate": 6.414927028626436e-05,
115
+ "loss": 1.2809,
116
  "step": 70
117
  },
118
  {
119
  "epoch": 1.5957446808510638,
120
  "grad_norm": 3.265625,
121
  "learning_rate": 6.033907066526388e-05,
122
+ "loss": 1.2853,
123
  "step": 75
124
  },
125
  {
126
  "epoch": 1.702127659574468,
127
+ "grad_norm": 2.875,
128
  "learning_rate": 5.627127245558645e-05,
129
+ "loss": 1.256,
130
  "step": 80
131
  },
132
  {
133
  "epoch": 1.8085106382978724,
134
  "grad_norm": 2.90625,
135
  "learning_rate": 5.203504027001068e-05,
136
+ "loss": 1.2676,
137
  "step": 85
138
  },
139
  {
140
  "epoch": 1.9148936170212765,
141
+ "grad_norm": 2.890625,
142
  "learning_rate": 4.7723230730964036e-05,
143
+ "loss": 1.2441,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 2.0,
148
+ "eval_loss": 1.4222824573516846,
149
+ "eval_runtime": 4.9323,
150
+ "eval_samples_per_second": 40.549,
151
+ "eval_steps_per_second": 40.549,
152
  "step": 94
153
  }
154
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf331e6fd57c18254efcfbc196d1235b38b32362cd9d58a9b0e6317cde1e5ab3
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c40039de34630cebb38d81b2adb72bd9b6312651546c459581ed3475f42da829
3
  size 5688