danield12 committed on
Commit
16af61e
·
verified ·
1 Parent(s): 8ec028c

Upload folder using huggingface_hub

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93aa0cf7d99ad26d47d8643e22d1ffc2cdf84ec975fa62bf6a3744af6d78bffa
3
  size 160086542
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b349cc388254688729bb2bee539e11e35048cffe1a215edda714ab0e5e576f87
3
  size 160086542
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 0.99,
3
- "eval_accuracy": 0.84,
4
- "eval_f1_score": 0.8423376623376623,
5
- "eval_gmean": 0.8605301007689224,
6
- "eval_loss": 0.3891015648841858,
7
- "eval_precision": 0.8687999999999999,
8
- "eval_recall": 0.84,
9
- "eval_runtime": 156.3274,
10
- "eval_samples_per_second": 0.32,
11
- "eval_steps_per_second": 0.045,
12
- "total_flos": 1.171238676922368e+17,
13
- "train_loss": 0.9641927083333334,
14
- "train_runtime": 18930.2986,
15
- "train_samples_per_second": 0.317,
16
  "train_steps_per_second": 0.005
17
  }
 
1
  {
2
  "epoch": 0.99,
3
+ "eval_accuracy": 0.8,
4
+ "eval_f1_score": 0.7993558776167471,
5
+ "eval_gmean": 0.7980746061134115,
6
+ "eval_loss": 0.5084765553474426,
7
+ "eval_precision": 0.8012987012987013,
8
+ "eval_recall": 0.8,
9
+ "eval_runtime": 158.4717,
10
+ "eval_samples_per_second": 0.316,
11
+ "eval_steps_per_second": 0.044,
12
+ "total_flos": 1.1914693337677824e+17,
13
+ "train_loss": 0.3557273290490591,
14
+ "train_runtime": 19662.2913,
15
+ "train_samples_per_second": 0.305,
16
  "train_steps_per_second": 0.005
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 0.99,
3
- "eval_accuracy": 0.84,
4
- "eval_f1_score": 0.8423376623376623,
5
- "eval_gmean": 0.8605301007689224,
6
- "eval_loss": 0.3891015648841858,
7
- "eval_precision": 0.8687999999999999,
8
- "eval_recall": 0.84,
9
- "eval_runtime": 156.3274,
10
- "eval_samples_per_second": 0.32,
11
- "eval_steps_per_second": 0.045
12
  }
 
1
  {
2
  "epoch": 0.99,
3
+ "eval_accuracy": 0.8,
4
+ "eval_f1_score": 0.7993558776167471,
5
+ "eval_gmean": 0.7980746061134115,
6
+ "eval_loss": 0.5084765553474426,
7
+ "eval_precision": 0.8012987012987013,
8
+ "eval_recall": 0.8,
9
+ "eval_runtime": 158.4717,
10
+ "eval_samples_per_second": 0.316,
11
+ "eval_steps_per_second": 0.044
12
  }
runs/Mar31_12-39-58_d80bcfba016b/events.out.tfevents.1711888801.d80bcfba016b.34.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfbe93418dd3800a1f694f88819ccee31229f668fa3f35739f4230494fea5d65
3
+ size 9095
runs/Mar31_12-39-58_d80bcfba016b/events.out.tfevents.1711908621.d80bcfba016b.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a38e13b6604aa68e759fc23bc13c7468123e29500549b8f71f9312b10117fa95
3
+ size 605
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.99,
3
- "total_flos": 1.171238676922368e+17,
4
- "train_loss": 0.9641927083333334,
5
- "train_runtime": 18930.2986,
6
- "train_samples_per_second": 0.317,
7
  "train_steps_per_second": 0.005
8
  }
 
1
  {
2
  "epoch": 0.99,
3
+ "total_flos": 1.1914693337677824e+17,
4
+ "train_loss": 0.3557273290490591,
5
+ "train_runtime": 19662.2913,
6
+ "train_samples_per_second": 0.305,
7
  "train_steps_per_second": 0.005
8
  }
trainer_state.json CHANGED
@@ -10,127 +10,127 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.09,
13
- "grad_norm": 35.25,
14
  "learning_rate": 1.8681318681318682e-05,
15
- "loss": 1.1454,
16
  "step": 8
17
  },
18
  {
19
  "epoch": 0.17,
20
- "grad_norm": 36.5,
21
  "learning_rate": 1.6923076923076924e-05,
22
- "loss": 1.1827,
23
  "step": 16
24
  },
25
  {
26
  "epoch": 0.26,
27
- "grad_norm": 34.0,
28
  "learning_rate": 1.5164835164835166e-05,
29
- "loss": 1.1057,
30
  "step": 24
31
  },
32
  {
33
  "epoch": 0.3,
34
- "eval_accuracy": 0.82,
35
- "eval_f1_score": 0.8225454545454545,
36
- "eval_gmean": 0.844292130744656,
37
- "eval_loss": 0.4087499976158142,
38
- "eval_precision": 0.8572435897435898,
39
- "eval_recall": 0.82,
40
- "eval_runtime": 156.306,
41
- "eval_samples_per_second": 0.32,
42
- "eval_steps_per_second": 0.045,
43
  "step": 28
44
  },
45
  {
46
  "epoch": 0.34,
47
- "grad_norm": 30.75,
48
  "learning_rate": 1.3406593406593406e-05,
49
- "loss": 0.9439,
50
  "step": 32
51
  },
52
  {
53
  "epoch": 0.43,
54
- "grad_norm": 35.25,
55
  "learning_rate": 1.164835164835165e-05,
56
- "loss": 0.9382,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.51,
61
- "grad_norm": 35.25,
62
  "learning_rate": 9.890109890109892e-06,
63
- "loss": 0.9136,
64
  "step": 48
65
  },
66
  {
67
  "epoch": 0.6,
68
- "grad_norm": 35.75,
69
  "learning_rate": 8.131868131868132e-06,
70
- "loss": 0.8892,
71
  "step": 56
72
  },
73
  {
74
  "epoch": 0.6,
75
- "eval_accuracy": 0.84,
76
- "eval_f1_score": 0.8423376623376623,
77
- "eval_gmean": 0.8605301007689224,
78
- "eval_loss": 0.39445313811302185,
79
- "eval_precision": 0.8687999999999999,
80
- "eval_recall": 0.84,
81
- "eval_runtime": 156.2149,
82
- "eval_samples_per_second": 0.32,
83
- "eval_steps_per_second": 0.045,
84
  "step": 56
85
  },
86
  {
87
  "epoch": 0.68,
88
- "grad_norm": 25.0,
89
  "learning_rate": 6.373626373626373e-06,
90
- "loss": 0.9037,
91
  "step": 64
92
  },
93
  {
94
  "epoch": 0.77,
95
- "grad_norm": 19.25,
96
  "learning_rate": 4.615384615384616e-06,
97
- "loss": 0.8933,
98
  "step": 72
99
  },
100
  {
101
  "epoch": 0.85,
102
- "grad_norm": 21.0,
103
  "learning_rate": 2.8571428571428573e-06,
104
- "loss": 0.8473,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.9,
109
- "eval_accuracy": 0.84,
110
- "eval_f1_score": 0.8423376623376623,
111
- "eval_gmean": 0.8605301007689224,
112
- "eval_loss": 0.3901953101158142,
113
- "eval_precision": 0.8687999999999999,
114
- "eval_recall": 0.84,
115
- "eval_runtime": 155.9323,
116
- "eval_samples_per_second": 0.321,
117
- "eval_steps_per_second": 0.045,
118
  "step": 84
119
  },
120
  {
121
  "epoch": 0.94,
122
- "grad_norm": 32.5,
123
  "learning_rate": 1.098901098901099e-06,
124
- "loss": 0.8776,
125
  "step": 88
126
  },
127
  {
128
  "epoch": 0.99,
129
  "step": 93,
130
- "total_flos": 1.171238676922368e+17,
131
- "train_loss": 0.9641927083333334,
132
- "train_runtime": 18930.2986,
133
- "train_samples_per_second": 0.317,
134
  "train_steps_per_second": 0.005
135
  }
136
  ],
@@ -139,7 +139,7 @@
139
  "num_input_tokens_seen": 0,
140
  "num_train_epochs": 1,
141
  "save_steps": 500,
142
- "total_flos": 1.171238676922368e+17,
143
  "train_batch_size": 16,
144
  "trial_name": null,
145
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.09,
13
+ "grad_norm": 6.0,
14
  "learning_rate": 1.8681318681318682e-05,
15
+ "loss": 0.266,
16
  "step": 8
17
  },
18
  {
19
  "epoch": 0.17,
20
+ "grad_norm": 8.75,
21
  "learning_rate": 1.6923076923076924e-05,
22
+ "loss": 0.404,
23
  "step": 16
24
  },
25
  {
26
  "epoch": 0.26,
27
+ "grad_norm": 7.90625,
28
  "learning_rate": 1.5164835164835166e-05,
29
+ "loss": 0.4084,
30
  "step": 24
31
  },
32
  {
33
  "epoch": 0.3,
34
+ "eval_accuracy": 0.8,
35
+ "eval_f1_score": 0.7993558776167471,
36
+ "eval_gmean": 0.7980746061134115,
37
+ "eval_loss": 0.5118359327316284,
38
+ "eval_precision": 0.8012987012987013,
39
+ "eval_recall": 0.8,
40
+ "eval_runtime": 158.1221,
41
+ "eval_samples_per_second": 0.316,
42
+ "eval_steps_per_second": 0.044,
43
  "step": 28
44
  },
45
  {
46
  "epoch": 0.34,
47
+ "grad_norm": 6.84375,
48
  "learning_rate": 1.3406593406593406e-05,
49
+ "loss": 0.3697,
50
  "step": 32
51
  },
52
  {
53
  "epoch": 0.43,
54
+ "grad_norm": 5.9375,
55
  "learning_rate": 1.164835164835165e-05,
56
+ "loss": 0.3647,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.51,
61
+ "grad_norm": 7.1875,
62
  "learning_rate": 9.890109890109892e-06,
63
+ "loss": 0.3738,
64
  "step": 48
65
  },
66
  {
67
  "epoch": 0.6,
68
+ "grad_norm": 6.46875,
69
  "learning_rate": 8.131868131868132e-06,
70
+ "loss": 0.3298,
71
  "step": 56
72
  },
73
  {
74
  "epoch": 0.6,
75
+ "eval_accuracy": 0.8,
76
+ "eval_f1_score": 0.7993558776167471,
77
+ "eval_gmean": 0.7980746061134115,
78
+ "eval_loss": 0.5087890625,
79
+ "eval_precision": 0.8012987012987013,
80
+ "eval_recall": 0.8,
81
+ "eval_runtime": 158.641,
82
+ "eval_samples_per_second": 0.315,
83
+ "eval_steps_per_second": 0.044,
84
  "step": 56
85
  },
86
  {
87
  "epoch": 0.68,
88
+ "grad_norm": 6.75,
89
  "learning_rate": 6.373626373626373e-06,
90
+ "loss": 0.333,
91
  "step": 64
92
  },
93
  {
94
  "epoch": 0.77,
95
+ "grad_norm": 7.3125,
96
  "learning_rate": 4.615384615384616e-06,
97
+ "loss": 0.3813,
98
  "step": 72
99
  },
100
  {
101
  "epoch": 0.85,
102
+ "grad_norm": 6.5,
103
  "learning_rate": 2.8571428571428573e-06,
104
+ "loss": 0.3403,
105
  "step": 80
106
  },
107
  {
108
  "epoch": 0.9,
109
+ "eval_accuracy": 0.8,
110
+ "eval_f1_score": 0.7993558776167471,
111
+ "eval_gmean": 0.7980746061134115,
112
+ "eval_loss": 0.5084765553474426,
113
+ "eval_precision": 0.8012987012987013,
114
+ "eval_recall": 0.8,
115
+ "eval_runtime": 158.2765,
116
+ "eval_samples_per_second": 0.316,
117
+ "eval_steps_per_second": 0.044,
118
  "step": 84
119
  },
120
  {
121
  "epoch": 0.94,
122
+ "grad_norm": 6.84375,
123
  "learning_rate": 1.098901098901099e-06,
124
+ "loss": 0.3473,
125
  "step": 88
126
  },
127
  {
128
  "epoch": 0.99,
129
  "step": 93,
130
+ "total_flos": 1.1914693337677824e+17,
131
+ "train_loss": 0.3557273290490591,
132
+ "train_runtime": 19662.2913,
133
+ "train_samples_per_second": 0.305,
134
  "train_steps_per_second": 0.005
135
  }
136
  ],
 
139
  "num_input_tokens_seen": 0,
140
  "num_train_epochs": 1,
141
  "save_steps": 500,
142
+ "total_flos": 1.1914693337677824e+17,
143
  "train_batch_size": 16,
144
  "trial_name": null,
145
  "trial_params": null