ShengdingHu committed on
Commit
78a640d
·
1 Parent(s): 16e45d6

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.7472924187725631,
4
- "eval_loss": 0.5988962054252625,
5
- "eval_runtime": 0.3009,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 920.67,
8
- "eval_steps_per_second": 9.971,
9
- "train_loss": 0.49518577380058093,
10
- "train_runtime": 60.2408,
11
  "train_samples": 2490,
12
- "train_samples_per_second": 413.341,
13
- "train_steps_per_second": 12.948
14
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.6750902527075813,
4
+ "eval_loss": 0.6114864945411682,
5
+ "eval_runtime": 0.2285,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 1212.012,
8
+ "eval_steps_per_second": 13.126,
9
+ "train_loss": 0.5984805376101763,
10
+ "train_runtime": 94.4914,
11
  "train_samples": 2490,
12
+ "train_samples_per_second": 263.516,
13
+ "train_steps_per_second": 8.255
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.7472924187725631,
4
- "eval_loss": 0.5988962054252625,
5
- "eval_runtime": 0.3009,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 920.67,
8
- "eval_steps_per_second": 9.971
9
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.6750902527075813,
4
+ "eval_loss": 0.6114864945411682,
5
+ "eval_runtime": 0.2285,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 1212.012,
8
+ "eval_steps_per_second": 13.126
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4168f966e8b785553498a9065d7db43b21cb2e0ade49d284869e1cb05acc60c2
3
- size 2661529
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:479f47801246dd806b3e3b3630da4ccc2ce96a88c03ab6269bda37d649febb33
3
+ size 2587045
runs/Jan28_14-43-03_node4/events.out.tfevents.1643352217.node4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d5adc47703bb4895f9fc48e854ef9cfc954d918c121e7e00d073c747c502733
3
- size 5125
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b6fdbca0254bf4058ea90b07ad2d83444163328c5f706c2c19282dace102361
3
+ size 7251
runs/Jan28_14-43-03_node4/events.out.tfevents.1643352312.node4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45625e5c740369a3268dfcd6176a35f4bb21697cbf945acd2e8fed7f5e10bd1d
3
+ size 363
runs/Jan29_11-35-56_node4/1643427381.1388886/events.out.tfevents.1643427381.node4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c28746d03b06ee0d08bd5afb6d3210df1408bccde6037c252bd139cd9f84deee
3
+ size 4585
runs/Jan29_11-35-56_node4/events.out.tfevents.1643427381.node4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6e39695b004c5d296fc084eb130fb4c5d7c094f36dda70fc64521443f9cc43
3
+ size 3516
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 10.0,
3
- "train_loss": 0.49518577380058093,
4
- "train_runtime": 60.2408,
5
  "train_samples": 2490,
6
- "train_samples_per_second": 413.341,
7
- "train_steps_per_second": 12.948
8
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "train_loss": 0.5984805376101763,
4
+ "train_runtime": 94.4914,
5
  "train_samples": 2490,
6
+ "train_samples_per_second": 263.516,
7
+ "train_steps_per_second": 8.255
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7472924187725631,
3
- "best_model_checkpoint": "outputs/lora/roberta-base/v2/rte/checkpoint-702",
4
  "epoch": 10.0,
5
  "global_step": 780,
6
  "is_hyper_param_search": false,
@@ -10,112 +10,112 @@
10
  {
11
  "epoch": 1.0,
12
  "eval_accuracy": 0.4729241877256318,
13
- "eval_loss": 0.700201690196991,
14
- "eval_runtime": 0.2542,
15
- "eval_samples_per_second": 1089.901,
16
- "eval_steps_per_second": 11.804,
17
  "step": 78
18
  },
19
  {
20
  "epoch": 2.0,
21
- "eval_accuracy": 0.6389891696750902,
22
- "eval_loss": 0.6369197964668274,
23
- "eval_runtime": 0.3571,
24
- "eval_samples_per_second": 775.771,
25
- "eval_steps_per_second": 8.402,
26
  "step": 156
27
  },
28
  {
29
  "epoch": 3.0,
30
- "eval_accuracy": 0.6606498194945848,
31
- "eval_loss": 0.6243730783462524,
32
- "eval_runtime": 0.2436,
33
- "eval_samples_per_second": 1137.183,
34
- "eval_steps_per_second": 12.316,
35
  "step": 234
36
  },
37
  {
38
  "epoch": 4.0,
39
- "eval_accuracy": 0.7184115523465704,
40
- "eval_loss": 0.572300136089325,
41
- "eval_runtime": 0.228,
42
- "eval_samples_per_second": 1214.712,
43
- "eval_steps_per_second": 13.156,
44
  "step": 312
45
  },
46
  {
47
  "epoch": 5.0,
48
- "eval_accuracy": 0.7220216606498195,
49
- "eval_loss": 0.5677592754364014,
50
- "eval_runtime": 0.2299,
51
- "eval_samples_per_second": 1204.73,
52
- "eval_steps_per_second": 13.048,
53
  "step": 390
54
  },
55
  {
56
  "epoch": 6.0,
57
- "eval_accuracy": 0.7220216606498195,
58
- "eval_loss": 0.6392128467559814,
59
- "eval_runtime": 0.2306,
60
- "eval_samples_per_second": 1201.256,
61
- "eval_steps_per_second": 13.01,
62
  "step": 468
63
  },
64
  {
65
  "epoch": 6.41,
66
  "learning_rate": 0.00019099590723055935,
67
- "loss": 0.5649,
68
  "step": 500
69
  },
70
  {
71
  "epoch": 7.0,
72
- "eval_accuracy": 0.7364620938628159,
73
- "eval_loss": 0.5661243200302124,
74
- "eval_runtime": 0.3267,
75
- "eval_samples_per_second": 847.854,
76
- "eval_steps_per_second": 9.183,
77
  "step": 546
78
  },
79
  {
80
  "epoch": 8.0,
81
- "eval_accuracy": 0.7328519855595668,
82
- "eval_loss": 0.6633248329162598,
83
- "eval_runtime": 0.228,
84
- "eval_samples_per_second": 1214.709,
85
- "eval_steps_per_second": 13.156,
86
  "step": 624
87
  },
88
  {
89
  "epoch": 9.0,
90
- "eval_accuracy": 0.7472924187725631,
91
- "eval_loss": 0.5988962054252625,
92
- "eval_runtime": 0.2338,
93
- "eval_samples_per_second": 1184.722,
94
- "eval_steps_per_second": 12.831,
95
  "step": 702
96
  },
97
  {
98
  "epoch": 10.0,
99
- "eval_accuracy": 0.7292418772563177,
100
- "eval_loss": 0.6346577405929565,
101
- "eval_runtime": 0.2362,
102
- "eval_samples_per_second": 1172.727,
103
- "eval_steps_per_second": 12.701,
104
  "step": 780
105
  },
106
  {
107
  "epoch": 10.0,
108
  "step": 780,
109
- "total_flos": 1643505980774400.0,
110
- "train_loss": 0.49518577380058093,
111
- "train_runtime": 60.2408,
112
- "train_samples_per_second": 413.341,
113
- "train_steps_per_second": 12.948
114
  }
115
  ],
116
  "max_steps": 780,
117
  "num_train_epochs": 10,
118
- "total_flos": 1643505980774400.0,
119
  "trial_name": null,
120
  "trial_params": null
121
  }
 
1
  {
2
+ "best_metric": 0.6750902527075813,
3
+ "best_model_checkpoint": "outputs/lora/roberta-base/v2/rte/checkpoint-624",
4
  "epoch": 10.0,
5
  "global_step": 780,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 1.0,
12
  "eval_accuracy": 0.4729241877256318,
13
+ "eval_loss": 0.70159912109375,
14
+ "eval_runtime": 0.2776,
15
+ "eval_samples_per_second": 997.786,
16
+ "eval_steps_per_second": 10.806,
17
  "step": 78
18
  },
19
  {
20
  "epoch": 2.0,
21
+ "eval_accuracy": 0.5018050541516246,
22
+ "eval_loss": 0.6921780109405518,
23
+ "eval_runtime": 0.2113,
24
+ "eval_samples_per_second": 1310.929,
25
+ "eval_steps_per_second": 14.198,
26
  "step": 156
27
  },
28
  {
29
  "epoch": 3.0,
30
+ "eval_accuracy": 0.5667870036101083,
31
+ "eval_loss": 0.6696296334266663,
32
+ "eval_runtime": 0.2287,
33
+ "eval_samples_per_second": 1211.094,
34
+ "eval_steps_per_second": 13.117,
35
  "step": 234
36
  },
37
  {
38
  "epoch": 4.0,
39
+ "eval_accuracy": 0.631768953068592,
40
+ "eval_loss": 0.643951416015625,
41
+ "eval_runtime": 0.2582,
42
+ "eval_samples_per_second": 1072.841,
43
+ "eval_steps_per_second": 11.619,
44
  "step": 312
45
  },
46
  {
47
  "epoch": 5.0,
48
+ "eval_accuracy": 0.631768953068592,
49
+ "eval_loss": 0.6657952070236206,
50
+ "eval_runtime": 0.2373,
51
+ "eval_samples_per_second": 1167.361,
52
+ "eval_steps_per_second": 12.643,
53
  "step": 390
54
  },
55
  {
56
  "epoch": 6.0,
57
+ "eval_accuracy": 0.6570397111913358,
58
+ "eval_loss": 0.6278102993965149,
59
+ "eval_runtime": 0.2222,
60
+ "eval_samples_per_second": 1246.371,
61
+ "eval_steps_per_second": 13.499,
62
  "step": 468
63
  },
64
  {
65
  "epoch": 6.41,
66
  "learning_rate": 0.00019099590723055935,
67
+ "loss": 0.6432,
68
  "step": 500
69
  },
70
  {
71
  "epoch": 7.0,
72
+ "eval_accuracy": 0.6642599277978339,
73
+ "eval_loss": 0.6154282689094543,
74
+ "eval_runtime": 0.2258,
75
+ "eval_samples_per_second": 1226.508,
76
+ "eval_steps_per_second": 13.283,
77
  "step": 546
78
  },
79
  {
80
  "epoch": 8.0,
81
+ "eval_accuracy": 0.6750902527075813,
82
+ "eval_loss": 0.6114864945411682,
83
+ "eval_runtime": 0.2213,
84
+ "eval_samples_per_second": 1251.904,
85
+ "eval_steps_per_second": 13.559,
86
  "step": 624
87
  },
88
  {
89
  "epoch": 9.0,
90
+ "eval_accuracy": 0.6534296028880866,
91
+ "eval_loss": 0.6326375603675842,
92
+ "eval_runtime": 0.2324,
93
+ "eval_samples_per_second": 1192.091,
94
+ "eval_steps_per_second": 12.911,
95
  "step": 702
96
  },
97
  {
98
  "epoch": 10.0,
99
+ "eval_accuracy": 0.6678700361010831,
100
+ "eval_loss": 0.6225477457046509,
101
+ "eval_runtime": 0.2107,
102
+ "eval_samples_per_second": 1314.871,
103
+ "eval_steps_per_second": 14.24,
104
  "step": 780
105
  },
106
  {
107
  "epoch": 10.0,
108
  "step": 780,
109
+ "total_flos": 1638956800972800.0,
110
+ "train_loss": 0.5984805376101763,
111
+ "train_runtime": 94.4914,
112
+ "train_samples_per_second": 263.516,
113
+ "train_steps_per_second": 8.255
114
  }
115
  ],
116
  "max_steps": 780,
117
  "num_train_epochs": 10,
118
+ "total_flos": 1638956800972800.0,
119
  "trial_name": null,
120
  "trial_params": null
121
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b70dcd2cea804f4ca61036bac5575f24f6125f32a118370cfd426b637ade935
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b87fc109746fef165a5437bcaba71950ecac52b6d309d88608d3b828a6f71f9
3
  size 2991