SO84350 committed on
Commit
7203b26
·
1 Parent(s): 325786a

Upload folder using huggingface_hub

Browse files
checkpoint-40/adapter_config.json CHANGED
@@ -33,13 +33,13 @@
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
 
 
36
  "down_proj",
37
  "v_proj",
38
- "up_proj",
39
- "gate_proj",
40
  "k_proj",
41
- "q_proj",
42
- "o_proj"
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
 
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
+ "q_proj",
37
+ "gate_proj",
38
  "down_proj",
39
  "v_proj",
40
+ "o_proj",
 
41
  "k_proj",
42
+ "up_proj"
 
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
checkpoint-40/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae42738c68ca2f49cdb8eceaeee24eb6786311a7bb2e6ce6b923d9a2c5081ef3
3
  size 528550256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9560e2e50cfadee75690ba80538050731c99673fc654696e56c3ca4a6f88c515
3
  size 528550256
checkpoint-40/trainer_state.json CHANGED
@@ -11,94 +11,94 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.04509582863585118,
14
- "eval_loss": 2.755807876586914,
15
- "eval_runtime": 49.9512,
16
- "eval_samples_per_second": 3.744,
17
- "eval_steps_per_second": 1.882,
18
  "step": 5
19
  },
20
  {
21
  "epoch": 0.09019165727170236,
22
- "grad_norm": 8.082921704044566e-05,
23
  "learning_rate": 3.91304347826087e-06,
24
  "loss": 2.5175,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.09019165727170236,
29
- "eval_loss": 2.642366886138916,
30
- "eval_runtime": 48.9639,
31
- "eval_samples_per_second": 3.819,
32
- "eval_steps_per_second": 1.92,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 0.13528748590755355,
37
- "eval_loss": 2.448051691055298,
38
- "eval_runtime": 48.7281,
39
- "eval_samples_per_second": 3.838,
40
- "eval_steps_per_second": 1.929,
41
  "step": 15
42
  },
43
  {
44
  "epoch": 0.18038331454340473,
45
- "grad_norm": 5.596791743300855e-05,
46
  "learning_rate": 8.260869565217392e-06,
47
  "loss": 2.3923,
48
  "step": 20
49
  },
50
  {
51
  "epoch": 0.18038331454340473,
52
- "eval_loss": 2.2489748001098633,
53
- "eval_runtime": 48.9996,
54
- "eval_samples_per_second": 3.816,
55
- "eval_steps_per_second": 1.918,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 0.2254791431792559,
60
- "eval_loss": 2.101285934448242,
61
- "eval_runtime": 48.6925,
62
- "eval_samples_per_second": 3.84,
63
- "eval_steps_per_second": 1.93,
64
  "step": 25
65
  },
66
  {
67
  "epoch": 0.2705749718151071,
68
- "grad_norm": 2.360753387620207e-05,
69
  "learning_rate": 9.97758641300553e-06,
70
- "loss": 1.9697,
71
  "step": 30
72
  },
73
  {
74
  "epoch": 0.2705749718151071,
75
- "eval_loss": 2.018988847732544,
76
- "eval_runtime": 49.2078,
77
- "eval_samples_per_second": 3.8,
78
- "eval_steps_per_second": 1.91,
79
  "step": 30
80
  },
81
  {
82
  "epoch": 0.3156708004509583,
83
- "eval_loss": 1.968351125717163,
84
- "eval_runtime": 48.7611,
85
- "eval_samples_per_second": 3.835,
86
- "eval_steps_per_second": 1.928,
87
  "step": 35
88
  },
89
  {
90
  "epoch": 0.36076662908680945,
91
- "grad_norm": 1.8503900719224475e-05,
92
  "learning_rate": 9.841341526992536e-06,
93
- "loss": 1.9253,
94
  "step": 40
95
  },
96
  {
97
  "epoch": 0.36076662908680945,
98
- "eval_loss": 1.9313749074935913,
99
- "eval_runtime": 48.6426,
100
- "eval_samples_per_second": 3.844,
101
- "eval_steps_per_second": 1.932,
102
  "step": 40
103
  }
104
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.04509582863585118,
14
+ "eval_loss": 2.755869150161743,
15
+ "eval_runtime": 39.8797,
16
+ "eval_samples_per_second": 4.689,
17
+ "eval_steps_per_second": 2.357,
18
  "step": 5
19
  },
20
  {
21
  "epoch": 0.09019165727170236,
22
+ "grad_norm": 8.087086462182924e-05,
23
  "learning_rate": 3.91304347826087e-06,
24
  "loss": 2.5175,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.09019165727170236,
29
+ "eval_loss": 2.6424806118011475,
30
+ "eval_runtime": 39.4795,
31
+ "eval_samples_per_second": 4.737,
32
+ "eval_steps_per_second": 2.381,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 0.13528748590755355,
37
+ "eval_loss": 2.4479880332946777,
38
+ "eval_runtime": 39.6378,
39
+ "eval_samples_per_second": 4.718,
40
+ "eval_steps_per_second": 2.371,
41
  "step": 15
42
  },
43
  {
44
  "epoch": 0.18038331454340473,
45
+ "grad_norm": 5.596490518655628e-05,
46
  "learning_rate": 8.260869565217392e-06,
47
  "loss": 2.3923,
48
  "step": 20
49
  },
50
  {
51
  "epoch": 0.18038331454340473,
52
+ "eval_loss": 2.249032974243164,
53
+ "eval_runtime": 40.0741,
54
+ "eval_samples_per_second": 4.666,
55
+ "eval_steps_per_second": 2.346,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 0.2254791431792559,
60
+ "eval_loss": 2.1012394428253174,
61
+ "eval_runtime": 39.6516,
62
+ "eval_samples_per_second": 4.716,
63
+ "eval_steps_per_second": 2.371,
64
  "step": 25
65
  },
66
  {
67
  "epoch": 0.2705749718151071,
68
+ "grad_norm": 2.367888009757735e-05,
69
  "learning_rate": 9.97758641300553e-06,
70
+ "loss": 1.9696,
71
  "step": 30
72
  },
73
  {
74
  "epoch": 0.2705749718151071,
75
+ "eval_loss": 2.0189175605773926,
76
+ "eval_runtime": 39.4326,
77
+ "eval_samples_per_second": 4.742,
78
+ "eval_steps_per_second": 2.384,
79
  "step": 30
80
  },
81
  {
82
  "epoch": 0.3156708004509583,
83
+ "eval_loss": 1.9682776927947998,
84
+ "eval_runtime": 39.6167,
85
+ "eval_samples_per_second": 4.72,
86
+ "eval_steps_per_second": 2.373,
87
  "step": 35
88
  },
89
  {
90
  "epoch": 0.36076662908680945,
91
+ "grad_norm": 1.8539132724981755e-05,
92
  "learning_rate": 9.841341526992536e-06,
93
+ "loss": 1.9254,
94
  "step": 40
95
  },
96
  {
97
  "epoch": 0.36076662908680945,
98
+ "eval_loss": 1.9314751625061035,
99
+ "eval_runtime": 40.1866,
100
+ "eval_samples_per_second": 4.653,
101
+ "eval_steps_per_second": 2.339,
102
  "step": 40
103
  }
104
  ],
checkpoint-40/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a29b26ce399fed6a1d0492eeac81ed205826ce22d3f0cc648ca6f3a187bd287f
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b72ba8573ed734a6285768fbde417a7804f6b0e1fd4951476235aa39cc1bc96
3
  size 5841
checkpoint-50/adapter_config.json CHANGED
@@ -33,13 +33,13 @@
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
 
 
36
  "down_proj",
37
  "v_proj",
38
- "up_proj",
39
- "gate_proj",
40
  "k_proj",
41
- "q_proj",
42
- "o_proj"
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
 
33
  "rank_pattern": {},
34
  "revision": null,
35
  "target_modules": [
36
+ "q_proj",
37
+ "gate_proj",
38
  "down_proj",
39
  "v_proj",
40
+ "o_proj",
 
41
  "k_proj",
42
+ "up_proj"
 
43
  ],
44
  "target_parameters": null,
45
  "task_type": "CAUSAL_LM",
checkpoint-50/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43b441ced103345b13a797a29f58fd6e045555859827771bb4f5c6907dc14668
3
  size 528550256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7cec7abd5fbd48dc990315f94392a9ffcffa5abc279d02ddcd15b13fe159459
3
  size 528550256
checkpoint-50/trainer_state.json CHANGED
@@ -11,117 +11,117 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.04509582863585118,
14
- "eval_loss": 2.755807876586914,
15
- "eval_runtime": 49.9512,
16
- "eval_samples_per_second": 3.744,
17
- "eval_steps_per_second": 1.882,
18
  "step": 5
19
  },
20
  {
21
  "epoch": 0.09019165727170236,
22
- "grad_norm": 8.082921704044566e-05,
23
  "learning_rate": 3.91304347826087e-06,
24
  "loss": 2.5175,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.09019165727170236,
29
- "eval_loss": 2.642366886138916,
30
- "eval_runtime": 48.9639,
31
- "eval_samples_per_second": 3.819,
32
- "eval_steps_per_second": 1.92,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 0.13528748590755355,
37
- "eval_loss": 2.448051691055298,
38
- "eval_runtime": 48.7281,
39
- "eval_samples_per_second": 3.838,
40
- "eval_steps_per_second": 1.929,
41
  "step": 15
42
  },
43
  {
44
  "epoch": 0.18038331454340473,
45
- "grad_norm": 5.596791743300855e-05,
46
  "learning_rate": 8.260869565217392e-06,
47
  "loss": 2.3923,
48
  "step": 20
49
  },
50
  {
51
  "epoch": 0.18038331454340473,
52
- "eval_loss": 2.2489748001098633,
53
- "eval_runtime": 48.9996,
54
- "eval_samples_per_second": 3.816,
55
- "eval_steps_per_second": 1.918,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 0.2254791431792559,
60
- "eval_loss": 2.101285934448242,
61
- "eval_runtime": 48.6925,
62
- "eval_samples_per_second": 3.84,
63
- "eval_steps_per_second": 1.93,
64
  "step": 25
65
  },
66
  {
67
  "epoch": 0.2705749718151071,
68
- "grad_norm": 2.360753387620207e-05,
69
  "learning_rate": 9.97758641300553e-06,
70
- "loss": 1.9697,
71
  "step": 30
72
  },
73
  {
74
  "epoch": 0.2705749718151071,
75
- "eval_loss": 2.018988847732544,
76
- "eval_runtime": 49.2078,
77
- "eval_samples_per_second": 3.8,
78
- "eval_steps_per_second": 1.91,
79
  "step": 30
80
  },
81
  {
82
  "epoch": 0.3156708004509583,
83
- "eval_loss": 1.968351125717163,
84
- "eval_runtime": 48.7611,
85
- "eval_samples_per_second": 3.835,
86
- "eval_steps_per_second": 1.928,
87
  "step": 35
88
  },
89
  {
90
  "epoch": 0.36076662908680945,
91
- "grad_norm": 1.8503900719224475e-05,
92
  "learning_rate": 9.841341526992536e-06,
93
- "loss": 1.9253,
94
  "step": 40
95
  },
96
  {
97
  "epoch": 0.36076662908680945,
98
- "eval_loss": 1.9313749074935913,
99
- "eval_runtime": 48.6426,
100
- "eval_samples_per_second": 3.844,
101
- "eval_steps_per_second": 1.932,
102
  "step": 40
103
  },
104
  {
105
  "epoch": 0.40586245772266066,
106
- "eval_loss": 1.9003801345825195,
107
- "eval_runtime": 48.7802,
108
- "eval_samples_per_second": 3.834,
109
- "eval_steps_per_second": 1.927,
110
  "step": 45
111
  },
112
  {
113
  "epoch": 0.4509582863585118,
114
- "grad_norm": 1.6762534869485535e-05,
115
  "learning_rate": 9.584688140963945e-06,
116
- "loss": 1.7795,
117
  "step": 50
118
  },
119
  {
120
  "epoch": 0.4509582863585118,
121
- "eval_loss": 1.872605323791504,
122
- "eval_runtime": 49.0664,
123
- "eval_samples_per_second": 3.811,
124
- "eval_steps_per_second": 1.916,
125
  "step": 50
126
  }
127
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.04509582863585118,
14
+ "eval_loss": 2.755869150161743,
15
+ "eval_runtime": 39.8797,
16
+ "eval_samples_per_second": 4.689,
17
+ "eval_steps_per_second": 2.357,
18
  "step": 5
19
  },
20
  {
21
  "epoch": 0.09019165727170236,
22
+ "grad_norm": 8.087086462182924e-05,
23
  "learning_rate": 3.91304347826087e-06,
24
  "loss": 2.5175,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.09019165727170236,
29
+ "eval_loss": 2.6424806118011475,
30
+ "eval_runtime": 39.4795,
31
+ "eval_samples_per_second": 4.737,
32
+ "eval_steps_per_second": 2.381,
33
  "step": 10
34
  },
35
  {
36
  "epoch": 0.13528748590755355,
37
+ "eval_loss": 2.4479880332946777,
38
+ "eval_runtime": 39.6378,
39
+ "eval_samples_per_second": 4.718,
40
+ "eval_steps_per_second": 2.371,
41
  "step": 15
42
  },
43
  {
44
  "epoch": 0.18038331454340473,
45
+ "grad_norm": 5.596490518655628e-05,
46
  "learning_rate": 8.260869565217392e-06,
47
  "loss": 2.3923,
48
  "step": 20
49
  },
50
  {
51
  "epoch": 0.18038331454340473,
52
+ "eval_loss": 2.249032974243164,
53
+ "eval_runtime": 40.0741,
54
+ "eval_samples_per_second": 4.666,
55
+ "eval_steps_per_second": 2.346,
56
  "step": 20
57
  },
58
  {
59
  "epoch": 0.2254791431792559,
60
+ "eval_loss": 2.1012394428253174,
61
+ "eval_runtime": 39.6516,
62
+ "eval_samples_per_second": 4.716,
63
+ "eval_steps_per_second": 2.371,
64
  "step": 25
65
  },
66
  {
67
  "epoch": 0.2705749718151071,
68
+ "grad_norm": 2.367888009757735e-05,
69
  "learning_rate": 9.97758641300553e-06,
70
+ "loss": 1.9696,
71
  "step": 30
72
  },
73
  {
74
  "epoch": 0.2705749718151071,
75
+ "eval_loss": 2.0189175605773926,
76
+ "eval_runtime": 39.4326,
77
+ "eval_samples_per_second": 4.742,
78
+ "eval_steps_per_second": 2.384,
79
  "step": 30
80
  },
81
  {
82
  "epoch": 0.3156708004509583,
83
+ "eval_loss": 1.9682776927947998,
84
+ "eval_runtime": 39.6167,
85
+ "eval_samples_per_second": 4.72,
86
+ "eval_steps_per_second": 2.373,
87
  "step": 35
88
  },
89
  {
90
  "epoch": 0.36076662908680945,
91
+ "grad_norm": 1.8539132724981755e-05,
92
  "learning_rate": 9.841341526992536e-06,
93
+ "loss": 1.9254,
94
  "step": 40
95
  },
96
  {
97
  "epoch": 0.36076662908680945,
98
+ "eval_loss": 1.9314751625061035,
99
+ "eval_runtime": 40.1866,
100
+ "eval_samples_per_second": 4.653,
101
+ "eval_steps_per_second": 2.339,
102
  "step": 40
103
  },
104
  {
105
  "epoch": 0.40586245772266066,
106
+ "eval_loss": 1.9004476070404053,
107
+ "eval_runtime": 39.7173,
108
+ "eval_samples_per_second": 4.708,
109
+ "eval_steps_per_second": 2.367,
110
  "step": 45
111
  },
112
  {
113
  "epoch": 0.4509582863585118,
114
+ "grad_norm": 1.6754116586525925e-05,
115
  "learning_rate": 9.584688140963945e-06,
116
+ "loss": 1.7796,
117
  "step": 50
118
  },
119
  {
120
  "epoch": 0.4509582863585118,
121
+ "eval_loss": 1.8727240562438965,
122
+ "eval_runtime": 39.3767,
123
+ "eval_samples_per_second": 4.749,
124
+ "eval_steps_per_second": 2.387,
125
  "step": 50
126
  }
127
  ],
checkpoint-50/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a29b26ce399fed6a1d0492eeac81ed205826ce22d3f0cc648ca6f3a187bd287f
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b72ba8573ed734a6285768fbde417a7804f6b0e1fd4951476235aa39cc1bc96
3
  size 5841