bimabk committed on
Commit
e7035f6
·
verified ·
1 Parent(s): 27037a9

Upload task output 1

Browse files
adapter_config.json CHANGED
@@ -30,13 +30,13 @@
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
33
- "down_proj",
34
- "up_proj",
35
- "o_proj",
36
  "q_proj",
 
 
37
  "v_proj",
 
38
  "gate_proj",
39
- "k_proj"
40
  ],
41
  "target_parameters": null,
42
  "task_type": "CAUSAL_LM",
 
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
 
 
 
33
  "q_proj",
34
+ "k_proj",
35
+ "down_proj",
36
  "v_proj",
37
+ "up_proj",
38
  "gate_proj",
39
+ "o_proj"
40
  ],
41
  "target_parameters": null,
42
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18b3cfd1956f8f81b7d944a1ceac17b75dd1164b4d5535b407d50a8f31636c6a
3
  size 957942768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e162c64c34c562057fc3e8070abf86b086e58c5b39b382aad71e2ba9f36a793
3
  size 957942768
loss.txt CHANGED
@@ -1 +1 @@
1
- 1,no_eval
 
1
+ 4,-0.010877630673348904
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.25,
6
  "eval_steps": 500,
7
- "global_step": 1,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -25,7 +25,7 @@
25
  "entropy": 0.5559625327587128,
26
  "epoch": 0.25,
27
  "frac_reward_zero_std": 0.0714285746216774,
28
- "grad_norm": 0.3674161732196808,
29
  "learning_rate": 0.0,
30
  "loss": -0.0119,
31
  "num_tokens": 217107.0,
@@ -39,12 +39,140 @@
39
  "sampling/sampling_logp_difference/max": 1.4227659702301025,
40
  "sampling/sampling_logp_difference/mean": 0.031906239688396454,
41
  "step": 1,
42
- "step_time": 1646.065230104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
  ],
45
  "logging_steps": 1.0,
46
  "max_steps": 8,
47
- "num_input_tokens_seen": 217107,
48
  "num_train_epochs": 2,
49
  "save_steps": 500,
50
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 500,
7
+ "global_step": 4,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
25
  "entropy": 0.5559625327587128,
26
  "epoch": 0.25,
27
  "frac_reward_zero_std": 0.0714285746216774,
28
+ "grad_norm": 0.3681248426437378,
29
  "learning_rate": 0.0,
30
  "loss": -0.0119,
31
  "num_tokens": 217107.0,
 
39
  "sampling/sampling_logp_difference/max": 1.4227659702301025,
40
  "sampling/sampling_logp_difference/mean": 0.031906239688396454,
41
  "step": 1,
42
+ "step_time": 1695.3378897999974
43
+ },
44
+ {
45
+ "clip_ratio/high_max": 0.0,
46
+ "clip_ratio/high_mean": 0.0,
47
+ "clip_ratio/low_mean": 0.0,
48
+ "clip_ratio/low_min": 0.0,
49
+ "clip_ratio/region_mean": 0.0,
50
+ "completions/clipped_ratio": 0.0,
51
+ "completions/max_length": 87.0,
52
+ "completions/max_terminated_length": 87.0,
53
+ "completions/mean_length": 50.145835876464844,
54
+ "completions/mean_terminated_length": 50.145835876464844,
55
+ "completions/min_length": 31.0,
56
+ "completions/min_terminated_length": 31.0,
57
+ "entropy": 0.5740033835172653,
58
+ "epoch": 0.5,
59
+ "frac_reward_zero_std": 0.0714285746216774,
60
+ "grad_norm": 0.44471269845962524,
61
+ "learning_rate": 2.84304e-07,
62
+ "loss": -0.019,
63
+ "num_tokens": 458068.0,
64
+ "reward": 0.2202083021402359,
65
+ "reward_std": 0.4740038514137268,
66
+ "rewards/env_alfworld_reward/mean": 0.2202083021402359,
67
+ "rewards/env_alfworld_reward/std": 0.4740039110183716,
68
+ "sampling/importance_sampling_ratio/max": 2.9810025691986084,
69
+ "sampling/importance_sampling_ratio/mean": 1.0009801387786865,
70
+ "sampling/importance_sampling_ratio/min": 0.0,
71
+ "sampling/sampling_logp_difference/max": 4.155694007873535,
72
+ "sampling/sampling_logp_difference/mean": 0.032881855964660645,
73
+ "step": 2,
74
+ "step_time": 1649.8384637760028
75
+ },
76
+ {
77
+ "clip_ratio/high_max": 0.007568976026959717,
78
+ "clip_ratio/high_mean": 0.006711035966873169,
79
+ "clip_ratio/low_mean": 0.003108485194388777,
80
+ "clip_ratio/low_min": 0.0022171867603901774,
81
+ "clip_ratio/region_mean": 0.009819521103054285,
82
+ "completions/clipped_ratio": 0.0,
83
+ "completions/max_length": 104.0,
84
+ "completions/max_terminated_length": 104.0,
85
+ "completions/mean_length": 50.770835876464844,
86
+ "completions/mean_terminated_length": 50.770835876464844,
87
+ "completions/min_length": 25.0,
88
+ "completions/min_terminated_length": 25.0,
89
+ "entropy": 0.6086474657058716,
90
+ "epoch": 0.75,
91
+ "frac_reward_zero_std": 0.095238097012043,
92
+ "grad_norm": 0.35934266448020935,
93
+ "learning_rate": 5.68608e-07,
94
+ "loss": -0.0351,
95
+ "num_tokens": 693527.0,
96
+ "reward": 0.33044642210006714,
97
+ "reward_std": 0.5141857266426086,
98
+ "rewards/env_alfworld_reward/mean": 0.33044642210006714,
99
+ "rewards/env_alfworld_reward/std": 0.5141857266426086,
100
+ "sampling/importance_sampling_ratio/max": 2.783240795135498,
101
+ "sampling/importance_sampling_ratio/mean": 0.9537404775619507,
102
+ "sampling/importance_sampling_ratio/min": 0.0,
103
+ "sampling/sampling_logp_difference/max": 1.1146756410598755,
104
+ "sampling/sampling_logp_difference/mean": 0.03519035875797272,
105
+ "step": 3,
106
+ "step_time": 1672.7712960310018
107
+ },
108
+ {
109
+ "clip_ratio/high_max": 0.007020744145847857,
110
+ "clip_ratio/high_mean": 0.006485109799541533,
111
+ "clip_ratio/low_mean": 0.004376257362309843,
112
+ "clip_ratio/low_min": 0.003922754956874996,
113
+ "clip_ratio/region_mean": 0.010861367220059037,
114
+ "completions/clipped_ratio": 0.0,
115
+ "completions/max_length": 95.0,
116
+ "completions/max_terminated_length": 95.0,
117
+ "completions/mean_length": 51.60714340209961,
118
+ "completions/mean_terminated_length": 51.60714340209961,
119
+ "completions/min_length": 28.0,
120
+ "completions/min_terminated_length": 28.0,
121
+ "entropy": 0.5597293674945831,
122
+ "epoch": 1.0,
123
+ "frac_reward_zero_std": 0.0535714291036129,
124
+ "grad_norm": 0.409810870885849,
125
+ "learning_rate": 8.529119999999999e-07,
126
+ "loss": 0.0229,
127
+ "num_tokens": 942371.0,
128
+ "reward": 0.03812500089406967,
129
+ "reward_std": 0.29200857877731323,
130
+ "rewards/env_alfworld_reward/mean": 0.03812500089406967,
131
+ "rewards/env_alfworld_reward/std": 0.2920086085796356,
132
+ "sampling/importance_sampling_ratio/max": 2.8310158252716064,
133
+ "sampling/importance_sampling_ratio/mean": 0.9631035327911377,
134
+ "sampling/importance_sampling_ratio/min": 0.0,
135
+ "sampling/sampling_logp_difference/max": 1.3009870052337646,
136
+ "sampling/sampling_logp_difference/mean": 0.03364055976271629,
137
+ "step": 4,
138
+ "step_time": 1871.100103906003
139
+ },
140
+ {
141
+ "epoch": 1.0,
142
+ "eval_clip_ratio/high_max": 0.0,
143
+ "eval_clip_ratio/high_mean": 0.0,
144
+ "eval_clip_ratio/low_mean": 0.0,
145
+ "eval_clip_ratio/low_min": 0.0,
146
+ "eval_clip_ratio/region_mean": 0.0,
147
+ "eval_completions/clipped_ratio": 0.0,
148
+ "eval_completions/max_length": 68.96,
149
+ "eval_completions/max_terminated_length": 68.96,
150
+ "eval_completions/mean_length": 51.315,
151
+ "eval_completions/mean_terminated_length": 51.315,
152
+ "eval_completions/min_length": 37.8,
153
+ "eval_completions/min_terminated_length": 37.8,
154
+ "eval_entropy": 0.6129815554618836,
155
+ "eval_frac_reward_zero_std": 0.09,
156
+ "eval_loss": -0.010877630673348904,
157
+ "eval_num_tokens": 942371.0,
158
+ "eval_reward": 0.19167500026524067,
159
+ "eval_reward_std": 0.38457470636814833,
160
+ "eval_rewards/env_alfworld_reward/mean": 0.19167500026524067,
161
+ "eval_rewards/env_alfworld_reward/std": 0.38457471802830695,
162
+ "eval_runtime": 2099.7568,
163
+ "eval_samples_per_second": 0.095,
164
+ "eval_sampling/importance_sampling_ratio/max": 1.5984654092788697,
165
+ "eval_sampling/importance_sampling_ratio/mean": 0.8831864225864411,
166
+ "eval_sampling/importance_sampling_ratio/min": 0.3859718208014965,
167
+ "eval_sampling/sampling_logp_difference/max": 0.6316441667079925,
168
+ "eval_sampling/sampling_logp_difference/mean": 0.03440669998526573,
169
+ "eval_steps_per_second": 0.012,
170
+ "step": 4
171
  }
172
  ],
173
  "logging_steps": 1.0,
174
  "max_steps": 8,
175
+ "num_input_tokens_seen": 942371,
176
  "num_train_epochs": 2,
177
  "save_steps": 500,
178
  "stateful_callbacks": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:941547e754edb27d424da371bb915059d4b43f7da529e95b79628779b4d5d17e
3
  size 7825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed4e467527db6458333d49b7f960c0fc9f43d07312ed047c641931255b615a8
3
  size 7825