pefanis27 committed on
Commit
c8ec0a0
·
verified ·
1 Parent(s): eab9d53

phi-3.5-new

Browse files
adapter_config.json CHANGED
@@ -23,10 +23,10 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "qkv_proj",
27
- "o_proj",
28
  "gate_up_proj",
29
- "down_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "gate_up_proj",
27
+ "down_proj",
28
+ "qkv_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8dd3dcd915b2dd761edd13f06737584ebfa1e3321219edc4805c74bef799516
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9530a4f7189fe3a34ddd73af5e9cbbc39d01141ff2a83bc58fc0472af0f0c315
3
  size 100697728
added_tokens.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|assistant|>": 32001,
3
+ "<|endoftext|>": 32000,
4
+ "<|end|>": 32007,
5
+ "<|placeholder1|>": 32002,
6
+ "<|placeholder2|>": 32003,
7
+ "<|placeholder3|>": 32004,
8
+ "<|placeholder4|>": 32005,
9
+ "<|placeholder5|>": 32008,
10
+ "<|placeholder6|>": 32009,
11
+ "<|system|>": 32006,
12
+ "<|user|>": 32010
13
+ }
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 1.0177021026611328,
4
- "eval_runtime": 6.8483,
5
- "eval_samples_per_second": 2.482,
6
- "eval_steps_per_second": 0.73,
7
- "total_flos": 8689108767160320.0,
8
- "train_loss": 0.9847308204287575,
9
- "train_runtime": 581.6572,
10
- "train_samples_per_second": 0.713,
11
- "train_steps_per_second": 0.181
12
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "eval_loss": 1.001703143119812,
4
+ "eval_runtime": 16.7881,
5
+ "eval_samples_per_second": 2.561,
6
+ "eval_steps_per_second": 0.655,
7
+ "total_flos": 3.918186203657011e+16,
8
+ "train_loss": 0.8946734860412076,
9
+ "train_runtime": 2550.8567,
10
+ "train_samples_per_second": 2.039,
11
+ "train_steps_per_second": 0.51
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 1.0177021026611328,
4
- "eval_runtime": 6.8483,
5
- "eval_samples_per_second": 2.482,
6
- "eval_steps_per_second": 0.73
7
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "eval_loss": 1.001703143119812,
4
+ "eval_runtime": 16.7881,
5
+ "eval_samples_per_second": 2.561,
6
+ "eval_steps_per_second": 0.655
7
  }
runs/Jan26_21-17-10_dmlab/events.out.tfevents.1737919031.dmlab.32472.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa7cc980bf3d2c1ec9a39ba407ae5db23f5886adae9544030941263afbbefac
3
+ size 12992
runs/Jan26_21-17-10_dmlab/events.out.tfevents.1737921599.dmlab.32472.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9641486726cc361f62562ffe71401e8a945c8b34ea507c6f1b573a808aedde13
3
+ size 359
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 8689108767160320.0,
4
- "train_loss": 0.9847308204287575,
5
- "train_runtime": 581.6572,
6
- "train_samples_per_second": 0.713,
7
- "train_steps_per_second": 0.181
8
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "total_flos": 3.918186203657011e+16,
4
+ "train_loss": 0.8946734860412076,
5
+ "train_runtime": 2550.8567,
6
+ "train_samples_per_second": 2.039,
7
+ "train_steps_per_second": 0.51
8
  }
trainer_state.json CHANGED
@@ -1,102 +1,162 @@
1
  {
2
- "best_metric": 1.0051764249801636,
3
- "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-105",
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 105,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.7281827330589294,
14
- "learning_rate": 0.0001,
15
- "loss": 1.2517,
16
- "step": 21
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_loss": 1.1211004257202148,
21
- "eval_runtime": 6.7788,
22
- "eval_samples_per_second": 2.508,
23
- "eval_steps_per_second": 0.738,
24
- "step": 21
25
  },
26
  {
27
  "epoch": 2.0,
28
- "grad_norm": 0.5565729737281799,
29
- "learning_rate": 8.535533905932738e-05,
30
- "loss": 0.9912,
31
- "step": 42
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 1.0125099420547485,
36
- "eval_runtime": 6.8252,
37
- "eval_samples_per_second": 2.491,
38
- "eval_steps_per_second": 0.733,
39
- "step": 42
40
  },
41
  {
42
  "epoch": 3.0,
43
- "grad_norm": 0.730018138885498,
44
- "learning_rate": 5e-05,
45
- "loss": 0.9198,
46
- "step": 63
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_loss": 1.0076608657836914,
51
- "eval_runtime": 6.788,
52
- "eval_samples_per_second": 2.504,
53
- "eval_steps_per_second": 0.737,
54
- "step": 63
55
  },
56
  {
57
  "epoch": 4.0,
58
- "grad_norm": 0.912876546382904,
59
- "learning_rate": 1.4644660940672627e-05,
60
- "loss": 0.8883,
61
- "step": 84
62
  },
63
  {
64
  "epoch": 4.0,
65
- "eval_loss": 1.0058414936065674,
66
- "eval_runtime": 6.8991,
67
- "eval_samples_per_second": 2.464,
68
- "eval_steps_per_second": 0.725,
69
- "step": 84
70
  },
71
  {
72
  "epoch": 5.0,
73
- "grad_norm": 0.5380117893218994,
74
- "learning_rate": 0.0,
75
- "loss": 0.8727,
76
- "step": 105
77
  },
78
  {
79
  "epoch": 5.0,
80
- "eval_loss": 1.0051764249801636,
81
- "eval_runtime": 6.8394,
82
- "eval_samples_per_second": 2.486,
83
- "eval_steps_per_second": 0.731,
84
- "step": 105
85
  },
86
  {
87
- "epoch": 5.0,
88
- "step": 105,
89
- "total_flos": 8689108767160320.0,
90
- "train_loss": 0.9847308204287575,
91
- "train_runtime": 581.6572,
92
- "train_samples_per_second": 0.713,
93
- "train_steps_per_second": 0.181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  }
95
  ],
96
  "logging_steps": 500,
97
- "max_steps": 105,
98
  "num_input_tokens_seen": 0,
99
- "num_train_epochs": 5,
100
  "save_steps": 500,
101
  "stateful_callbacks": {
102
  "EarlyStoppingCallback": {
@@ -105,7 +165,7 @@
105
  "early_stopping_threshold": 0.0
106
  },
107
  "attributes": {
108
- "early_stopping_patience_counter": 0
109
  }
110
  },
111
  "TrainerControl": {
@@ -119,7 +179,7 @@
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 8689108767160320.0,
123
  "train_batch_size": 4,
124
  "trial_name": null,
125
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9914960861206055,
3
+ "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-312",
4
+ "epoch": 9.0,
5
  "eval_steps": 500,
6
+ "global_step": 468,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.8883442878723145,
14
+ "learning_rate": 2e-05,
15
+ "loss": 1.2693,
16
+ "step": 52
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_loss": 1.1401562690734863,
21
+ "eval_runtime": 16.8795,
22
+ "eval_samples_per_second": 2.547,
23
+ "eval_steps_per_second": 0.652,
24
+ "step": 52
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "grad_norm": 0.8219994902610779,
29
+ "learning_rate": 4e-05,
30
+ "loss": 0.9808,
31
+ "step": 104
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 1.0025322437286377,
36
+ "eval_runtime": 16.7914,
37
+ "eval_samples_per_second": 2.561,
38
+ "eval_steps_per_second": 0.655,
39
+ "step": 104
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "grad_norm": 0.8449307680130005,
44
+ "learning_rate": 6e-05,
45
+ "loss": 0.895,
46
+ "step": 156
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_loss": 1.0123087167739868,
51
+ "eval_runtime": 16.7952,
52
+ "eval_samples_per_second": 2.56,
53
+ "eval_steps_per_second": 0.655,
54
+ "step": 156
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "grad_norm": 0.622931718826294,
59
+ "learning_rate": 8e-05,
60
+ "loss": 0.8651,
61
+ "step": 208
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "eval_loss": 1.0015382766723633,
66
+ "eval_runtime": 16.795,
67
+ "eval_samples_per_second": 2.56,
68
+ "eval_steps_per_second": 0.655,
69
+ "step": 208
70
  },
71
  {
72
  "epoch": 5.0,
73
+ "grad_norm": 0.6589271426200867,
74
+ "learning_rate": 0.0001,
75
+ "loss": 0.8471,
76
+ "step": 260
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "eval_loss": 1.0026640892028809,
81
+ "eval_runtime": 16.7954,
82
+ "eval_samples_per_second": 2.56,
83
+ "eval_steps_per_second": 0.655,
84
+ "step": 260
85
  },
86
  {
87
+ "epoch": 6.0,
88
+ "grad_norm": 0.7380354404449463,
89
+ "learning_rate": 9.938441702975689e-05,
90
+ "loss": 0.8273,
91
+ "step": 312
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 0.9914960861206055,
96
+ "eval_runtime": 16.7888,
97
+ "eval_samples_per_second": 2.561,
98
+ "eval_steps_per_second": 0.655,
99
+ "step": 312
100
+ },
101
+ {
102
+ "epoch": 7.0,
103
+ "grad_norm": 0.7154495716094971,
104
+ "learning_rate": 9.755282581475769e-05,
105
+ "loss": 0.8068,
106
+ "step": 364
107
+ },
108
+ {
109
+ "epoch": 7.0,
110
+ "eval_loss": 0.9957849979400635,
111
+ "eval_runtime": 16.7927,
112
+ "eval_samples_per_second": 2.561,
113
+ "eval_steps_per_second": 0.655,
114
+ "step": 364
115
+ },
116
+ {
117
+ "epoch": 8.0,
118
+ "grad_norm": 0.9513155817985535,
119
+ "learning_rate": 9.45503262094184e-05,
120
+ "loss": 0.7883,
121
+ "step": 416
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "eval_loss": 1.0050514936447144,
126
+ "eval_runtime": 16.8033,
127
+ "eval_samples_per_second": 2.559,
128
+ "eval_steps_per_second": 0.655,
129
+ "step": 416
130
+ },
131
+ {
132
+ "epoch": 9.0,
133
+ "grad_norm": 0.9450660347938538,
134
+ "learning_rate": 9.045084971874738e-05,
135
+ "loss": 0.7722,
136
+ "step": 468
137
+ },
138
+ {
139
+ "epoch": 9.0,
140
+ "eval_loss": 1.008681058883667,
141
+ "eval_runtime": 16.7907,
142
+ "eval_samples_per_second": 2.561,
143
+ "eval_steps_per_second": 0.655,
144
+ "step": 468
145
+ },
146
+ {
147
+ "epoch": 9.0,
148
+ "step": 468,
149
+ "total_flos": 3.918186203657011e+16,
150
+ "train_loss": 0.8946734860412076,
151
+ "train_runtime": 2550.8567,
152
+ "train_samples_per_second": 2.039,
153
+ "train_steps_per_second": 0.51
154
  }
155
  ],
156
  "logging_steps": 500,
157
+ "max_steps": 1300,
158
  "num_input_tokens_seen": 0,
159
+ "num_train_epochs": 25,
160
  "save_steps": 500,
161
  "stateful_callbacks": {
162
  "EarlyStoppingCallback": {
 
165
  "early_stopping_threshold": 0.0
166
  },
167
  "attributes": {
168
+ "early_stopping_patience_counter": 3
169
  }
170
  },
171
  "TrainerControl": {
 
179
  "attributes": {}
180
  }
181
  },
182
+ "total_flos": 3.918186203657011e+16,
183
  "train_batch_size": 4,
184
  "trial_name": null,
185
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f13e576b6949e0d2dec213e01d234ec58654620365de37ce95f267570e8fb4ee
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128eba89cd71392d5ec29707dedc993445621390f2e66243a915aa50897df7e4
3
  size 5624