abdo-Mansour commited on
Commit
2dcc7fe
·
verified ·
1 Parent(s): ed85c44

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f57c160d2b922a54d88f536012a2d24ac47dbef1acb1f8c8d63487761fb4be25
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2373407f411bdefcd1c1619d4337dc75c28545aecea714e87ab0192268b19c0
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6238a76200c39daf0fbb9f6649698d4cf14657fb4c51378657eb5dfe22a277e4
3
  size 323298107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8045b3960c09346b96cc68600f0bdf0804cab1858372d8860af22c20ef37d7a5
3
  size 323298107
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85af476a78ba5ced6159a0a73d339182472e654d41949a6de3c8a75963047363
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fd9ac6b4a5aea74e8d8f24c3e96e1af47905735c7dc53e6854b8615703b8ae3
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fac969061fcfa6580058d9dd98ea17f8a5629238d68c550370289b84d4767462
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86ebf50ac9f9127531a301e6ae85c1c8d9423c1122d72f8eafb77301bb870e2
3
  size 14917
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ab8f7fae8c5bc945ba8d0476887328f81726abcc0550ee4572fa2d3eac0adcb
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b104d11e23c43fff8ed10992448da9d6b5482113779cdd276876818600dad4
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a3f4b44496cb1177f3335737fdd4d6fd3a0beda7544091b0e32c12d83e6cc56
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2bd53166d581902513a9666139854e88807312a6b89c8ab8f4eaed17fb5e63
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 100,
3
- "best_metric": 0.344037264585495,
4
- "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-100",
5
- "epoch": 0.2359882005899705,
6
  "eval_steps": 50,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -94,6 +94,49 @@
94
  "eval_samples_per_second": 3.139,
95
  "eval_steps_per_second": 0.785,
96
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
  ],
99
  "logging_steps": 10,
@@ -113,7 +156,7 @@
113
  "attributes": {}
114
  }
115
  },
116
- "total_flos": 2.40043843125248e+16,
117
  "train_batch_size": 2,
118
  "trial_name": null,
119
  "trial_params": null
 
1
  {
2
+ "best_global_step": 150,
3
+ "best_metric": 0.32990705966949463,
4
+ "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-150",
5
+ "epoch": 0.35398230088495575,
6
  "eval_steps": 50,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
94
  "eval_samples_per_second": 3.139,
95
  "eval_steps_per_second": 0.785,
96
  "step": 100
97
+ },
98
+ {
99
+ "epoch": 0.25958702064896755,
100
+ "grad_norm": 3.625964403152466,
101
+ "learning_rate": 1.695853242428357e-05,
102
+ "loss": 0.2473,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 0.2831858407079646,
107
+ "grad_norm": 1.9131460189819336,
108
+ "learning_rate": 1.6916845061292673e-05,
109
+ "loss": 0.2083,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 0.30678466076696165,
114
+ "grad_norm": 3.0459189414978027,
115
+ "learning_rate": 1.6860890501703082e-05,
116
+ "loss": 0.2358,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 0.3303834808259587,
121
+ "grad_norm": 4.418868541717529,
122
+ "learning_rate": 1.6790763592776032e-05,
123
+ "loss": 0.2745,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 0.35398230088495575,
128
+ "grad_norm": 2.294529438018799,
129
+ "learning_rate": 1.670658320498962e-05,
130
+ "loss": 0.2016,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 0.35398230088495575,
135
+ "eval_loss": 0.32990705966949463,
136
+ "eval_runtime": 375.8551,
137
+ "eval_samples_per_second": 3.14,
138
+ "eval_steps_per_second": 0.785,
139
+ "step": 150
140
  }
141
  ],
142
  "logging_steps": 10,
 
156
  "attributes": {}
157
  }
158
  },
159
+ "total_flos": 3.620038096243917e+16,
160
  "train_batch_size": 2,
161
  "trial_name": null,
162
  "trial_params": null