abdo-Mansour commited on
Commit
d9be883
·
verified ·
1 Parent(s): 028fc67

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14bf7eebdd663b3ea19c21140d09049a942ca2805a3a516c668495cbc89da99d
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7727ddcda81ef4af053f0028ba65de665ef0d03f9b03688d230a65191a2f484c
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa53890b641def633cae2d9fc63c7ef423990e1fb2b0825a66bf6eac654360a9
3
  size 323298107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5f4c788147cbf77121ec6171fbd0b2b46c8555425bc8d1c64d8f02c14682c8
3
  size 323298107
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7acfecdfbc302e5cfb683c010fb2d33cce2dba28a902ba94b209ca4f49e592af
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a66c2a768f8209fe849e9eba93fe6dc2a0e7b9b72434220c6173918e2206bde6
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ed56eb42e65a7e4c00a66c230edb0d550fdfb3482a6cd447834e719d0dc9b35
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d06d589659970c843ee8bda33ac65b1103f445a5ebaf1e5d85ca1a884f938f
3
  size 14917
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a40f87368a5d34bd9167ba22f37c5593e2e47132180dfaf5ec6d25e4f0170e1d
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd41ef3a25cc97fee0fbbd5b42d6f6855cae1ed20b73ba10e4ac2591bfa3c9e3
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c88606a1ece45772e81873bd7d0d345f95ef0ba0f0959f30a5f19a783b7b6d5e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a15e434a307874a0f7324b4bcce16f24e420fcd331b609bbbc1a5591082bc2c
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 100,
3
- "best_metric": 0.7142078876495361,
4
- "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-100",
5
- "epoch": 0.2527646129541864,
6
  "eval_steps": 50,
7
- "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -94,6 +94,49 @@
94
  "eval_samples_per_second": 3.404,
95
  "eval_steps_per_second": 0.852,
96
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  }
98
  ],
99
  "logging_steps": 10,
@@ -113,7 +156,7 @@
113
  "attributes": {}
114
  }
115
  },
116
- "total_flos": 2.2953173215870976e+16,
117
  "train_batch_size": 2,
118
  "trial_name": null,
119
  "trial_params": null
 
1
  {
2
+ "best_global_step": 150,
3
+ "best_metric": 0.6856361031532288,
4
+ "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-150",
5
+ "epoch": 0.3791469194312796,
6
  "eval_steps": 50,
7
+ "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
94
  "eval_samples_per_second": 3.404,
95
  "eval_steps_per_second": 0.852,
96
  "step": 100
97
+ },
98
+ {
99
+ "epoch": 0.27804107424960506,
100
+ "grad_norm": 6.488720893859863,
101
+ "learning_rate": 1.820275277152846e-05,
102
+ "loss": 0.6861,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 0.3033175355450237,
107
+ "grad_norm": 6.127718448638916,
108
+ "learning_rate": 1.7666743421972986e-05,
109
+ "loss": 0.681,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 0.3285939968404423,
114
+ "grad_norm": 5.501866340637207,
115
+ "learning_rate": 1.7071067811865477e-05,
116
+ "loss": 0.6915,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 0.353870458135861,
121
+ "grad_norm": 5.910053730010986,
122
+ "learning_rate": 1.6420361773423205e-05,
123
+ "loss": 0.677,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 0.3791469194312796,
128
+ "grad_norm": 6.499867916107178,
129
+ "learning_rate": 1.571968941195081e-05,
130
+ "loss": 0.7094,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 0.3791469194312796,
135
+ "eval_loss": 0.6856361031532288,
136
+ "eval_runtime": 413.5831,
137
+ "eval_samples_per_second": 3.402,
138
+ "eval_steps_per_second": 0.851,
139
+ "step": 150
140
  }
141
  ],
142
  "logging_steps": 10,
 
156
  "attributes": {}
157
  }
158
  },
159
+ "total_flos": 3.4537036362809344e+16,
160
  "train_batch_size": 2,
161
  "trial_name": null,
162
  "trial_params": null