Training in progress, step 5000
Browse files- model.safetensors +1 -1
- trainer_log.jsonl +28 -0
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 49958912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20447b7d0a3533af3cc654e7034ac3ae236837fa98ba96a5fdb162f34707afb5
|
| 3 |
size 49958912
|
trainer_log.jsonl
CHANGED
|
@@ -127,3 +127,31 @@
|
|
| 127 |
{"current_steps": 4064, "total_steps": 5000, "loss": 3.6312, "eval_loss": null, "predict_loss": null, "learning_rate": 5.454978814762181e-05, "epoch": 0.23131652342193637, "percentage": 81.28}
|
| 128 |
{"current_steps": 4096, "total_steps": 5000, "loss": 3.6917, "eval_loss": null, "predict_loss": null, "learning_rate": 5.29483415121254e-05, "epoch": 0.2331379133701406, "percentage": 81.92}
|
| 129 |
{"current_steps": 4128, "total_steps": 5000, "loss": 3.6064, "eval_loss": null, "predict_loss": null, "learning_rate": 5.139604443738368e-05, "epoch": 0.2349593033183448, "percentage": 82.56}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
{"current_steps": 4064, "total_steps": 5000, "loss": 3.6312, "eval_loss": null, "predict_loss": null, "learning_rate": 5.454978814762181e-05, "epoch": 0.23131652342193637, "percentage": 81.28}
|
| 128 |
{"current_steps": 4096, "total_steps": 5000, "loss": 3.6917, "eval_loss": null, "predict_loss": null, "learning_rate": 5.29483415121254e-05, "epoch": 0.2331379133701406, "percentage": 81.92}
|
| 129 |
{"current_steps": 4128, "total_steps": 5000, "loss": 3.6064, "eval_loss": null, "predict_loss": null, "learning_rate": 5.139604443738368e-05, "epoch": 0.2349593033183448, "percentage": 82.56}
|
| 130 |
+
{"current_steps": 4160, "total_steps": 5000, "loss": 3.7736, "eval_loss": null, "predict_loss": null, "learning_rate": 4.9893577812197555e-05, "epoch": 0.23678069326654905, "percentage": 83.2}
|
| 131 |
+
{"current_steps": 4192, "total_steps": 5000, "loss": 3.7065, "eval_loss": null, "predict_loss": null, "learning_rate": 4.8441600668085826e-05, "epoch": 0.23860208321475326, "percentage": 83.84}
|
| 132 |
+
{"current_steps": 4224, "total_steps": 5000, "loss": 3.6369, "eval_loss": null, "predict_loss": null, "learning_rate": 4.704074989021219e-05, "epoch": 0.24042347316295748, "percentage": 84.48}
|
| 133 |
+
{"current_steps": 4256, "total_steps": 5000, "loss": 3.6924, "eval_loss": null, "predict_loss": null, "learning_rate": 4.569163993802638e-05, "epoch": 0.2422448631111617, "percentage": 85.12}
|
| 134 |
+
{"current_steps": 4288, "total_steps": 5000, "loss": 3.6998, "eval_loss": null, "predict_loss": null, "learning_rate": 4.439486257574193e-05, "epoch": 0.24406625305936594, "percentage": 85.76}
|
| 135 |
+
{"current_steps": 4320, "total_steps": 5000, "loss": 3.6369, "eval_loss": null, "predict_loss": null, "learning_rate": 4.315098661276884e-05, "epoch": 0.24588764300757016, "percentage": 86.4}
|
| 136 |
+
{"current_steps": 4352, "total_steps": 5000, "loss": 3.615, "eval_loss": null, "predict_loss": null, "learning_rate": 4.1960557654214884e-05, "epoch": 0.24770903295577437, "percentage": 87.04}
|
| 137 |
+
{"current_steps": 4384, "total_steps": 5000, "loss": 3.6617, "eval_loss": null, "predict_loss": null, "learning_rate": 4.082409786156541e-05, "epoch": 0.2495304229039786, "percentage": 87.68}
|
| 138 |
+
{"current_steps": 4416, "total_steps": 5000, "loss": 3.5658, "eval_loss": null, "predict_loss": null, "learning_rate": 3.9742105723645814e-05, "epoch": 0.25135181285218283, "percentage": 88.32}
|
| 139 |
+
{"current_steps": 4448, "total_steps": 5000, "loss": 3.579, "eval_loss": null, "predict_loss": null, "learning_rate": 3.871505583796792e-05, "epoch": 0.253173202800387, "percentage": 88.96}
|
| 140 |
+
{"current_steps": 4480, "total_steps": 5000, "loss": 3.6225, "eval_loss": null, "predict_loss": null, "learning_rate": 3.774339870255591e-05, "epoch": 0.25499459274859126, "percentage": 89.6}
|
| 141 |
+
{"current_steps": 4512, "total_steps": 5000, "loss": 3.6348, "eval_loss": null, "predict_loss": null, "learning_rate": 3.682756051834299e-05, "epoch": 0.2568159826967955, "percentage": 90.24}
|
| 142 |
+
{"current_steps": 4544, "total_steps": 5000, "loss": 3.5613, "eval_loss": null, "predict_loss": null, "learning_rate": 3.596794300222543e-05, "epoch": 0.2586373726449997, "percentage": 90.88}
|
| 143 |
+
{"current_steps": 4576, "total_steps": 5000, "loss": 3.5749, "eval_loss": null, "predict_loss": null, "learning_rate": 3.516492321085647e-05, "epoch": 0.26045876259320394, "percentage": 91.52}
|
| 144 |
+
{"current_steps": 4608, "total_steps": 5000, "loss": 3.5163, "eval_loss": null, "predict_loss": null, "learning_rate": 3.4418853375256644e-05, "epoch": 0.2622801525414082, "percentage": 92.16}
|
| 145 |
+
{"current_steps": 4640, "total_steps": 5000, "loss": 3.646, "eval_loss": null, "predict_loss": null, "learning_rate": 3.3730060746313664e-05, "epoch": 0.2641015424896124, "percentage": 92.8}
|
| 146 |
+
{"current_steps": 4672, "total_steps": 5000, "loss": 3.6188, "eval_loss": null, "predict_loss": null, "learning_rate": 3.309884745123941e-05, "epoch": 0.2659229324378166, "percentage": 93.44}
|
| 147 |
+
{"current_steps": 4704, "total_steps": 5000, "loss": 3.6008, "eval_loss": null, "predict_loss": null, "learning_rate": 3.2525490361046904e-05, "epoch": 0.2677443223860208, "percentage": 94.08}
|
| 148 |
+
{"current_steps": 4736, "total_steps": 5000, "loss": 3.6379, "eval_loss": null, "predict_loss": null, "learning_rate": 3.201024096910552e-05, "epoch": 0.26956571233422505, "percentage": 94.72}
|
| 149 |
+
{"current_steps": 4768, "total_steps": 5000, "loss": 3.6285, "eval_loss": null, "predict_loss": null, "learning_rate": 3.155332528082772e-05, "epoch": 0.2713871022824293, "percentage": 95.36}
|
| 150 |
+
{"current_steps": 4800, "total_steps": 5000, "loss": 3.6428, "eval_loss": null, "predict_loss": null, "learning_rate": 3.11549437145356e-05, "epoch": 0.2732084922306335, "percentage": 96.0}
|
| 151 |
+
{"current_steps": 4832, "total_steps": 5000, "loss": 3.6523, "eval_loss": null, "predict_loss": null, "learning_rate": 3.081527101355075e-05, "epoch": 0.2750298821788377, "percentage": 96.64}
|
| 152 |
+
{"current_steps": 4864, "total_steps": 5000, "loss": 3.6134, "eval_loss": null, "predict_loss": null, "learning_rate": 3.053445616954611e-05, "epoch": 0.27685127212704197, "percentage": 97.28}
|
| 153 |
+
{"current_steps": 4896, "total_steps": 5000, "loss": 3.6809, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0312622357193284e-05, "epoch": 0.27867266207524616, "percentage": 97.92}
|
| 154 |
+
{"current_steps": 4928, "total_steps": 5000, "loss": 3.5551, "eval_loss": null, "predict_loss": null, "learning_rate": 3.014986688013406e-05, "epoch": 0.2804940520234504, "percentage": 98.56}
|
| 155 |
+
{"current_steps": 4960, "total_steps": 5000, "loss": 3.658, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0046261128299766e-05, "epoch": 0.28231544197165465, "percentage": 99.2}
|
| 156 |
+
{"current_steps": 4992, "total_steps": 5000, "loss": 3.6934, "eval_loss": null, "predict_loss": null, "learning_rate": 3.000185054659739e-05, "epoch": 0.28413683191985883, "percentage": 99.84}
|
| 157 |
+
{"current_steps": 5000, "total_steps": 5000, "loss": null, "eval_loss": null, "predict_loss": null, "learning_rate": null, "epoch": 0.2845921794069099, "percentage": 100.0}
|