Upload 27 files
Browse files- .gitattributes +6 -0
- saved_models/gpt_model_enhanced_stop_20251003_153246.pth +3 -0
- saved_models/gpt_model_enhanced_stop_20251003_174251.pth +3 -0
- saved_models/gpt_model_enhanced_stop_20251003_200243.pth +3 -0
- saved_models/gpt_model_enhanced_stop_20251004_181034.pth +3 -0
- saved_models/gpt_model_enhanced_stop_20251005_192151.pth +3 -0
- saved_models/gpt_model_final_20251002_161708.pth +3 -0
- saved_models/gpt_model_final_20251002_205242.pth +3 -0
- saved_models/gpt_model_final_20251003_124248.pth +3 -0
- saved_models/sft_training_curves_20251002_172157.pdf +0 -0
- saved_models/sft_training_curves_20251002_172157.png +3 -0
- saved_models/sft_training_info_20251002_182031.json +18 -0
- saved_models/training_curves_20251002_160458.pdf +0 -0
- saved_models/training_curves_20251002_160458.png +3 -0
- saved_models/training_curves_20251002_205221.pdf +0 -0
- saved_models/training_curves_20251002_205221.png +3 -0
- saved_models/training_curves_20251003_024433.png +3 -0
- saved_models/training_curves_20251004_172613.png +3 -0
- saved_models/training_curves_20251005_191820.png +3 -0
- saved_models/training_info_20251002_161708.json +15 -0
- saved_models/training_info_20251002_205242.json +15 -0
- saved_models/training_info_20251003_020629.json +16 -0
- saved_models/training_info_20251003_020818.json +16 -0
- saved_models/training_info_20251003_021708.json +28 -0
- saved_models/training_info_20251003_022538.json +44 -0
- saved_models/training_info_20251003_124248.json +92 -0
- saved_models/training_info_20251004_181034.json +92 -0
- saved_models/training_info_20251005_192151.json +92 -0
.gitattributes
CHANGED
|
@@ -38,3 +38,9 @@ deepseek_local_comparison_results.png filter=lfs diff=lfs merge=lfs -text
|
|
| 38 |
model_comparison_results_1.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
model_comparison_results_2.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
model_comparison_results.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
model_comparison_results_1.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
model_comparison_results_2.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
model_comparison_results.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
saved_models/sft_training_curves_20251002_172157.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
saved_models/training_curves_20251002_160458.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
saved_models/training_curves_20251002_205221.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
saved_models/training_curves_20251003_024433.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
saved_models/training_curves_20251004_172613.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
saved_models/training_curves_20251005_191820.png filter=lfs diff=lfs merge=lfs -text
|
saved_models/gpt_model_enhanced_stop_20251003_153246.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2bc26b8c795bab9a38a73874c3a003b0e73eb4f727bdab7be40dd200fc380a0
|
| 3 |
+
size 298169487
|
saved_models/gpt_model_enhanced_stop_20251003_174251.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f465231d814ac9b63f2cbd1e8dcf5f819fcdd5cc21c5f005028f695eb1a6e0e6
|
| 3 |
+
size 291340431
|
saved_models/gpt_model_enhanced_stop_20251003_200243.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c35d00022f33fc404502940a89fb5174b7a12a0dab2834b1ef5ecbcd6f7219c2
|
| 3 |
+
size 304654927
|
saved_models/gpt_model_enhanced_stop_20251004_181034.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de3772617c56e4ccb0994c3689c05145463eb3009745fd50570ff7e9a24f3923
|
| 3 |
+
size 294366287
|
saved_models/gpt_model_enhanced_stop_20251005_192151.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90f01ff913fd88c2767b68303fe3d52f9ceef6a6e901b45e822567e1afd4d630
|
| 3 |
+
size 298688591
|
saved_models/gpt_model_final_20251002_161708.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3d4250945f289498e144bdfa2568bde07bcf43ff34980ec551871484fab42fb
|
| 3 |
+
size 379055071
|
saved_models/gpt_model_final_20251002_205242.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfe0c0612d21f8b7b4556d635a90f7162a4fc7795773bdb076cf193c7882255d
|
| 3 |
+
size 379055071
|
saved_models/gpt_model_final_20251003_124248.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:94b7c8eb8ff2ab38d5b531767e177f02a2cd6d8e294884fa01d20d86ab99868c
|
| 3 |
+
size 332664287
|
saved_models/sft_training_curves_20251002_172157.pdf
ADDED
|
Binary file (14 kB). View file
|
|
|
saved_models/sft_training_curves_20251002_172157.png
ADDED
|
Git LFS Details
|
saved_models/sft_training_info_20251002_182031.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251002_182031",
|
| 3 |
+
"current_iteration": 5000,
|
| 4 |
+
"final_iteration": 5000,
|
| 5 |
+
"hyperparameters": {
|
| 6 |
+
"batch_size": 32,
|
| 7 |
+
"block_size": 512,
|
| 8 |
+
"d_model": 512,
|
| 9 |
+
"h": 8,
|
| 10 |
+
"Nx": 6,
|
| 11 |
+
"dropout_rate": 0.2,
|
| 12 |
+
"lr_rate": 0.0001,
|
| 13 |
+
"training_type": "SFT",
|
| 14 |
+
"cot_samples": 1000
|
| 15 |
+
},
|
| 16 |
+
"training_time": "2025-10-02 18:20:32",
|
| 17 |
+
"training_type": "SFT"
|
| 18 |
+
}
|
saved_models/training_curves_20251002_160458.pdf
ADDED
|
Binary file (14.3 kB). View file
|
|
|
saved_models/training_curves_20251002_160458.png
ADDED
|
Git LFS Details
|
saved_models/training_curves_20251002_205221.pdf
ADDED
|
Binary file (14.3 kB). View file
|
|
|
saved_models/training_curves_20251002_205221.png
ADDED
|
Git LFS Details
|
saved_models/training_curves_20251003_024433.png
ADDED
|
Git LFS Details
|
saved_models/training_curves_20251004_172613.png
ADDED
|
Git LFS Details
|
saved_models/training_curves_20251005_191820.png
ADDED
|
Git LFS Details
|
saved_models/training_info_20251002_161708.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251002_161708",
|
| 3 |
+
"current_iteration": 10000,
|
| 4 |
+
"final_iteration": 10000,
|
| 5 |
+
"hyperparameters": {
|
| 6 |
+
"batch_size": 32,
|
| 7 |
+
"block_size": 512,
|
| 8 |
+
"d_model": 512,
|
| 9 |
+
"h": 8,
|
| 10 |
+
"Nx": 6,
|
| 11 |
+
"dropout_rate": 0.2,
|
| 12 |
+
"lr_rate": 0.001
|
| 13 |
+
},
|
| 14 |
+
"training_time": "2025-10-02 16:17:09"
|
| 15 |
+
}
|
saved_models/training_info_20251002_205242.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251002_205242",
|
| 3 |
+
"current_iteration": 10000,
|
| 4 |
+
"final_iteration": 10000,
|
| 5 |
+
"hyperparameters": {
|
| 6 |
+
"batch_size": 32,
|
| 7 |
+
"block_size": 512,
|
| 8 |
+
"d_model": 512,
|
| 9 |
+
"h": 8,
|
| 10 |
+
"Nx": 6,
|
| 11 |
+
"dropout_rate": 0.2,
|
| 12 |
+
"lr_rate": 0.001
|
| 13 |
+
},
|
| 14 |
+
"training_time": "2025-10-02 20:52:43"
|
| 15 |
+
}
|
saved_models/training_info_20251003_020629.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251003_020629",
|
| 3 |
+
"iteration": 297,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
9.135263471603393
|
| 6 |
+
],
|
| 7 |
+
"valid_losses": [
|
| 8 |
+
9.137469944953919
|
| 9 |
+
],
|
| 10 |
+
"train_ppls": [
|
| 11 |
+
9276.721452517138
|
| 12 |
+
],
|
| 13 |
+
"valid_ppls": [
|
| 14 |
+
9297.212889773891
|
| 15 |
+
]
|
| 16 |
+
}
|
saved_models/training_info_20251003_020818.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251003_020818",
|
| 3 |
+
"iteration": 417,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
9.131195340156555
|
| 6 |
+
],
|
| 7 |
+
"valid_losses": [
|
| 8 |
+
9.134268293380737
|
| 9 |
+
],
|
| 10 |
+
"train_ppls": [
|
| 11 |
+
9239.059189712
|
| 12 |
+
],
|
| 13 |
+
"valid_ppls": [
|
| 14 |
+
9267.494053564746
|
| 15 |
+
]
|
| 16 |
+
}
|
saved_models/training_info_20251003_021708.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251003_021708",
|
| 3 |
+
"iteration": 1912,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
9.134580550193787,
|
| 6 |
+
5.869582514762879,
|
| 7 |
+
5.5604641020298,
|
| 8 |
+
5.282446354627609
|
| 9 |
+
],
|
| 10 |
+
"valid_losses": [
|
| 11 |
+
9.125736680030823,
|
| 12 |
+
5.978839534521103,
|
| 13 |
+
5.5966400253772735,
|
| 14 |
+
5.448123054504395
|
| 15 |
+
],
|
| 16 |
+
"train_ppls": [
|
| 17 |
+
9270.388343580242,
|
| 18 |
+
354.10111741566493,
|
| 19 |
+
259.9434486147166,
|
| 20 |
+
196.8508537871248
|
| 21 |
+
],
|
| 22 |
+
"valid_ppls": [
|
| 23 |
+
9188.763703583847,
|
| 24 |
+
394.9817394219924,
|
| 25 |
+
269.51930633160225,
|
| 26 |
+
232.32170125491427
|
| 27 |
+
]
|
| 28 |
+
}
|
saved_models/training_info_20251003_022538.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251003_022538",
|
| 3 |
+
"iteration": 3625,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
9.132858023643493,
|
| 6 |
+
5.998576678037644,
|
| 7 |
+
5.513576445579528,
|
| 8 |
+
5.219495766162872,
|
| 9 |
+
5.244669423103333,
|
| 10 |
+
5.252831830978393,
|
| 11 |
+
5.0975641715526585,
|
| 12 |
+
5.109532077312469
|
| 13 |
+
],
|
| 14 |
+
"valid_losses": [
|
| 15 |
+
9.130632095336914,
|
| 16 |
+
6.077304810285568,
|
| 17 |
+
5.67069188117981,
|
| 18 |
+
5.482845658063889,
|
| 19 |
+
5.408580594062805,
|
| 20 |
+
5.219080436229706,
|
| 21 |
+
5.178996448516846,
|
| 22 |
+
5.1929361116886135
|
| 23 |
+
],
|
| 24 |
+
"train_ppls": [
|
| 25 |
+
9254.433598707652,
|
| 26 |
+
402.854992879087,
|
| 27 |
+
248.03663216001516,
|
| 28 |
+
184.84095750340347,
|
| 29 |
+
189.5531429813315,
|
| 30 |
+
191.10668474373148,
|
| 31 |
+
163.62286426959807,
|
| 32 |
+
165.59285209010247
|
| 33 |
+
],
|
| 34 |
+
"valid_ppls": [
|
| 35 |
+
9233.856802731327,
|
| 36 |
+
435.8529040032509,
|
| 37 |
+
290.23527326329105,
|
| 38 |
+
240.5302011247121,
|
| 39 |
+
223.31438884745674,
|
| 40 |
+
184.76420346110612,
|
| 41 |
+
177.50458658850164,
|
| 42 |
+
179.99626698242258
|
| 43 |
+
]
|
| 44 |
+
}
|
saved_models/training_info_20251003_124248.json
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251003_124248",
|
| 3 |
+
"iteration": 10000,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
9.129518022537232,
|
| 6 |
+
6.249767727851868,
|
| 7 |
+
5.751644665002823,
|
| 8 |
+
5.432924321889877,
|
| 9 |
+
5.380447548627854,
|
| 10 |
+
5.287347407341003,
|
| 11 |
+
5.201901209354401,
|
| 12 |
+
5.206252094507217,
|
| 13 |
+
5.128036706447602,
|
| 14 |
+
5.146241910457611,
|
| 15 |
+
5.054771481752396,
|
| 16 |
+
5.089460607767105,
|
| 17 |
+
5.142615798711777,
|
| 18 |
+
4.971773710250854,
|
| 19 |
+
5.018589770793914,
|
| 20 |
+
4.986998063325882,
|
| 21 |
+
4.901928544044495,
|
| 22 |
+
4.924854670763016,
|
| 23 |
+
4.882941116094589,
|
| 24 |
+
4.833026471734047
|
| 25 |
+
],
|
| 26 |
+
"valid_losses": [
|
| 27 |
+
9.130995869636536,
|
| 28 |
+
6.351803512573242,
|
| 29 |
+
5.781590422391892,
|
| 30 |
+
5.6069504833221435,
|
| 31 |
+
5.517809283733368,
|
| 32 |
+
5.369585566520691,
|
| 33 |
+
5.316266185045242,
|
| 34 |
+
5.262127176523209,
|
| 35 |
+
5.158866701126098,
|
| 36 |
+
5.103218721151352,
|
| 37 |
+
5.177156277894974,
|
| 38 |
+
5.101760529279709,
|
| 39 |
+
5.106319153308869,
|
| 40 |
+
5.066332763433456,
|
| 41 |
+
4.955312205553055,
|
| 42 |
+
4.9866281771659855,
|
| 43 |
+
4.9465500092506405,
|
| 44 |
+
4.944554382562638,
|
| 45 |
+
4.932551403641701,
|
| 46 |
+
4.939222619533539
|
| 47 |
+
],
|
| 48 |
+
"train_ppls": [
|
| 49 |
+
9223.57534224239,
|
| 50 |
+
517.8925186892013,
|
| 51 |
+
314.7078238326993,
|
| 52 |
+
228.81740373538926,
|
| 53 |
+
217.11942518452676,
|
| 54 |
+
197.81799827534553,
|
| 55 |
+
181.61720617786816,
|
| 56 |
+
182.40912330469595,
|
| 57 |
+
168.68561335377868,
|
| 58 |
+
171.78469345781065,
|
| 59 |
+
156.76870174157824,
|
| 60 |
+
162.3022938421263,
|
| 61 |
+
171.1629109708002,
|
| 62 |
+
144.28257602921107,
|
| 63 |
+
151.19792963990514,
|
| 64 |
+
146.49599106312616,
|
| 65 |
+
134.54901333118875,
|
| 66 |
+
137.6693328098662,
|
| 67 |
+
132.01837482188307,
|
| 68 |
+
125.59048210376211
|
| 69 |
+
],
|
| 70 |
+
"valid_ppls": [
|
| 71 |
+
9237.216453562814,
|
| 72 |
+
573.526138433827,
|
| 73 |
+
324.27451396569484,
|
| 74 |
+
272.31254886498004,
|
| 75 |
+
249.08875624475485,
|
| 76 |
+
214.77383978784715,
|
| 77 |
+
203.62217332242238,
|
| 78 |
+
192.8913691934505,
|
| 79 |
+
173.9671870269423,
|
| 80 |
+
164.55069864162073,
|
| 81 |
+
177.17824821424654,
|
| 82 |
+
164.31092700939686,
|
| 83 |
+
165.06166862379737,
|
| 84 |
+
158.5916664602556,
|
| 85 |
+
141.92690982608215,
|
| 86 |
+
146.44181424379707,
|
| 87 |
+
140.68875079955296,
|
| 88 |
+
140.40826853578682,
|
| 89 |
+
138.73302511680404,
|
| 90 |
+
139.6616371202669
|
| 91 |
+
]
|
| 92 |
+
}
|
saved_models/training_info_20251004_181034.json
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251004_181034",
|
| 3 |
+
"iteration": 10000,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
5.384823150634766,
|
| 6 |
+
4.10352410197258,
|
| 7 |
+
4.082370533943176,
|
| 8 |
+
4.081991443634033,
|
| 9 |
+
4.115782041549682,
|
| 10 |
+
4.035000920295715,
|
| 11 |
+
4.043951436281204,
|
| 12 |
+
4.0193834912776945,
|
| 13 |
+
4.050422257184982,
|
| 14 |
+
4.007429542541504,
|
| 15 |
+
4.026445609331131,
|
| 16 |
+
4.0093786239624025,
|
| 17 |
+
4.013193726539612,
|
| 18 |
+
3.999409407377243,
|
| 19 |
+
3.996302636861801,
|
| 20 |
+
3.9996998167037963,
|
| 21 |
+
3.9200201332569122,
|
| 22 |
+
3.9437547767162324,
|
| 23 |
+
3.8990400445461275,
|
| 24 |
+
3.922501788139343
|
| 25 |
+
],
|
| 26 |
+
"valid_losses": [
|
| 27 |
+
5.368972723484039,
|
| 28 |
+
4.169427636861801,
|
| 29 |
+
4.154992059469223,
|
| 30 |
+
4.130547314882278,
|
| 31 |
+
4.169944169521332,
|
| 32 |
+
4.0979551923274995,
|
| 33 |
+
4.077370369434357,
|
| 34 |
+
4.092267180681229,
|
| 35 |
+
4.121343567371368,
|
| 36 |
+
4.069447156190872,
|
| 37 |
+
4.087164725065231,
|
| 38 |
+
4.080873987674713,
|
| 39 |
+
4.043817769289017,
|
| 40 |
+
4.03347335934639,
|
| 41 |
+
4.0298103272914885,
|
| 42 |
+
4.026532536745071,
|
| 43 |
+
4.007442288398742,
|
| 44 |
+
4.034571597576141,
|
| 45 |
+
4.011275128126145,
|
| 46 |
+
4.024933376312256
|
| 47 |
+
],
|
| 48 |
+
"train_ppls": [
|
| 49 |
+
218.07153488457897,
|
| 50 |
+
60.55330805635613,
|
| 51 |
+
59.285842507086265,
|
| 52 |
+
59.26337207815248,
|
| 53 |
+
61.300134774788916,
|
| 54 |
+
56.54297227455415,
|
| 55 |
+
57.05133270203233,
|
| 56 |
+
55.666776192558146,
|
| 57 |
+
57.42169865181937,
|
| 58 |
+
55.00529990733514,
|
| 59 |
+
56.06129296001715,
|
| 60 |
+
55.112614263671325,
|
| 61 |
+
55.323276133190696,
|
| 62 |
+
54.565914288551944,
|
| 63 |
+
54.39665357812676,
|
| 64 |
+
54.5817630401745,
|
| 65 |
+
50.40145951338362,
|
| 66 |
+
51.61202958022025,
|
| 67 |
+
49.35504771033067,
|
| 68 |
+
50.526693871422225
|
| 69 |
+
],
|
| 70 |
+
"valid_ppls": [
|
| 71 |
+
214.6422574594534,
|
| 72 |
+
64.67842196649966,
|
| 73 |
+
63.751458325386196,
|
| 74 |
+
62.21196315184112,
|
| 75 |
+
64.7118391135932,
|
| 76 |
+
60.21702937694706,
|
| 77 |
+
58.990143429603194,
|
| 78 |
+
59.87548648336652,
|
| 79 |
+
61.642006841183196,
|
| 80 |
+
58.52459868455931,
|
| 81 |
+
59.57075257785934,
|
| 82 |
+
59.197184857393864,
|
| 83 |
+
57.043707331631325,
|
| 84 |
+
56.45666537445655,
|
| 85 |
+
56.25024109965814,
|
| 86 |
+
56.06616643505251,
|
| 87 |
+
55.00600100150311,
|
| 88 |
+
56.5187023021225,
|
| 89 |
+
55.21723474132522,
|
| 90 |
+
55.97657929126963
|
| 91 |
+
]
|
| 92 |
+
}
|
saved_models/training_info_20251005_192151.json
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20251005_192151",
|
| 3 |
+
"iteration": 10000,
|
| 4 |
+
"train_losses": [
|
| 5 |
+
5.391083490848541,
|
| 6 |
+
4.080670750141143,
|
| 7 |
+
4.118255652189255,
|
| 8 |
+
4.118281594514847,
|
| 9 |
+
4.0693074274063115,
|
| 10 |
+
4.066421988010407,
|
| 11 |
+
4.0487049961090085,
|
| 12 |
+
4.09080335021019,
|
| 13 |
+
4.029991974830628,
|
| 14 |
+
4.040501079559326,
|
| 15 |
+
4.000860288143158,
|
| 16 |
+
4.030070139169693,
|
| 17 |
+
3.987196750640869,
|
| 18 |
+
3.9932286298274993,
|
| 19 |
+
3.9395544254779815,
|
| 20 |
+
3.9641987037658692,
|
| 21 |
+
3.9298926997184753,
|
| 22 |
+
3.9404408383369445,
|
| 23 |
+
3.954084361791611,
|
| 24 |
+
3.903088995218277
|
| 25 |
+
],
|
| 26 |
+
"valid_losses": [
|
| 27 |
+
5.364040174484253,
|
| 28 |
+
4.133196280002594,
|
| 29 |
+
4.126940160989761,
|
| 30 |
+
4.140352500677109,
|
| 31 |
+
4.110208399295807,
|
| 32 |
+
4.1024039101600644,
|
| 33 |
+
4.101395148038864,
|
| 34 |
+
4.045079884529113,
|
| 35 |
+
4.142436131238937,
|
| 36 |
+
4.054532527923584,
|
| 37 |
+
4.097548708915711,
|
| 38 |
+
4.048539190292359,
|
| 39 |
+
4.040392457246781,
|
| 40 |
+
4.023428028821945,
|
| 41 |
+
4.035654059648514,
|
| 42 |
+
3.9959933757781982,
|
| 43 |
+
4.023642826080322,
|
| 44 |
+
3.9648880326747893,
|
| 45 |
+
4.02871377825737,
|
| 46 |
+
4.007620505094528
|
| 47 |
+
],
|
| 48 |
+
"train_ppls": [
|
| 49 |
+
219.44101912982637,
|
| 50 |
+
59.18515499005087,
|
| 51 |
+
61.45195513519193,
|
| 52 |
+
61.45354936249919,
|
| 53 |
+
58.516421684812634,
|
| 54 |
+
58.347819459106525,
|
| 55 |
+
57.32317522324482,
|
| 56 |
+
59.78790304107917,
|
| 57 |
+
56.26045974559773,
|
| 58 |
+
56.85482445947149,
|
| 59 |
+
54.64514038397881,
|
| 60 |
+
56.264857479119975,
|
| 61 |
+
53.90357221829597,
|
| 62 |
+
54.22969463034853,
|
| 63 |
+
51.3956955853101,
|
| 64 |
+
52.678041757003975,
|
| 65 |
+
50.90151562921041,
|
| 66 |
+
51.44127358825189,
|
| 67 |
+
52.14792344632695,
|
| 68 |
+
49.55528897386779
|
| 69 |
+
],
|
| 70 |
+
"valid_ppls": [
|
| 71 |
+
213.5861308465433,
|
| 72 |
+
62.37697893633224,
|
| 73 |
+
61.98795927790631,
|
| 74 |
+
62.82496338858189,
|
| 75 |
+
60.95942014620351,
|
| 76 |
+
60.48551471431312,
|
| 77 |
+
60.424529982903024,
|
| 78 |
+
57.11574851658181,
|
| 79 |
+
62.95600387491564,
|
| 80 |
+
57.658203095230256,
|
| 81 |
+
60.19255712751682,
|
| 82 |
+
57.31367149527243,
|
| 83 |
+
56.84864909235673,
|
| 84 |
+
55.89237847975788,
|
| 85 |
+
56.579914777858924,
|
| 86 |
+
54.379833411142414,
|
| 87 |
+
55.90438529888927,
|
| 88 |
+
52.71436677255565,
|
| 89 |
+
56.18859375795506,
|
| 90 |
+
55.015804862829924
|
| 91 |
+
]
|
| 92 |
+
}
|