Upload folder using huggingface_hub
Browse files- final.pt +3 -0
- logs/metrics.jsonl +102 -0
final.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47962d2eb5e6ad65b28a2cc7dd7c40b8a923f8f2ca6de10cdbdc2bb765c1bb06
|
| 3 |
+
size 304304359
|
logs/metrics.jsonl
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 0, "val_loss": 10.825744774891414, "val_perplexity": 50299.21627192595}
|
| 2 |
+
{"step": 0, "val_loss": 10.825744774891414, "val_perplexity": 50299.21627192595}
|
| 3 |
+
{"step": 1000, "val_loss": 10.689736016575488, "val_perplexity": 43902.91582561654}
|
| 4 |
+
{"step": 2000, "val_loss": 10.433539775086976, "val_perplexity": 33980.42067028248}
|
| 5 |
+
{"step": 3000, "val_loss": 9.699867817209386, "val_perplexity": 16315.450433706554}
|
| 6 |
+
{"step": 4000, "val_loss": 7.975991479512034, "val_perplexity": 2910.241887350923}
|
| 7 |
+
{"step": 5000, "val_loss": 7.3839335973528755, "val_perplexity": 1609.9100669046134}
|
| 8 |
+
{"step": 6000, "val_loss": 7.123817476527341, "val_perplexity": 1241.1795749226658}
|
| 9 |
+
{"step": 7000, "val_loss": 6.93326405729873, "val_perplexity": 1025.8369113520998}
|
| 10 |
+
{"step": 8000, "val_loss": 6.718656777023613, "val_perplexity": 827.7049721183079}
|
| 11 |
+
{"step": 9000, "val_loss": 6.4967232516194775, "val_perplexity": 662.9656982178569}
|
| 12 |
+
{"step": 10000, "val_loss": 6.268896476932619, "val_perplexity": 527.8945125624202}
|
| 13 |
+
{"step": 11000, "val_loss": 6.077692753914418, "val_perplexity": 436.02202316270376}
|
| 14 |
+
{"step": 12000, "val_loss": 5.864625998530881, "val_perplexity": 352.3503519125041}
|
| 15 |
+
{"step": 13000, "val_loss": 5.621483257259352, "val_perplexity": 276.2989018008563}
|
| 16 |
+
{"step": 14000, "val_loss": 5.430720788231487, "val_perplexity": 228.3137519952063}
|
| 17 |
+
{"step": 15000, "val_loss": 5.348461539701201, "val_perplexity": 210.2845344742303}
|
| 18 |
+
{"step": 16000, "val_loss": 5.172298092195664, "val_perplexity": 176.31957087839228}
|
| 19 |
+
{"step": 17000, "val_loss": 5.154795171261549, "val_perplexity": 173.26031443245617}
|
| 20 |
+
{"step": 18000, "val_loss": 4.9594926745847445, "val_perplexity": 142.52147278749976}
|
| 21 |
+
{"step": 19000, "val_loss": 4.865670127591948, "val_perplexity": 129.75786380787983}
|
| 22 |
+
{"step": 20000, "val_loss": 4.619028402484018, "val_perplexity": 101.39546866980153}
|
| 23 |
+
{"step": 21000, "val_loss": 4.598245897431442, "val_perplexity": 99.30996290932829}
|
| 24 |
+
{"step": 22000, "val_loss": 4.481863986497643, "val_perplexity": 88.39929429271452}
|
| 25 |
+
{"step": 23000, "val_loss": 4.595661891586128, "val_perplexity": 99.05367664987267}
|
| 26 |
+
{"step": 24000, "val_loss": 4.2548980779681225, "val_perplexity": 70.4496364524089}
|
| 27 |
+
{"step": 25000, "val_loss": 4.305100770876848, "val_perplexity": 74.07667984872775}
|
| 28 |
+
{"step": 26000, "val_loss": 4.337328298023905, "val_perplexity": 76.5028731790972}
|
| 29 |
+
{"step": 27000, "val_loss": 4.304209994220209, "val_perplexity": 74.01072345208766}
|
| 30 |
+
{"step": 28000, "val_loss": 4.244395425046545, "val_perplexity": 69.71360030413672}
|
| 31 |
+
{"step": 29000, "val_loss": 4.192933628832239, "val_perplexity": 66.21676170352634}
|
| 32 |
+
{"step": 30000, "val_loss": 4.336854466442587, "val_perplexity": 76.46663228844069}
|
| 33 |
+
{"step": 31000, "val_loss": 4.053855293807296, "val_perplexity": 57.61916821237945}
|
| 34 |
+
{"step": 32000, "val_loss": 4.06435650822638, "val_perplexity": 58.227427593855}
|
| 35 |
+
{"step": 33000, "val_loss": 4.18760727130514, "val_perplexity": 65.86500517852713}
|
| 36 |
+
{"step": 34000, "val_loss": 4.202326839956538, "val_perplexity": 66.84168012668538}
|
| 37 |
+
{"step": 35000, "val_loss": 4.121915236421082, "val_perplexity": 61.67725574304972}
|
| 38 |
+
{"step": 36000, "val_loss": 4.2215694388847105, "val_perplexity": 68.14034251692064}
|
| 39 |
+
{"step": 37000, "val_loss": 3.989921209870606, "val_perplexity": 54.050630539378126}
|
| 40 |
+
{"step": 38000, "val_loss": 4.066742621522477, "val_perplexity": 58.36653072495045}
|
| 41 |
+
{"step": 39000, "val_loss": 3.9866048712441775, "val_perplexity": 53.87167724441196}
|
| 42 |
+
{"step": 40000, "val_loss": 4.127016034348122, "val_perplexity": 61.99266269098427}
|
| 43 |
+
{"step": 41000, "val_loss": 4.0143460805205, "val_perplexity": 55.387064877226265}
|
| 44 |
+
{"step": 42000, "val_loss": 4.321677796658186, "val_perplexity": 75.3148854085713}
|
| 45 |
+
{"step": 43000, "val_loss": 4.023634690532331, "val_perplexity": 55.90393048792984}
|
| 46 |
+
{"step": 44000, "val_loss": 4.22541426801276, "val_perplexity": 68.40283478622423}
|
| 47 |
+
{"step": 45000, "val_loss": 4.035941032840944, "val_perplexity": 56.596154026633265}
|
| 48 |
+
{"step": 46000, "val_loss": 4.091956697027942, "val_perplexity": 59.85689900928358}
|
| 49 |
+
{"step": 47000, "val_loss": 4.138214429060539, "val_perplexity": 62.6907826127185}
|
| 50 |
+
{"step": 48000, "val_loss": 4.099424802046409, "val_perplexity": 60.30558996737639}
|
| 51 |
+
{"step": 49000, "val_loss": 4.01823715569676, "val_perplexity": 55.60299994780596}
|
| 52 |
+
{"step": 50000, "val_loss": 4.248477826779219, "val_perplexity": 69.99878094170532}
|
| 53 |
+
{"step": 51000, "val_loss": 4.157940181152053, "val_perplexity": 63.93968269983898}
|
| 54 |
+
{"step": 52000, "val_loss": 3.9643685601841754, "val_perplexity": 52.68699022045566}
|
| 55 |
+
{"step": 53000, "val_loss": 3.912029364635015, "val_perplexity": 50.00031796135444}
|
| 56 |
+
{"step": 54000, "val_loss": 4.112006359424753, "val_perplexity": 61.06912134274656}
|
| 57 |
+
{"step": 55000, "val_loss": 3.8616734626831084, "val_perplexity": 47.544849363020546}
|
| 58 |
+
{"step": 56000, "val_loss": 4.109003791694584, "val_perplexity": 60.886032176056894}
|
| 59 |
+
{"step": 57000, "val_loss": 4.1741195089999525, "val_perplexity": 64.98259787168921}
|
| 60 |
+
{"step": 58000, "val_loss": 3.9863957266261303, "val_perplexity": 53.860411451182195}
|
| 61 |
+
{"step": 59000, "val_loss": 3.9894471907985394, "val_perplexity": 54.02501558111423}
|
| 62 |
+
{"step": 60000, "val_loss": 4.069726937529205, "val_perplexity": 58.54097506590077}
|
| 63 |
+
{"step": 61000, "val_loss": 4.152612483638594, "val_perplexity": 63.59993724543505}
|
| 64 |
+
{"step": 62000, "val_loss": 3.929075597941488, "val_perplexity": 50.85994089804492}
|
| 65 |
+
{"step": 63000, "val_loss": 3.953994500392553, "val_perplexity": 52.14323757151089}
|
| 66 |
+
{"step": 64000, "val_loss": 4.127124303814886, "val_perplexity": 61.99937496687784}
|
| 67 |
+
{"step": 65000, "val_loss": 3.8555567511443556, "val_perplexity": 47.2549188489323}
|
| 68 |
+
{"step": 66000, "val_loss": 3.936258910714894, "val_perplexity": 51.226599094778216}
|
| 69 |
+
{"step": 67000, "val_loss": 3.939015494399574, "val_perplexity": 51.36800431017743}
|
| 70 |
+
{"step": 68000, "val_loss": 3.97920528025911, "val_perplexity": 53.47452007957059}
|
| 71 |
+
{"step": 69000, "val_loss": 3.965456073077814, "val_perplexity": 52.74431916898261}
|
| 72 |
+
{"step": 70000, "val_loss": 3.92843952985213, "val_perplexity": 50.82760079895593}
|
| 73 |
+
{"step": 71000, "val_loss": 4.156702865297166, "val_perplexity": 63.86061804074312}
|
| 74 |
+
{"step": 72000, "val_loss": 3.9580430380996314, "val_perplexity": 52.35476934323668}
|
| 75 |
+
{"step": 73000, "val_loss": 3.9494376774130013, "val_perplexity": 51.90617061461253}
|
| 76 |
+
{"step": 74000, "val_loss": 4.129416203844721, "val_perplexity": 62.14163429602277}
|
| 77 |
+
{"step": 75000, "val_loss": 4.057544733536965, "val_perplexity": 57.832143298560815}
|
| 78 |
+
{"step": 76000, "val_loss": 3.931178355348176, "val_perplexity": 50.96699953519567}
|
| 79 |
+
{"step": 77000, "val_loss": 4.17460839470486, "val_perplexity": 65.01437470184251}
|
| 80 |
+
{"step": 78000, "val_loss": 4.0674727374521, "val_perplexity": 58.40916061927831}
|
| 81 |
+
{"step": 79000, "val_loss": 4.109246380153807, "val_perplexity": 60.90080421648149}
|
| 82 |
+
{"step": 80000, "val_loss": 3.848056246484143, "val_perplexity": 46.90180901628413}
|
| 83 |
+
{"step": 81000, "val_loss": 3.976353486935576, "val_perplexity": 53.32223904045877}
|
| 84 |
+
{"step": 82000, "val_loss": 3.9818217462155148, "val_perplexity": 53.61461754108883}
|
| 85 |
+
{"step": 83000, "val_loss": 4.002650684091435, "val_perplexity": 54.74306445718919}
|
| 86 |
+
{"step": 84000, "val_loss": 3.895567115394398, "val_perplexity": 49.18393842354999}
|
| 87 |
+
{"step": 85000, "val_loss": 4.309513480022826, "val_perplexity": 74.40428096403147}
|
| 88 |
+
{"step": 86000, "val_loss": 4.150610932115915, "val_perplexity": 63.47276600654878}
|
| 89 |
+
{"step": 87000, "val_loss": 4.032884273843923, "val_perplexity": 56.4234173652884}
|
| 90 |
+
{"step": 88000, "val_loss": 3.9995721442035106, "val_perplexity": 54.57479489484689}
|
| 91 |
+
{"step": 89000, "val_loss": 3.8571679237903864, "val_perplexity": 47.33111604851072}
|
| 92 |
+
{"step": 90000, "val_loss": 4.083757469450133, "val_perplexity": 59.36812519431964}
|
| 93 |
+
{"step": 91000, "val_loss": 4.111885510664573, "val_perplexity": 61.061741661068666}
|
| 94 |
+
{"step": 92000, "val_loss": 4.104862103645893, "val_perplexity": 60.63438271077653}
|
| 95 |
+
{"step": 93000, "val_loss": 4.079413568096915, "val_perplexity": 59.110795227585434}
|
| 96 |
+
{"step": 94000, "val_loss": 4.022256333807697, "val_perplexity": 55.82692801004111}
|
| 97 |
+
{"step": 95000, "val_loss": 3.9508867892341173, "val_perplexity": 51.98144298590835}
|
| 98 |
+
{"step": 96000, "val_loss": 4.001017938798043, "val_perplexity": 54.65375590525009}
|
| 99 |
+
{"step": 97000, "val_loss": 3.8267948008466206, "val_perplexity": 45.91513498024922}
|
| 100 |
+
{"step": 98000, "val_loss": 3.9409920747069016, "val_perplexity": 51.4696377061191}
|
| 101 |
+
{"step": 99000, "val_loss": 3.924658268198125, "val_perplexity": 50.63577124851523}
|
| 102 |
+
{"training_complete": true, "final_step": 100000}
|