Upload folder using huggingface_hub
Browse files- final.pt +3 -0
- logs/metrics.jsonl +101 -0
final.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c41a612f22a2ad5a741a1d9f1cbb275bb2f87e7037bf2876f83663150792c06a
|
| 3 |
+
size 316094327
|
logs/metrics.jsonl
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 0, "val_loss": 10.822597044238215, "val_perplexity": 50141.13681333732}
|
| 2 |
+
{"step": 1000, "val_loss": 10.692986014129044, "val_perplexity": 44045.832307956705}
|
| 3 |
+
{"step": 2000, "val_loss": 10.462919051078273, "val_perplexity": 34993.55046554394}
|
| 4 |
+
{"step": 3000, "val_loss": 9.768150601045916, "val_perplexity": 17468.431228064746}
|
| 5 |
+
{"step": 4000, "val_loss": 8.018009468219828, "val_perplexity": 3035.129793719384}
|
| 6 |
+
{"step": 5000, "val_loss": 7.398580942110994, "val_perplexity": 1633.664519816975}
|
| 7 |
+
{"step": 6000, "val_loss": 7.041477884633235, "val_perplexity": 1143.0756929245154}
|
| 8 |
+
{"step": 7000, "val_loss": 6.930540378478958, "val_perplexity": 1023.0466626796535}
|
| 9 |
+
{"step": 8000, "val_loss": 6.771931521113721, "val_perplexity": 872.9964776914119}
|
| 10 |
+
{"step": 9000, "val_loss": 6.562404041948648, "val_perplexity": 707.971643932348}
|
| 11 |
+
{"step": 10000, "val_loss": 6.426421488446555, "val_perplexity": 617.9586145369051}
|
| 12 |
+
{"step": 11000, "val_loss": 6.265651967419333, "val_perplexity": 526.1845293229039}
|
| 13 |
+
{"step": 12000, "val_loss": 6.065694331884265, "val_perplexity": 430.8217070950238}
|
| 14 |
+
{"step": 13000, "val_loss": 5.962106660343874, "val_perplexity": 388.42754783002056}
|
| 15 |
+
{"step": 14000, "val_loss": 5.780746974248538, "val_perplexity": 324.0011205417491}
|
| 16 |
+
{"step": 15000, "val_loss": 5.587262044375154, "val_perplexity": 267.00357407922075}
|
| 17 |
+
{"step": 16000, "val_loss": 5.580513893573984, "val_perplexity": 265.20785938801276}
|
| 18 |
+
{"step": 17000, "val_loss": 5.371946824079994, "val_perplexity": 215.28157535189797}
|
| 19 |
+
{"step": 18000, "val_loss": 5.174755851646374, "val_perplexity": 176.7534549429846}
|
| 20 |
+
{"step": 19000, "val_loss": 5.019962137135463, "val_perplexity": 151.40557103689775}
|
| 21 |
+
{"step": 20000, "val_loss": 4.868022671098886, "val_perplexity": 130.0634841795119}
|
| 22 |
+
{"step": 21000, "val_loss": 4.747262549793917, "val_perplexity": 115.26831098192787}
|
| 23 |
+
{"step": 22000, "val_loss": 4.7507277169544855, "val_perplexity": 115.66842778323385}
|
| 24 |
+
{"step": 23000, "val_loss": 4.5408862633011, "val_perplexity": 93.77387164345458}
|
| 25 |
+
{"step": 24000, "val_loss": 4.626865603316719, "val_perplexity": 102.19324741400429}
|
| 26 |
+
{"step": 25000, "val_loss": 4.394821035319295, "val_perplexity": 81.03012793404596}
|
| 27 |
+
{"step": 26000, "val_loss": 4.416422590605911, "val_perplexity": 82.79954702021597}
|
| 28 |
+
{"step": 27000, "val_loss": 4.198587345564109, "val_perplexity": 66.59219280761843}
|
| 29 |
+
{"step": 28000, "val_loss": 4.254920883379083, "val_perplexity": 70.45124310364031}
|
| 30 |
+
{"step": 29000, "val_loss": 4.302284657925353, "val_perplexity": 73.86836500793245}
|
| 31 |
+
{"step": 30000, "val_loss": 4.276957466818202, "val_perplexity": 72.02098009785887}
|
| 32 |
+
{"step": 31000, "val_loss": 4.406753761402186, "val_perplexity": 82.00283020815131}
|
| 33 |
+
{"step": 32000, "val_loss": 4.205136942708415, "val_perplexity": 67.02977627692627}
|
| 34 |
+
{"step": 33000, "val_loss": 4.102634137245701, "val_perplexity": 60.499441721214616}
|
| 35 |
+
{"step": 34000, "val_loss": 4.171452327630948, "val_perplexity": 64.80950843041387}
|
| 36 |
+
{"step": 35000, "val_loss": 4.118583370531243, "val_perplexity": 61.47209736835011}
|
| 37 |
+
{"step": 36000, "val_loss": 4.1824542649571095, "val_perplexity": 65.52647536097344}
|
| 38 |
+
{"step": 37000, "val_loss": 4.075685073161734, "val_perplexity": 58.890811286315625}
|
| 39 |
+
{"step": 38000, "val_loss": 4.151409095737444, "val_perplexity": 63.52344788285738}
|
| 40 |
+
{"step": 39000, "val_loss": 3.9426306016806545, "val_perplexity": 51.55404122566792}
|
| 41 |
+
{"step": 40000, "val_loss": 4.310517335069245, "val_perplexity": 74.47900957902556}
|
| 42 |
+
{"step": 41000, "val_loss": 4.230404457609435, "val_perplexity": 68.74503100264343}
|
| 43 |
+
{"step": 42000, "val_loss": 4.33667195374039, "val_perplexity": 76.45267743026194}
|
| 44 |
+
{"step": 43000, "val_loss": 4.080771993910926, "val_perplexity": 59.19114742160031}
|
| 45 |
+
{"step": 44000, "val_loss": 3.9873131050235813, "val_perplexity": 53.909844500064075}
|
| 46 |
+
{"step": 45000, "val_loss": 4.069335594124768, "val_perplexity": 58.51806992359993}
|
| 47 |
+
{"step": 46000, "val_loss": 4.031828113172339, "val_perplexity": 56.363856629300834}
|
| 48 |
+
{"step": 47000, "val_loss": 4.135724690271772, "val_perplexity": 62.53489308218226}
|
| 49 |
+
{"step": 48000, "val_loss": 4.002222927049615, "val_perplexity": 54.719652733498606}
|
| 50 |
+
{"step": 49000, "val_loss": 3.9879069924652724, "val_perplexity": 53.9418703886414}
|
| 51 |
+
{"step": 50000, "val_loss": 4.053920384941845, "val_perplexity": 57.62291883147486}
|
| 52 |
+
{"step": 51000, "val_loss": 4.105187354772434, "val_perplexity": 60.65410731961201}
|
| 53 |
+
{"step": 52000, "val_loss": 4.056863579886028, "val_perplexity": 57.792764136164145}
|
| 54 |
+
{"step": 53000, "val_loss": 4.119403709108201, "val_perplexity": 61.522545990874335}
|
| 55 |
+
{"step": 54000, "val_loss": 4.090256125644304, "val_perplexity": 59.75519458204465}
|
| 56 |
+
{"step": 55000, "val_loss": 4.107904635530045, "val_perplexity": 60.81914568454004}
|
| 57 |
+
{"step": 56000, "val_loss": 4.077356395272984, "val_perplexity": 58.98931909757918}
|
| 58 |
+
{"step": 57000, "val_loss": 4.026693084587032, "val_perplexity": 56.07516845968918}
|
| 59 |
+
{"step": 58000, "val_loss": 4.179727129246844, "val_perplexity": 65.3480192177114}
|
| 60 |
+
{"step": 59000, "val_loss": 4.025255821656441, "val_perplexity": 55.99463158892877}
|
| 61 |
+
{"step": 60000, "val_loss": 3.9968152983657355, "val_perplexity": 54.42454779789451}
|
| 62 |
+
{"step": 61000, "val_loss": 3.9376771968862543, "val_perplexity": 51.29930461831197}
|
| 63 |
+
{"step": 62000, "val_loss": 4.065776554449252, "val_perplexity": 58.310171968991185}
|
| 64 |
+
{"step": 63000, "val_loss": 4.071613242114527, "val_perplexity": 58.65150538975158}
|
| 65 |
+
{"step": 64000, "val_loss": 3.8766597564844205, "val_perplexity": 48.26273624040304}
|
| 66 |
+
{"step": 65000, "val_loss": 4.243556539555559, "val_perplexity": 69.65514309919587}
|
| 67 |
+
{"step": 66000, "val_loss": 3.8733662746738586, "val_perplexity": 48.10404526283507}
|
| 68 |
+
{"step": 67000, "val_loss": 4.050294651634518, "val_perplexity": 57.41437179183827}
|
| 69 |
+
{"step": 68000, "val_loss": 3.9392243543226995, "val_perplexity": 51.37873414808631}
|
| 70 |
+
{"step": 69000, "val_loss": 3.9842519464344903, "val_perplexity": 53.745070245269524}
|
| 71 |
+
{"step": 70000, "val_loss": 4.166442912837874, "val_perplexity": 64.48566253657818}
|
| 72 |
+
{"step": 71000, "val_loss": 4.036478777776186, "val_perplexity": 56.62659650622628}
|
| 73 |
+
{"step": 72000, "val_loss": 3.956797821632679, "val_perplexity": 52.28961689519616}
|
| 74 |
+
{"step": 73000, "val_loss": 4.031381442941624, "val_perplexity": 56.33868619431674}
|
| 75 |
+
{"step": 74000, "val_loss": 3.997959792226359, "val_perplexity": 54.48687201675631}
|
| 76 |
+
{"step": 75000, "val_loss": 3.8671833241564326, "val_perplexity": 47.8075379209929}
|
| 77 |
+
{"step": 76000, "val_loss": 4.035905570134692, "val_perplexity": 56.59414700943534}
|
| 78 |
+
{"step": 77000, "val_loss": 4.123749804532546, "val_perplexity": 61.79051072486291}
|
| 79 |
+
{"step": 78000, "val_loss": 4.219275814703311, "val_perplexity": 67.98423327599707}
|
| 80 |
+
{"step": 79000, "val_loss": 3.8975399326479034, "val_perplexity": 49.28106512097807}
|
| 81 |
+
{"step": 80000, "val_loss": 4.055938215002887, "val_perplexity": 57.739309478079036}
|
| 82 |
+
{"step": 81000, "val_loss": 4.131514830730032, "val_perplexity": 62.27218333940566}
|
| 83 |
+
{"step": 82000, "val_loss": 3.9476103534574447, "val_perplexity": 51.81140783312622}
|
| 84 |
+
{"step": 83000, "val_loss": 4.083147538787666, "val_perplexity": 59.33192579507446}
|
| 85 |
+
{"step": 84000, "val_loss": 4.184608652091969, "val_perplexity": 65.66779693251095}
|
| 86 |
+
{"step": 85000, "val_loss": 4.029318009214797, "val_perplexity": 56.222554904912805}
|
| 87 |
+
{"step": 86000, "val_loss": 4.10216541156702, "val_perplexity": 60.47109072427321}
|
| 88 |
+
{"step": 87000, "val_loss": 3.8234665442729603, "val_perplexity": 45.762571656196144}
|
| 89 |
+
{"step": 88000, "val_loss": 4.149154015217142, "val_perplexity": 63.38035879230338}
|
| 90 |
+
{"step": 89000, "val_loss": 4.110245572023835, "val_perplexity": 60.961686216267104}
|
| 91 |
+
{"step": 90000, "val_loss": 4.012419129085875, "val_perplexity": 55.280439457097486}
|
| 92 |
+
{"step": 91000, "val_loss": 4.102249706787369, "val_perplexity": 60.47618836304082}
|
| 93 |
+
{"step": 92000, "val_loss": 4.098241672926155, "val_perplexity": 60.23428285884811}
|
| 94 |
+
{"step": 93000, "val_loss": 4.095214923421641, "val_perplexity": 60.05224440442561}
|
| 95 |
+
{"step": 94000, "val_loss": 3.8456500436497545, "val_perplexity": 46.789089417985096}
|
| 96 |
+
{"step": 95000, "val_loss": 3.939535648361214, "val_perplexity": 51.39473053139323}
|
| 97 |
+
{"step": 96000, "val_loss": 3.749884278730132, "val_perplexity": 42.51616169115527}
|
| 98 |
+
{"step": 97000, "val_loss": 4.167497279943378, "val_perplexity": 64.55368995456718}
|
| 99 |
+
{"step": 98000, "val_loss": 4.189003613067902, "val_perplexity": 65.95703947667566}
|
| 100 |
+
{"step": 99000, "val_loss": 3.989953420590376, "val_perplexity": 54.052371577131716}
|
| 101 |
+
{"training_complete": true, "final_step": 100000}
|