Marcus2112 commited on
Commit
e6f47ee
·
verified ·
1 Parent(s): e920a1c

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. final.pt +3 -0
  2. logs/metrics.jsonl +101 -0
final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41a612f22a2ad5a741a1d9f1cbb275bb2f87e7037bf2876f83663150792c06a
3
+ size 316094327
logs/metrics.jsonl ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 0, "val_loss": 10.822597044238215, "val_perplexity": 50141.13681333732}
2
+ {"step": 1000, "val_loss": 10.692986014129044, "val_perplexity": 44045.832307956705}
3
+ {"step": 2000, "val_loss": 10.462919051078273, "val_perplexity": 34993.55046554394}
4
+ {"step": 3000, "val_loss": 9.768150601045916, "val_perplexity": 17468.431228064746}
5
+ {"step": 4000, "val_loss": 8.018009468219828, "val_perplexity": 3035.129793719384}
6
+ {"step": 5000, "val_loss": 7.398580942110994, "val_perplexity": 1633.664519816975}
7
+ {"step": 6000, "val_loss": 7.041477884633235, "val_perplexity": 1143.0756929245154}
8
+ {"step": 7000, "val_loss": 6.930540378478958, "val_perplexity": 1023.0466626796535}
9
+ {"step": 8000, "val_loss": 6.771931521113721, "val_perplexity": 872.9964776914119}
10
+ {"step": 9000, "val_loss": 6.562404041948648, "val_perplexity": 707.971643932348}
11
+ {"step": 10000, "val_loss": 6.426421488446555, "val_perplexity": 617.9586145369051}
12
+ {"step": 11000, "val_loss": 6.265651967419333, "val_perplexity": 526.1845293229039}
13
+ {"step": 12000, "val_loss": 6.065694331884265, "val_perplexity": 430.8217070950238}
14
+ {"step": 13000, "val_loss": 5.962106660343874, "val_perplexity": 388.42754783002056}
15
+ {"step": 14000, "val_loss": 5.780746974248538, "val_perplexity": 324.0011205417491}
16
+ {"step": 15000, "val_loss": 5.587262044375154, "val_perplexity": 267.00357407922075}
17
+ {"step": 16000, "val_loss": 5.580513893573984, "val_perplexity": 265.20785938801276}
18
+ {"step": 17000, "val_loss": 5.371946824079994, "val_perplexity": 215.28157535189797}
19
+ {"step": 18000, "val_loss": 5.174755851646374, "val_perplexity": 176.7534549429846}
20
+ {"step": 19000, "val_loss": 5.019962137135463, "val_perplexity": 151.40557103689775}
21
+ {"step": 20000, "val_loss": 4.868022671098886, "val_perplexity": 130.0634841795119}
22
+ {"step": 21000, "val_loss": 4.747262549793917, "val_perplexity": 115.26831098192787}
23
+ {"step": 22000, "val_loss": 4.7507277169544855, "val_perplexity": 115.66842778323385}
24
+ {"step": 23000, "val_loss": 4.5408862633011, "val_perplexity": 93.77387164345458}
25
+ {"step": 24000, "val_loss": 4.626865603316719, "val_perplexity": 102.19324741400429}
26
+ {"step": 25000, "val_loss": 4.394821035319295, "val_perplexity": 81.03012793404596}
27
+ {"step": 26000, "val_loss": 4.416422590605911, "val_perplexity": 82.79954702021597}
28
+ {"step": 27000, "val_loss": 4.198587345564109, "val_perplexity": 66.59219280761843}
29
+ {"step": 28000, "val_loss": 4.254920883379083, "val_perplexity": 70.45124310364031}
30
+ {"step": 29000, "val_loss": 4.302284657925353, "val_perplexity": 73.86836500793245}
31
+ {"step": 30000, "val_loss": 4.276957466818202, "val_perplexity": 72.02098009785887}
32
+ {"step": 31000, "val_loss": 4.406753761402186, "val_perplexity": 82.00283020815131}
33
+ {"step": 32000, "val_loss": 4.205136942708415, "val_perplexity": 67.02977627692627}
34
+ {"step": 33000, "val_loss": 4.102634137245701, "val_perplexity": 60.499441721214616}
35
+ {"step": 34000, "val_loss": 4.171452327630948, "val_perplexity": 64.80950843041387}
36
+ {"step": 35000, "val_loss": 4.118583370531243, "val_perplexity": 61.47209736835011}
37
+ {"step": 36000, "val_loss": 4.1824542649571095, "val_perplexity": 65.52647536097344}
38
+ {"step": 37000, "val_loss": 4.075685073161734, "val_perplexity": 58.890811286315625}
39
+ {"step": 38000, "val_loss": 4.151409095737444, "val_perplexity": 63.52344788285738}
40
+ {"step": 39000, "val_loss": 3.9426306016806545, "val_perplexity": 51.55404122566792}
41
+ {"step": 40000, "val_loss": 4.310517335069245, "val_perplexity": 74.47900957902556}
42
+ {"step": 41000, "val_loss": 4.230404457609435, "val_perplexity": 68.74503100264343}
43
+ {"step": 42000, "val_loss": 4.33667195374039, "val_perplexity": 76.45267743026194}
44
+ {"step": 43000, "val_loss": 4.080771993910926, "val_perplexity": 59.19114742160031}
45
+ {"step": 44000, "val_loss": 3.9873131050235813, "val_perplexity": 53.909844500064075}
46
+ {"step": 45000, "val_loss": 4.069335594124768, "val_perplexity": 58.51806992359993}
47
+ {"step": 46000, "val_loss": 4.031828113172339, "val_perplexity": 56.363856629300834}
48
+ {"step": 47000, "val_loss": 4.135724690271772, "val_perplexity": 62.53489308218226}
49
+ {"step": 48000, "val_loss": 4.002222927049615, "val_perplexity": 54.719652733498606}
50
+ {"step": 49000, "val_loss": 3.9879069924652724, "val_perplexity": 53.9418703886414}
51
+ {"step": 50000, "val_loss": 4.053920384941845, "val_perplexity": 57.62291883147486}
52
+ {"step": 51000, "val_loss": 4.105187354772434, "val_perplexity": 60.65410731961201}
53
+ {"step": 52000, "val_loss": 4.056863579886028, "val_perplexity": 57.792764136164145}
54
+ {"step": 53000, "val_loss": 4.119403709108201, "val_perplexity": 61.522545990874335}
55
+ {"step": 54000, "val_loss": 4.090256125644304, "val_perplexity": 59.75519458204465}
56
+ {"step": 55000, "val_loss": 4.107904635530045, "val_perplexity": 60.81914568454004}
57
+ {"step": 56000, "val_loss": 4.077356395272984, "val_perplexity": 58.98931909757918}
58
+ {"step": 57000, "val_loss": 4.026693084587032, "val_perplexity": 56.07516845968918}
59
+ {"step": 58000, "val_loss": 4.179727129246844, "val_perplexity": 65.3480192177114}
60
+ {"step": 59000, "val_loss": 4.025255821656441, "val_perplexity": 55.99463158892877}
61
+ {"step": 60000, "val_loss": 3.9968152983657355, "val_perplexity": 54.42454779789451}
62
+ {"step": 61000, "val_loss": 3.9376771968862543, "val_perplexity": 51.29930461831197}
63
+ {"step": 62000, "val_loss": 4.065776554449252, "val_perplexity": 58.310171968991185}
64
+ {"step": 63000, "val_loss": 4.071613242114527, "val_perplexity": 58.65150538975158}
65
+ {"step": 64000, "val_loss": 3.8766597564844205, "val_perplexity": 48.26273624040304}
66
+ {"step": 65000, "val_loss": 4.243556539555559, "val_perplexity": 69.65514309919587}
67
+ {"step": 66000, "val_loss": 3.8733662746738586, "val_perplexity": 48.10404526283507}
68
+ {"step": 67000, "val_loss": 4.050294651634518, "val_perplexity": 57.41437179183827}
69
+ {"step": 68000, "val_loss": 3.9392243543226995, "val_perplexity": 51.37873414808631}
70
+ {"step": 69000, "val_loss": 3.9842519464344903, "val_perplexity": 53.745070245269524}
71
+ {"step": 70000, "val_loss": 4.166442912837874, "val_perplexity": 64.48566253657818}
72
+ {"step": 71000, "val_loss": 4.036478777776186, "val_perplexity": 56.62659650622628}
73
+ {"step": 72000, "val_loss": 3.956797821632679, "val_perplexity": 52.28961689519616}
74
+ {"step": 73000, "val_loss": 4.031381442941624, "val_perplexity": 56.33868619431674}
75
+ {"step": 74000, "val_loss": 3.997959792226359, "val_perplexity": 54.48687201675631}
76
+ {"step": 75000, "val_loss": 3.8671833241564326, "val_perplexity": 47.8075379209929}
77
+ {"step": 76000, "val_loss": 4.035905570134692, "val_perplexity": 56.59414700943534}
78
+ {"step": 77000, "val_loss": 4.123749804532546, "val_perplexity": 61.79051072486291}
79
+ {"step": 78000, "val_loss": 4.219275814703311, "val_perplexity": 67.98423327599707}
80
+ {"step": 79000, "val_loss": 3.8975399326479034, "val_perplexity": 49.28106512097807}
81
+ {"step": 80000, "val_loss": 4.055938215002887, "val_perplexity": 57.739309478079036}
82
+ {"step": 81000, "val_loss": 4.131514830730032, "val_perplexity": 62.27218333940566}
83
+ {"step": 82000, "val_loss": 3.9476103534574447, "val_perplexity": 51.81140783312622}
84
+ {"step": 83000, "val_loss": 4.083147538787666, "val_perplexity": 59.33192579507446}
85
+ {"step": 84000, "val_loss": 4.184608652091969, "val_perplexity": 65.66779693251095}
86
+ {"step": 85000, "val_loss": 4.029318009214797, "val_perplexity": 56.222554904912805}
87
+ {"step": 86000, "val_loss": 4.10216541156702, "val_perplexity": 60.47109072427321}
88
+ {"step": 87000, "val_loss": 3.8234665442729603, "val_perplexity": 45.762571656196144}
89
+ {"step": 88000, "val_loss": 4.149154015217142, "val_perplexity": 63.38035879230338}
90
+ {"step": 89000, "val_loss": 4.110245572023835, "val_perplexity": 60.961686216267104}
91
+ {"step": 90000, "val_loss": 4.012419129085875, "val_perplexity": 55.280439457097486}
92
+ {"step": 91000, "val_loss": 4.102249706787369, "val_perplexity": 60.47618836304082}
93
+ {"step": 92000, "val_loss": 4.098241672926155, "val_perplexity": 60.23428285884811}
94
+ {"step": 93000, "val_loss": 4.095214923421641, "val_perplexity": 60.05224440442561}
95
+ {"step": 94000, "val_loss": 3.8456500436497545, "val_perplexity": 46.789089417985096}
96
+ {"step": 95000, "val_loss": 3.939535648361214, "val_perplexity": 51.39473053139323}
97
+ {"step": 96000, "val_loss": 3.749884278730132, "val_perplexity": 42.51616169115527}
98
+ {"step": 97000, "val_loss": 4.167497279943378, "val_perplexity": 64.55368995456718}
99
+ {"step": 98000, "val_loss": 4.189003613067902, "val_perplexity": 65.95703947667566}
100
+ {"step": 99000, "val_loss": 3.989953420590376, "val_perplexity": 54.052371577131716}
101
+ {"training_complete": true, "final_step": 100000}