magatex committed on
Commit
283cc94
·
verified ·
1 Parent(s): 03f6cb4

Training in progress, step 279, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d03be7cd88f841328a915192c41c5230d9dca513190e2fe330b82acac365c837
3
  size 78480072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3e4eb2b85b34775dda40d612d8e0e0fef4dd234a5021de12dd66d4d842bfe2e
3
  size 78480072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:def033ca616a262ca52681afe9bcce7f1179e5df7160ed8d9aecca176fb57a66
3
  size 157104826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fc12d556dd8950cac42d8a8e2a85b02939dc0486f0de7537f7dd46d2022b6e7
3
  size 157104826
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07d39337f30b730c860666e174e427e0055432130190b901f313c33485b2842d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abf148214261af1951ab5cd5cec16e89c9a503c9a6d288ec5a3ee096d6ee1f54
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f62a2b7b80424390be488e4fe1a2355752b2472c2cc007b9b58ab8f8c8cce220
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df88d9a2fa8f6aa22628bd0a19f8f680f15f56bd367aca8ac1af6d2eda14e4e8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.49775421619415283,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
- "epoch": 3.5906642728904847,
5
  "eval_steps": 25,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1845,6 +1845,217 @@
1845
  "eval_samples_per_second": 15.333,
1846
  "eval_steps_per_second": 2.147,
1847
  "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1848
  }
1849
  ],
1850
  "logging_steps": 1,
@@ -1868,12 +2079,12 @@
1868
  "should_evaluate": false,
1869
  "should_log": false,
1870
  "should_save": true,
1871
- "should_training_stop": false
1872
  },
1873
  "attributes": {}
1874
  }
1875
  },
1876
- "total_flos": 7.866513605473075e+17,
1877
  "train_batch_size": 8,
1878
  "trial_name": null,
1879
  "trial_params": null
 
1
  {
2
  "best_metric": 0.49775421619415283,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
+ "epoch": 4.007181328545781,
5
  "eval_steps": 25,
6
+ "global_step": 279,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1845
  "eval_samples_per_second": 15.333,
1846
  "eval_steps_per_second": 2.147,
1847
  "step": 250
1848
+ },
1849
+ {
1850
+ "epoch": 3.6050269299820465,
1851
+ "grad_norm": 3.0492210388183594,
1852
+ "learning_rate": 1.2438224123471442e-05,
1853
+ "loss": 0.3956,
1854
+ "step": 251
1855
+ },
1856
+ {
1857
+ "epoch": 3.6193895870736084,
1858
+ "grad_norm": 2.8214497566223145,
1859
+ "learning_rate": 1.2268627707128812e-05,
1860
+ "loss": 0.3482,
1861
+ "step": 252
1862
+ },
1863
+ {
1864
+ "epoch": 3.6337522441651706,
1865
+ "grad_norm": 2.3415520191192627,
1866
+ "learning_rate": 1.210499173001039e-05,
1867
+ "loss": 0.3028,
1868
+ "step": 253
1869
+ },
1870
+ {
1871
+ "epoch": 3.6481149012567324,
1872
+ "grad_norm": 2.5095021724700928,
1873
+ "learning_rate": 1.194733901708629e-05,
1874
+ "loss": 0.2931,
1875
+ "step": 254
1876
+ },
1877
+ {
1878
+ "epoch": 3.6624775583482947,
1879
+ "grad_norm": 2.4495046138763428,
1880
+ "learning_rate": 1.1795691558743535e-05,
1881
+ "loss": 0.2587,
1882
+ "step": 255
1883
+ },
1884
+ {
1885
+ "epoch": 3.6768402154398565,
1886
+ "grad_norm": 2.3127005100250244,
1887
+ "learning_rate": 1.1650070507718633e-05,
1888
+ "loss": 0.2124,
1889
+ "step": 256
1890
+ },
1891
+ {
1892
+ "epoch": 3.6912028725314183,
1893
+ "grad_norm": 2.227742910385132,
1894
+ "learning_rate": 1.1510496176147086e-05,
1895
+ "loss": 0.1828,
1896
+ "step": 257
1897
+ },
1898
+ {
1899
+ "epoch": 3.70556552962298,
1900
+ "grad_norm": 1.9458461999893188,
1901
+ "learning_rate": 1.1376988032730134e-05,
1902
+ "loss": 0.1345,
1903
+ "step": 258
1904
+ },
1905
+ {
1906
+ "epoch": 3.719928186714542,
1907
+ "grad_norm": 1.859049916267395,
1908
+ "learning_rate": 1.1249564700019125e-05,
1909
+ "loss": 0.0947,
1910
+ "step": 259
1911
+ },
1912
+ {
1913
+ "epoch": 3.734290843806104,
1914
+ "grad_norm": 1.2114741802215576,
1915
+ "learning_rate": 1.1128243951817937e-05,
1916
+ "loss": 0.1815,
1917
+ "step": 260
1918
+ },
1919
+ {
1920
+ "epoch": 3.748653500897666,
1921
+ "grad_norm": 2.4042224884033203,
1922
+ "learning_rate": 1.1013042710703766e-05,
1923
+ "loss": 0.8597,
1924
+ "step": 261
1925
+ },
1926
+ {
1927
+ "epoch": 3.7630161579892283,
1928
+ "grad_norm": 2.3829939365386963,
1929
+ "learning_rate": 1.0903977045666654e-05,
1930
+ "loss": 0.7914,
1931
+ "step": 262
1932
+ },
1933
+ {
1934
+ "epoch": 3.77737881508079,
1935
+ "grad_norm": 2.3067331314086914,
1936
+ "learning_rate": 1.0801062169868106e-05,
1937
+ "loss": 0.5984,
1938
+ "step": 263
1939
+ },
1940
+ {
1941
+ "epoch": 3.791741472172352,
1942
+ "grad_norm": 2.29392671585083,
1943
+ "learning_rate": 1.0704312438519058e-05,
1944
+ "loss": 0.531,
1945
+ "step": 264
1946
+ },
1947
+ {
1948
+ "epoch": 3.8061041292639137,
1949
+ "grad_norm": 2.4506468772888184,
1950
+ "learning_rate": 1.0613741346877497e-05,
1951
+ "loss": 0.4849,
1952
+ "step": 265
1953
+ },
1954
+ {
1955
+ "epoch": 3.8204667863554755,
1956
+ "grad_norm": 2.338438034057617,
1957
+ "learning_rate": 1.0529361528366089e-05,
1958
+ "loss": 0.4381,
1959
+ "step": 266
1960
+ },
1961
+ {
1962
+ "epoch": 3.834829443447038,
1963
+ "grad_norm": 2.8582491874694824,
1964
+ "learning_rate": 1.0451184752809978e-05,
1965
+ "loss": 0.4172,
1966
+ "step": 267
1967
+ },
1968
+ {
1969
+ "epoch": 3.8491921005385996,
1970
+ "grad_norm": 2.70757794380188,
1971
+ "learning_rate": 1.0379221924795052e-05,
1972
+ "loss": 0.389,
1973
+ "step": 268
1974
+ },
1975
+ {
1976
+ "epoch": 3.8635547576301614,
1977
+ "grad_norm": 2.733470916748047,
1978
+ "learning_rate": 1.031348308214692e-05,
1979
+ "loss": 0.3339,
1980
+ "step": 269
1981
+ },
1982
+ {
1983
+ "epoch": 3.8779174147217237,
1984
+ "grad_norm": 2.574880361557007,
1985
+ "learning_rate": 1.0253977394530751e-05,
1986
+ "loss": 0.2985,
1987
+ "step": 270
1988
+ },
1989
+ {
1990
+ "epoch": 3.8922800718132855,
1991
+ "grad_norm": 2.384779453277588,
1992
+ "learning_rate": 1.0200713162172245e-05,
1993
+ "loss": 0.313,
1994
+ "step": 271
1995
+ },
1996
+ {
1997
+ "epoch": 3.9066427289048473,
1998
+ "grad_norm": 2.192976713180542,
1999
+ "learning_rate": 1.0153697814699859e-05,
2000
+ "loss": 0.2477,
2001
+ "step": 272
2002
+ },
2003
+ {
2004
+ "epoch": 3.921005385996409,
2005
+ "grad_norm": 2.00533127784729,
2006
+ "learning_rate": 1.0112937910108495e-05,
2007
+ "loss": 0.2028,
2008
+ "step": 273
2009
+ },
2010
+ {
2011
+ "epoch": 3.9353680430879714,
2012
+ "grad_norm": 1.7238686084747314,
2013
+ "learning_rate": 1.0078439133844709e-05,
2014
+ "loss": 0.1615,
2015
+ "step": 274
2016
+ },
2017
+ {
2018
+ "epoch": 3.949730700179533,
2019
+ "grad_norm": 1.5112303495407104,
2020
+ "learning_rate": 1.0050206298013732e-05,
2021
+ "loss": 0.13,
2022
+ "step": 275
2023
+ },
2024
+ {
2025
+ "epoch": 3.949730700179533,
2026
+ "eval_loss": 0.46412649750709534,
2027
+ "eval_runtime": 3.284,
2028
+ "eval_samples_per_second": 15.225,
2029
+ "eval_steps_per_second": 2.132,
2030
+ "step": 275
2031
+ },
2032
+ {
2033
+ "epoch": 3.964093357271095,
2034
+ "grad_norm": 1.2303223609924316,
2035
+ "learning_rate": 1.0028243340708174e-05,
2036
+ "loss": 0.097,
2037
+ "step": 276
2038
+ },
2039
+ {
2040
+ "epoch": 3.9784560143626573,
2041
+ "grad_norm": 1.2757847309112549,
2042
+ "learning_rate": 1.0012553325458767e-05,
2043
+ "loss": 0.2561,
2044
+ "step": 277
2045
+ },
2046
+ {
2047
+ "epoch": 3.992818671454219,
2048
+ "grad_norm": 1.8387738466262817,
2049
+ "learning_rate": 1.0003138440807033e-05,
2050
+ "loss": 0.3984,
2051
+ "step": 278
2052
+ },
2053
+ {
2054
+ "epoch": 4.007181328545781,
2055
+ "grad_norm": 1.8089104890823364,
2056
+ "learning_rate": 1e-05,
2057
+ "loss": 0.5447,
2058
+ "step": 279
2059
  }
2060
  ],
2061
  "logging_steps": 1,
 
2079
  "should_evaluate": false,
2080
  "should_log": false,
2081
  "should_save": true,
2082
+ "should_training_stop": true
2083
  },
2084
  "attributes": {}
2085
  }
2086
  },
2087
+ "total_flos": 8.77822740680147e+17,
2088
  "train_batch_size": 8,
2089
  "trial_name": null,
2090
  "trial_params": null