NairaRahim commited on
Commit
fa9267f
·
verified ·
1 Parent(s): f69270c

Training in progress, epoch 20, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca9c0fa45ca737106eab6be7425caef617a753946ed0508c1aef1f3a4291004d
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f875ff0bfa8c3f03718200317018a9c1320ef659ed8be49eb8d1545f90dca2b
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acbe2b1c3985ee1c5128707c924ffa1b789c4d55f3b36a9e00d5043900ec85eb
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4edde3d63fc51cb76d2b8798e35123bee17f10b37ac6770074f24fbb1849dc32
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f0baebfc08807f25c4a6326e1681bcfbdd8c24e4c42d43ef5df074269e679b0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0066a3b21610aa70bfcf0b5c4ca5da7f43ab12c9e601ab15813e745474a36d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0854ef6140ae5c2ba277188e63d5e8a4b0a0fea517aba028586326cadbf26b4b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8c5b80067b711daea816f97793263fb6b6d08534034a3999a4ce7590fa85de8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.54485321044922,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
- "epoch": 19.0,
5
  "eval_steps": 500,
6
- "global_step": 24795,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1888,6 +1888,112 @@
1888
  "eval_samples_per_second": 26.464,
1889
  "eval_steps_per_second": 3.326,
1890
  "step": 24795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1891
  }
1892
  ],
1893
  "logging_steps": 100,
@@ -1902,7 +2008,7 @@
1902
  "early_stopping_threshold": 0.0
1903
  },
1904
  "attributes": {
1905
- "early_stopping_patience_counter": 3
1906
  }
1907
  },
1908
  "TrainerControl": {
@@ -1916,7 +2022,7 @@
1916
  "attributes": {}
1917
  }
1918
  },
1919
- "total_flos": 2.673890715789619e+16,
1920
  "train_batch_size": 8,
1921
  "trial_name": null,
1922
  "trial_params": null
 
1
  {
2
  "best_metric": 34.54485321044922,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 26100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1888
  "eval_samples_per_second": 26.464,
1889
  "eval_steps_per_second": 3.326,
1890
  "step": 24795
1891
+ },
1892
+ {
1893
+ "epoch": 19.00383141762452,
1894
+ "grad_norm": 3.04927659034729,
1895
+ "learning_rate": 3.8127873563218394e-05,
1896
+ "loss": 33.7055,
1897
+ "step": 24800
1898
+ },
1899
+ {
1900
+ "epoch": 19.080459770114942,
1901
+ "grad_norm": 2.725443124771118,
1902
+ "learning_rate": 3.8079980842911874e-05,
1903
+ "loss": 33.5355,
1904
+ "step": 24900
1905
+ },
1906
+ {
1907
+ "epoch": 19.157088122605366,
1908
+ "grad_norm": 3.853895425796509,
1909
+ "learning_rate": 3.803208812260536e-05,
1910
+ "loss": 33.5267,
1911
+ "step": 25000
1912
+ },
1913
+ {
1914
+ "epoch": 19.233716475095786,
1915
+ "grad_norm": 2.666419267654419,
1916
+ "learning_rate": 3.798419540229885e-05,
1917
+ "loss": 33.4069,
1918
+ "step": 25100
1919
+ },
1920
+ {
1921
+ "epoch": 19.310344827586206,
1922
+ "grad_norm": 3.5618317127227783,
1923
+ "learning_rate": 3.793630268199234e-05,
1924
+ "loss": 33.7295,
1925
+ "step": 25200
1926
+ },
1927
+ {
1928
+ "epoch": 19.386973180076627,
1929
+ "grad_norm": 3.351062297821045,
1930
+ "learning_rate": 3.788840996168583e-05,
1931
+ "loss": 33.1994,
1932
+ "step": 25300
1933
+ },
1934
+ {
1935
+ "epoch": 19.46360153256705,
1936
+ "grad_norm": 3.3226547241210938,
1937
+ "learning_rate": 3.7840996168582374e-05,
1938
+ "loss": 33.3149,
1939
+ "step": 25400
1940
+ },
1941
+ {
1942
+ "epoch": 19.54022988505747,
1943
+ "grad_norm": 4.15867805480957,
1944
+ "learning_rate": 3.779310344827586e-05,
1945
+ "loss": 33.5592,
1946
+ "step": 25500
1947
+ },
1948
+ {
1949
+ "epoch": 19.61685823754789,
1950
+ "grad_norm": 2.333674430847168,
1951
+ "learning_rate": 3.774521072796935e-05,
1952
+ "loss": 33.7336,
1953
+ "step": 25600
1954
+ },
1955
+ {
1956
+ "epoch": 19.693486590038315,
1957
+ "grad_norm": 2.9516782760620117,
1958
+ "learning_rate": 3.7697318007662834e-05,
1959
+ "loss": 33.3228,
1960
+ "step": 25700
1961
+ },
1962
+ {
1963
+ "epoch": 19.770114942528735,
1964
+ "grad_norm": 1.734508991241455,
1965
+ "learning_rate": 3.764942528735632e-05,
1966
+ "loss": 33.3216,
1967
+ "step": 25800
1968
+ },
1969
+ {
1970
+ "epoch": 19.846743295019156,
1971
+ "grad_norm": 2.4886648654937744,
1972
+ "learning_rate": 3.760153256704981e-05,
1973
+ "loss": 33.5157,
1974
+ "step": 25900
1975
+ },
1976
+ {
1977
+ "epoch": 19.92337164750958,
1978
+ "grad_norm": 3.6624252796173096,
1979
+ "learning_rate": 3.75536398467433e-05,
1980
+ "loss": 33.2399,
1981
+ "step": 26000
1982
+ },
1983
+ {
1984
+ "epoch": 20.0,
1985
+ "grad_norm": 4.810445785522461,
1986
+ "learning_rate": 3.750574712643679e-05,
1987
+ "loss": 32.548,
1988
+ "step": 26100
1989
+ },
1990
+ {
1991
+ "epoch": 20.0,
1992
+ "eval_loss": 34.746856689453125,
1993
+ "eval_runtime": 49.2861,
1994
+ "eval_samples_per_second": 26.478,
1995
+ "eval_steps_per_second": 3.328,
1996
+ "step": 26100
1997
  }
1998
  ],
1999
  "logging_steps": 100,
 
2008
  "early_stopping_threshold": 0.0
2009
  },
2010
  "attributes": {
2011
+ "early_stopping_patience_counter": 4
2012
  }
2013
  },
2014
  "TrainerControl": {
 
2022
  "attributes": {}
2023
  }
2024
  },
2025
+ "total_flos": 2.814621806094336e+16,
2026
  "train_batch_size": 8,
2027
  "trial_name": null,
2028
  "trial_params": null