FormlessAI commited on
Commit
3f0eb2e
·
verified ·
1 Parent(s): 38ec5b3

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b57425fe0979d64ee2759d723efa2d4501cd5b152cfaae9163875752c74cb054
3
  size 1172343536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2120d5d536aba10e00501ceedc3558e455e3cff895955ef8f273753d39d93536
3
  size 1172343536
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71c75215f949f064e98b107405fa6aae40b5205b30cabfe2cde3899256528d4f
3
+ size 883824229
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1b461d1862ca0a9cafbdae2cb669945757227de55fa3ce0c1d781ae3f90421
3
+ size 883824293
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe3bf8b7feff582e268b6267f68b2540e2f1cf395d73dc27afed7f0c4483117
3
+ size 883824293
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6fa1a128376e603e0299556dbc7b04b8d834855f8748dc77f062b5648f0a2e
3
+ size 883824293
last-checkpoint/global_step600/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cd543b383fc20bec50c96fdce70c3321fd1820efd8785aee965509ca605487b
3
+ size 1172522073
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step550
 
1
+ global_step600
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed8ed81af49ff602ccd13d5e84967bee6bca6a78ea6d7206a2104463b69a1059
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4894c218b3f6eaf3b9761899ca66cc4ee052559eaf58bed0eb77d1f141f5a8f8
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f53e27b1f587841700b654201a4c9a9680a6d49a754e25cd2a82432e613117b4
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35dc37a61e3442d3a3c91b1def510a65866249fe0f6bfe143097becbb018fdc
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db90f184c98dee2e965ee13d085db00953c6b696c1ef3da2b032f72d72e768a0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cee7319258b43ce62816538b7f06b4a5ae5b8b56e7ea61d662ecb9ed3402c92a
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d32bc21799c1730e164d6b71e088e1ad7f9f709b9172577b8ff961df3d429692
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed48390c7aa15be53030fdcd4b9104f35ff8b16f59f6cd4b6566c973f83388a
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a96d6b66528c6387aa2a111a67d7f07cc9d6463a1b49e77f2099de0ba990f1c
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b643f96fae1c7c195d82363db91efd66b514c2fc5280977aad9c8846720b5046
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.03315730020403862,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4777415852334419,
6
  "eval_steps": 50,
7
- "global_step": 550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1834,6 +1834,172 @@
1834
  "eval_samples_per_second": 45.331,
1835
  "eval_steps_per_second": 2.841,
1836
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1837
  }
1838
  ],
1839
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.030555352568626404,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5211726384364821,
6
  "eval_steps": 50,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1834
  "eval_samples_per_second": 45.331,
1835
  "eval_steps_per_second": 2.841,
1836
  "step": 550
1837
+ },
1838
+ {
1839
+ "epoch": 0.4820846905537459,
1840
+ "grad_norm": 0.8903060555458069,
1841
+ "learning_rate": 2.5856192706887134e-05,
1842
+ "logits/chosen": -0.6571289300918579,
1843
+ "logits/rejected": -1.188867211341858,
1844
+ "logps/chosen": -96.125,
1845
+ "logps/rejected": -192.35000610351562,
1846
+ "loss": 0.0269,
1847
+ "rewards/accuracies": 0.9906250238418579,
1848
+ "rewards/chosen": -1.5890381336212158,
1849
+ "rewards/margins": 13.0625,
1850
+ "rewards/rejected": -14.651562690734863,
1851
+ "step": 555
1852
+ },
1853
+ {
1854
+ "epoch": 0.48642779587404994,
1855
+ "grad_norm": 1.1956876516342163,
1856
+ "learning_rate": 2.5853216177644404e-05,
1857
+ "logits/chosen": -0.7413085699081421,
1858
+ "logits/rejected": -1.279687523841858,
1859
+ "logps/chosen": -98.73750305175781,
1860
+ "logps/rejected": -205.35000610351562,
1861
+ "loss": 0.043,
1862
+ "rewards/accuracies": 0.9859374761581421,
1863
+ "rewards/chosen": -2.1949219703674316,
1864
+ "rewards/margins": 14.765625,
1865
+ "rewards/rejected": -16.9609375,
1866
+ "step": 560
1867
+ },
1868
+ {
1869
+ "epoch": 0.49077090119435396,
1870
+ "grad_norm": 2.65529727935791,
1871
+ "learning_rate": 2.5850207278896458e-05,
1872
+ "logits/chosen": -0.840527355670929,
1873
+ "logits/rejected": -1.378515601158142,
1874
+ "logps/chosen": -98.76249694824219,
1875
+ "logps/rejected": -209.47500610351562,
1876
+ "loss": 0.0498,
1877
+ "rewards/accuracies": 0.9859374761581421,
1878
+ "rewards/chosen": -2.4097657203674316,
1879
+ "rewards/margins": 15.260937690734863,
1880
+ "rewards/rejected": -17.678125381469727,
1881
+ "step": 565
1882
+ },
1883
+ {
1884
+ "epoch": 0.495114006514658,
1885
+ "grad_norm": 0.81071537733078,
1886
+ "learning_rate": 2.5847166018218197e-05,
1887
+ "logits/chosen": -0.894726574420929,
1888
+ "logits/rejected": -1.419531226158142,
1889
+ "logps/chosen": -98.92500305175781,
1890
+ "logps/rejected": -204.97500610351562,
1891
+ "loss": 0.0342,
1892
+ "rewards/accuracies": 0.984375,
1893
+ "rewards/chosen": -2.265429735183716,
1894
+ "rewards/margins": 14.667187690734863,
1895
+ "rewards/rejected": -16.9296875,
1896
+ "step": 570
1897
+ },
1898
+ {
1899
+ "epoch": 0.499457111834962,
1900
+ "grad_norm": 0.6797189116477966,
1901
+ "learning_rate": 2.584409240326599e-05,
1902
+ "logits/chosen": -0.8995116949081421,
1903
+ "logits/rejected": -1.4072265625,
1904
+ "logps/chosen": -94.38749694824219,
1905
+ "logps/rejected": -199.77499389648438,
1906
+ "loss": 0.0158,
1907
+ "rewards/accuracies": 0.996874988079071,
1908
+ "rewards/chosen": -1.7000000476837158,
1909
+ "rewards/margins": 14.209375381469727,
1910
+ "rewards/rejected": -15.912500381469727,
1911
+ "step": 575
1912
+ },
1913
+ {
1914
+ "epoch": 0.503800217155266,
1915
+ "grad_norm": 0.652454674243927,
1916
+ "learning_rate": 2.5840986441777657e-05,
1917
+ "logits/chosen": -0.890917956829071,
1918
+ "logits/rejected": -1.447265625,
1919
+ "logps/chosen": -94.3375015258789,
1920
+ "logps/rejected": -209.125,
1921
+ "loss": 0.0152,
1922
+ "rewards/accuracies": 0.996874988079071,
1923
+ "rewards/chosen": -1.7142822742462158,
1924
+ "rewards/margins": 15.628125190734863,
1925
+ "rewards/rejected": -17.357812881469727,
1926
+ "step": 580
1927
+ },
1928
+ {
1929
+ "epoch": 0.50814332247557,
1930
+ "grad_norm": 0.3430173397064209,
1931
+ "learning_rate": 2.583784814157246e-05,
1932
+ "logits/chosen": -0.957714855670929,
1933
+ "logits/rejected": -1.4734375476837158,
1934
+ "logps/chosen": -96.76249694824219,
1935
+ "logps/rejected": -213.375,
1936
+ "loss": 0.0379,
1937
+ "rewards/accuracies": 0.984375,
1938
+ "rewards/chosen": -2.357714891433716,
1939
+ "rewards/margins": 16.392187118530273,
1940
+ "rewards/rejected": -18.745311737060547,
1941
+ "step": 585
1942
+ },
1943
+ {
1944
+ "epoch": 0.512486427795874,
1945
+ "grad_norm": 0.7423095703125,
1946
+ "learning_rate": 2.5834677510551053e-05,
1947
+ "logits/chosen": -0.953320324420929,
1948
+ "logits/rejected": -1.4753906726837158,
1949
+ "logps/chosen": -100.38749694824219,
1950
+ "logps/rejected": -217.75,
1951
+ "loss": 0.0363,
1952
+ "rewards/accuracies": 0.989062488079071,
1953
+ "rewards/chosen": -2.314160108566284,
1954
+ "rewards/margins": 16.709375381469727,
1955
+ "rewards/rejected": -19.021875381469727,
1956
+ "step": 590
1957
+ },
1958
+ {
1959
+ "epoch": 0.5168295331161781,
1960
+ "grad_norm": 1.2152386903762817,
1961
+ "learning_rate": 2.5831474556695505e-05,
1962
+ "logits/chosen": -0.947949230670929,
1963
+ "logits/rejected": -1.437109351158142,
1964
+ "logps/chosen": -98.9749984741211,
1965
+ "logps/rejected": -211.02499389648438,
1966
+ "loss": 0.0182,
1967
+ "rewards/accuracies": 0.9937499761581421,
1968
+ "rewards/chosen": -2.5445313453674316,
1969
+ "rewards/margins": 15.350000381469727,
1970
+ "rewards/rejected": -17.909374237060547,
1971
+ "step": 595
1972
+ },
1973
+ {
1974
+ "epoch": 0.5211726384364821,
1975
+ "grad_norm": 0.7218368649482727,
1976
+ "learning_rate": 2.582823928806925e-05,
1977
+ "logits/chosen": -0.8983398675918579,
1978
+ "logits/rejected": -1.4113280773162842,
1979
+ "logps/chosen": -98.23750305175781,
1980
+ "logps/rejected": -201.5749969482422,
1981
+ "loss": 0.0276,
1982
+ "rewards/accuracies": 0.9906250238418579,
1983
+ "rewards/chosen": -2.386914014816284,
1984
+ "rewards/margins": 14.265625,
1985
+ "rewards/rejected": -16.65625,
1986
+ "step": 600
1987
+ },
1988
+ {
1989
+ "epoch": 0.5211726384364821,
1990
+ "eval_logits/chosen": -0.9090139865875244,
1991
+ "eval_logits/rejected": -1.4213109016418457,
1992
+ "eval_logps/chosen": -99.9981918334961,
1993
+ "eval_logps/rejected": -205.8050537109375,
1994
+ "eval_loss": 0.030555352568626404,
1995
+ "eval_rewards/accuracies": 0.9889440536499023,
1996
+ "eval_rewards/chosen": -2.4407715797424316,
1997
+ "eval_rewards/margins": 14.665614128112793,
1998
+ "eval_rewards/rejected": -17.1055965423584,
1999
+ "eval_runtime": 97.707,
2000
+ "eval_samples_per_second": 45.237,
2001
+ "eval_steps_per_second": 2.835,
2002
+ "step": 600
2003
  }
2004
  ],
2005
  "logging_steps": 5,