error577 committed on
Commit
7652cfa
·
verified ·
1 Parent(s): a36b348

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d736be1f9ccc37e19f6ace25457bd3caecf74b78b6bd0a9c3c82bf78a0aa09c
3
  size 500770656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb3ef2bdae1f8b5428086a199f5d745478de4ff3d5cf2109c1bda2a315b5376e
3
  size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e155cc132677ef963555f9b24629bc9313c22c472ca708fd3b6d6b2c09a3b97c
3
  size 134320806
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e542f6a8d209acfb7e9646e9a861e1364535a21abf254ae7b28c1bc690aa6c2b
3
  size 134320806
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:228e42bc899d2ecf95afbe4078371d96704b6df2b057bad532f7a23a19fd7a31
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a71c117db3d2138ce3e4cb621e3704e64fd4eb5c5aee72c3b7e77780e034491a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:107d1b031d80cd6355934d12b914afcffa03995e0a0b9696f39588911c8f3447
3
  size 2080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdae9d1ef7ba78274fffa78822b9ce8b8494e222e6b4e5b3e212211abfddfe81
3
  size 2080
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 2.529033899307251,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2200",
4
- "epoch": 0.7454573691567014,
5
  "eval_steps": 100,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1887,6 +1887,84 @@
1887
  "eval_samples_per_second": 2.513,
1888
  "eval_steps_per_second": 2.513,
1889
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1890
  }
1891
  ],
1892
  "logging_steps": 10,
@@ -1901,7 +1979,7 @@
1901
  "early_stopping_threshold": 0.0
1902
  },
1903
  "attributes": {
1904
- "early_stopping_patience_counter": 2
1905
  }
1906
  },
1907
  "TrainerControl": {
@@ -1910,12 +1988,12 @@
1910
  "should_evaluate": false,
1911
  "should_log": false,
1912
  "should_save": true,
1913
- "should_training_stop": false
1914
  },
1915
  "attributes": {}
1916
  }
1917
  },
1918
- "total_flos": 3.82713144016896e+17,
1919
  "train_batch_size": 1,
1920
  "trial_name": null,
1921
  "trial_params": null
 
1
  {
2
  "best_metric": 2.529033899307251,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2200",
4
+ "epoch": 0.7765180928715639,
5
  "eval_steps": 100,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1887
  "eval_samples_per_second": 2.513,
1888
  "eval_steps_per_second": 2.513,
1889
  "step": 2400
1890
+ },
1891
+ {
1892
+ "epoch": 0.7485634415281877,
1893
+ "grad_norm": 3.974891424179077,
1894
+ "learning_rate": 0.00017132522771134973,
1895
+ "loss": 2.7161,
1896
+ "step": 2410
1897
+ },
1898
+ {
1899
+ "epoch": 0.7516695138996738,
1900
+ "grad_norm": 4.18734073638916,
1901
+ "learning_rate": 0.00017109612235799432,
1902
+ "loss": 2.7066,
1903
+ "step": 2420
1904
+ },
1905
+ {
1906
+ "epoch": 0.7547755862711601,
1907
+ "grad_norm": 5.06152868270874,
1908
+ "learning_rate": 0.00017086626030504704,
1909
+ "loss": 2.6885,
1910
+ "step": 2430
1911
+ },
1912
+ {
1913
+ "epoch": 0.7578816586426463,
1914
+ "grad_norm": 4.947213172912598,
1915
+ "learning_rate": 0.00017063564155250788,
1916
+ "loss": 2.7594,
1917
+ "step": 2440
1918
+ },
1919
+ {
1920
+ "epoch": 0.7609877310141326,
1921
+ "grad_norm": 8.47783088684082,
1922
+ "learning_rate": 0.00017040426610037684,
1923
+ "loss": 2.7865,
1924
+ "step": 2450
1925
+ },
1926
+ {
1927
+ "epoch": 0.7640938033856188,
1928
+ "grad_norm": 4.791199207305908,
1929
+ "learning_rate": 0.00017017214850056916,
1930
+ "loss": 2.7591,
1931
+ "step": 2460
1932
+ },
1933
+ {
1934
+ "epoch": 0.7671998757571051,
1935
+ "grad_norm": 4.4141411781311035,
1936
+ "learning_rate": 0.0001699392742011696,
1937
+ "loss": 2.7741,
1938
+ "step": 2470
1939
+ },
1940
+ {
1941
+ "epoch": 0.7703059481285914,
1942
+ "grad_norm": 4.622017860412598,
1943
+ "learning_rate": 0.0001697056577540934,
1944
+ "loss": 2.5143,
1945
+ "step": 2480
1946
+ },
1947
+ {
1948
+ "epoch": 0.7734120205000776,
1949
+ "grad_norm": 5.536156177520752,
1950
+ "learning_rate": 0.0001694713137112558,
1951
+ "loss": 2.8695,
1952
+ "step": 2490
1953
+ },
1954
+ {
1955
+ "epoch": 0.7765180928715639,
1956
+ "grad_norm": 6.285493850708008,
1957
+ "learning_rate": 0.0001692362129688263,
1958
+ "loss": 2.9384,
1959
+ "step": 2500
1960
+ },
1961
+ {
1962
+ "epoch": 0.7765180928715639,
1963
+ "eval_loss": 2.551605224609375,
1964
+ "eval_runtime": 25.8773,
1965
+ "eval_samples_per_second": 2.512,
1966
+ "eval_steps_per_second": 2.512,
1967
+ "step": 2500
1968
  }
1969
  ],
1970
  "logging_steps": 10,
 
1979
  "early_stopping_threshold": 0.0
1980
  },
1981
  "attributes": {
1982
+ "early_stopping_patience_counter": 3
1983
  }
1984
  },
1985
  "TrainerControl": {
 
1988
  "should_evaluate": false,
1989
  "should_log": false,
1990
  "should_save": true,
1991
+ "should_training_stop": true
1992
  },
1993
  "attributes": {}
1994
  }
1995
  },
1996
+ "total_flos": 3.986595250176e+17,
1997
  "train_batch_size": 1,
1998
  "trial_name": null,
1999
  "trial_params": null