Training in progress, step 2500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 500770656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb3ef2bdae1f8b5428086a199f5d745478de4ff3d5cf2109c1bda2a315b5376e
|
| 3 |
size 500770656
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 134320806
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e542f6a8d209acfb7e9646e9a861e1364535a21abf254ae7b28c1bc690aa6c2b
|
| 3 |
size 134320806
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a71c117db3d2138ce3e4cb621e3704e64fd4eb5c5aee72c3b7e77780e034491a
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2080
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdae9d1ef7ba78274fffa78822b9ce8b8494e222e6b4e5b3e212211abfddfe81
|
| 3 |
size 2080
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 2.529033899307251,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2200",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1887,6 +1887,84 @@
|
|
| 1887 |
"eval_samples_per_second": 2.513,
|
| 1888 |
"eval_steps_per_second": 2.513,
|
| 1889 |
"step": 2400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1890 |
}
|
| 1891 |
],
|
| 1892 |
"logging_steps": 10,
|
|
@@ -1901,7 +1979,7 @@
|
|
| 1901 |
"early_stopping_threshold": 0.0
|
| 1902 |
},
|
| 1903 |
"attributes": {
|
| 1904 |
-
"early_stopping_patience_counter":
|
| 1905 |
}
|
| 1906 |
},
|
| 1907 |
"TrainerControl": {
|
|
@@ -1910,12 +1988,12 @@
|
|
| 1910 |
"should_evaluate": false,
|
| 1911 |
"should_log": false,
|
| 1912 |
"should_save": true,
|
| 1913 |
-
"should_training_stop":
|
| 1914 |
},
|
| 1915 |
"attributes": {}
|
| 1916 |
}
|
| 1917 |
},
|
| 1918 |
-
"total_flos": 3.
|
| 1919 |
"train_batch_size": 1,
|
| 1920 |
"trial_name": null,
|
| 1921 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 2.529033899307251,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2200",
|
| 4 |
+
"epoch": 0.7765180928715639,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 2500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1887 |
"eval_samples_per_second": 2.513,
|
| 1888 |
"eval_steps_per_second": 2.513,
|
| 1889 |
"step": 2400
|
| 1890 |
+
},
|
| 1891 |
+
{
|
| 1892 |
+
"epoch": 0.7485634415281877,
|
| 1893 |
+
"grad_norm": 3.974891424179077,
|
| 1894 |
+
"learning_rate": 0.00017132522771134973,
|
| 1895 |
+
"loss": 2.7161,
|
| 1896 |
+
"step": 2410
|
| 1897 |
+
},
|
| 1898 |
+
{
|
| 1899 |
+
"epoch": 0.7516695138996738,
|
| 1900 |
+
"grad_norm": 4.18734073638916,
|
| 1901 |
+
"learning_rate": 0.00017109612235799432,
|
| 1902 |
+
"loss": 2.7066,
|
| 1903 |
+
"step": 2420
|
| 1904 |
+
},
|
| 1905 |
+
{
|
| 1906 |
+
"epoch": 0.7547755862711601,
|
| 1907 |
+
"grad_norm": 5.06152868270874,
|
| 1908 |
+
"learning_rate": 0.00017086626030504704,
|
| 1909 |
+
"loss": 2.6885,
|
| 1910 |
+
"step": 2430
|
| 1911 |
+
},
|
| 1912 |
+
{
|
| 1913 |
+
"epoch": 0.7578816586426463,
|
| 1914 |
+
"grad_norm": 4.947213172912598,
|
| 1915 |
+
"learning_rate": 0.00017063564155250788,
|
| 1916 |
+
"loss": 2.7594,
|
| 1917 |
+
"step": 2440
|
| 1918 |
+
},
|
| 1919 |
+
{
|
| 1920 |
+
"epoch": 0.7609877310141326,
|
| 1921 |
+
"grad_norm": 8.47783088684082,
|
| 1922 |
+
"learning_rate": 0.00017040426610037684,
|
| 1923 |
+
"loss": 2.7865,
|
| 1924 |
+
"step": 2450
|
| 1925 |
+
},
|
| 1926 |
+
{
|
| 1927 |
+
"epoch": 0.7640938033856188,
|
| 1928 |
+
"grad_norm": 4.791199207305908,
|
| 1929 |
+
"learning_rate": 0.00017017214850056916,
|
| 1930 |
+
"loss": 2.7591,
|
| 1931 |
+
"step": 2460
|
| 1932 |
+
},
|
| 1933 |
+
{
|
| 1934 |
+
"epoch": 0.7671998757571051,
|
| 1935 |
+
"grad_norm": 4.4141411781311035,
|
| 1936 |
+
"learning_rate": 0.0001699392742011696,
|
| 1937 |
+
"loss": 2.7741,
|
| 1938 |
+
"step": 2470
|
| 1939 |
+
},
|
| 1940 |
+
{
|
| 1941 |
+
"epoch": 0.7703059481285914,
|
| 1942 |
+
"grad_norm": 4.622017860412598,
|
| 1943 |
+
"learning_rate": 0.0001697056577540934,
|
| 1944 |
+
"loss": 2.5143,
|
| 1945 |
+
"step": 2480
|
| 1946 |
+
},
|
| 1947 |
+
{
|
| 1948 |
+
"epoch": 0.7734120205000776,
|
| 1949 |
+
"grad_norm": 5.536156177520752,
|
| 1950 |
+
"learning_rate": 0.0001694713137112558,
|
| 1951 |
+
"loss": 2.8695,
|
| 1952 |
+
"step": 2490
|
| 1953 |
+
},
|
| 1954 |
+
{
|
| 1955 |
+
"epoch": 0.7765180928715639,
|
| 1956 |
+
"grad_norm": 6.285493850708008,
|
| 1957 |
+
"learning_rate": 0.0001692362129688263,
|
| 1958 |
+
"loss": 2.9384,
|
| 1959 |
+
"step": 2500
|
| 1960 |
+
},
|
| 1961 |
+
{
|
| 1962 |
+
"epoch": 0.7765180928715639,
|
| 1963 |
+
"eval_loss": 2.551605224609375,
|
| 1964 |
+
"eval_runtime": 25.8773,
|
| 1965 |
+
"eval_samples_per_second": 2.512,
|
| 1966 |
+
"eval_steps_per_second": 2.512,
|
| 1967 |
+
"step": 2500
|
| 1968 |
}
|
| 1969 |
],
|
| 1970 |
"logging_steps": 10,
|
|
|
|
| 1979 |
"early_stopping_threshold": 0.0
|
| 1980 |
},
|
| 1981 |
"attributes": {
|
| 1982 |
+
"early_stopping_patience_counter": 3
|
| 1983 |
}
|
| 1984 |
},
|
| 1985 |
"TrainerControl": {
|
|
|
|
| 1988 |
"should_evaluate": false,
|
| 1989 |
"should_log": false,
|
| 1990 |
"should_save": true,
|
| 1991 |
+
"should_training_stop": true
|
| 1992 |
},
|
| 1993 |
"attributes": {}
|
| 1994 |
}
|
| 1995 |
},
|
| 1996 |
+
"total_flos": 3.986595250176e+17,
|
| 1997 |
"train_batch_size": 1,
|
| 1998 |
"trial_name": null,
|
| 1999 |
"trial_params": null
|