Training in progress, step 11500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66d99b0cb397502b025a6c7417fc75e69557e2db9d4b2101658c2730782a54e5
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eacebe9389e2329614f557172e304528d2b5b66f68c1b3c5e68453452e430f51
|
| 3 |
size 1736
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a032319df626b2b110bd852b78fc23d492586d6fccaeb0841874a11adbfaea49
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a16095fe7c7a01fb5cdfa33bc3b7f9026f465c5d2d551f188966908ce9a84c9
|
| 3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2124619b96ef0628e278d9139421c199b6678f87b3a0cfd00afb352c22439c91
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:129303cede08862e45aff723e13523f2863b1a8c5dd6144e719bcbf05975af10
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1900,6 +1900,92 @@
|
|
| 1900 |
"eval_samples_per_second": 59.571,
|
| 1901 |
"eval_steps_per_second": 14.893,
|
| 1902 |
"step": 11000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1903 |
}
|
| 1904 |
],
|
| 1905 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 11500,
|
| 3 |
+
"best_metric": 1.3371928930282593,
|
| 4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints-progressive-attention/checkpoint-11500",
|
| 5 |
+
"epoch": 0.8845473425121144,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 11500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1900 |
"eval_samples_per_second": 59.571,
|
| 1901 |
"eval_steps_per_second": 14.893,
|
| 1902 |
"step": 11000
|
| 1903 |
+
},
|
| 1904 |
+
{
|
| 1905 |
+
"epoch": 0.8499346204138143,
|
| 1906 |
+
"grad_norm": 1.5268698930740356,
|
| 1907 |
+
"learning_rate": 0.00014523024179934033,
|
| 1908 |
+
"loss": 1.3185,
|
| 1909 |
+
"step": 11050
|
| 1910 |
+
},
|
| 1911 |
+
{
|
| 1912 |
+
"epoch": 0.8537804784247366,
|
| 1913 |
+
"grad_norm": 1.5343180894851685,
|
| 1914 |
+
"learning_rate": 0.00014497052177752385,
|
| 1915 |
+
"loss": 1.4061,
|
| 1916 |
+
"step": 11100
|
| 1917 |
+
},
|
| 1918 |
+
{
|
| 1919 |
+
"epoch": 0.8576263364356588,
|
| 1920 |
+
"grad_norm": 1.1495877504348755,
|
| 1921 |
+
"learning_rate": 0.00014471080175570736,
|
| 1922 |
+
"loss": 1.3393,
|
| 1923 |
+
"step": 11150
|
| 1924 |
+
},
|
| 1925 |
+
{
|
| 1926 |
+
"epoch": 0.8614721944465811,
|
| 1927 |
+
"grad_norm": 1.5720888376235962,
|
| 1928 |
+
"learning_rate": 0.00014445108173389087,
|
| 1929 |
+
"loss": 1.3666,
|
| 1930 |
+
"step": 11200
|
| 1931 |
+
},
|
| 1932 |
+
{
|
| 1933 |
+
"epoch": 0.8653180524575033,
|
| 1934 |
+
"grad_norm": 2.1202750205993652,
|
| 1935 |
+
"learning_rate": 0.00014419655611251072,
|
| 1936 |
+
"loss": 1.3463,
|
| 1937 |
+
"step": 11250
|
| 1938 |
+
},
|
| 1939 |
+
{
|
| 1940 |
+
"epoch": 0.8653180524575033,
|
| 1941 |
+
"eval_loss": 1.3461755514144897,
|
| 1942 |
+
"eval_runtime": 16.8798,
|
| 1943 |
+
"eval_samples_per_second": 59.242,
|
| 1944 |
+
"eval_steps_per_second": 14.811,
|
| 1945 |
+
"step": 11250
|
| 1946 |
+
},
|
| 1947 |
+
{
|
| 1948 |
+
"epoch": 0.8691639104684256,
|
| 1949 |
+
"grad_norm": 1.0920432806015015,
|
| 1950 |
+
"learning_rate": 0.00014393683609069424,
|
| 1951 |
+
"loss": 1.3401,
|
| 1952 |
+
"step": 11300
|
| 1953 |
+
},
|
| 1954 |
+
{
|
| 1955 |
+
"epoch": 0.8730097684793477,
|
| 1956 |
+
"grad_norm": 1.1317682266235352,
|
| 1957 |
+
"learning_rate": 0.00014367711606887775,
|
| 1958 |
+
"loss": 1.3506,
|
| 1959 |
+
"step": 11350
|
| 1960 |
+
},
|
| 1961 |
+
{
|
| 1962 |
+
"epoch": 0.8768556264902699,
|
| 1963 |
+
"grad_norm": 0.7948962450027466,
|
| 1964 |
+
"learning_rate": 0.0001434173960470613,
|
| 1965 |
+
"loss": 1.3906,
|
| 1966 |
+
"step": 11400
|
| 1967 |
+
},
|
| 1968 |
+
{
|
| 1969 |
+
"epoch": 0.8807014845011922,
|
| 1970 |
+
"grad_norm": 0.8204107880592346,
|
| 1971 |
+
"learning_rate": 0.0001431576760252448,
|
| 1972 |
+
"loss": 1.3556,
|
| 1973 |
+
"step": 11450
|
| 1974 |
+
},
|
| 1975 |
+
{
|
| 1976 |
+
"epoch": 0.8845473425121144,
|
| 1977 |
+
"grad_norm": 1.5152668952941895,
|
| 1978 |
+
"learning_rate": 0.00014289795600342831,
|
| 1979 |
+
"loss": 1.3396,
|
| 1980 |
+
"step": 11500
|
| 1981 |
+
},
|
| 1982 |
+
{
|
| 1983 |
+
"epoch": 0.8845473425121144,
|
| 1984 |
+
"eval_loss": 1.3371928930282593,
|
| 1985 |
+
"eval_runtime": 16.8684,
|
| 1986 |
+
"eval_samples_per_second": 59.282,
|
| 1987 |
+
"eval_steps_per_second": 14.821,
|
| 1988 |
+
"step": 11500
|
| 1989 |
}
|
| 1990 |
],
|
| 1991 |
"logging_steps": 50,
|