Video-Text-to-Text
Transformers
Safetensors
qwen3_vl
image-text-to-text
llama-factory
full
Generated from Trainer
video-language-model
video-captioning
Instructions to use chancharikm/CHAI_SFT_model_8b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use chancharikm/CHAI_SFT_model_8b with Transformers:
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("chancharikm/CHAI_SFT_model_8b")
model = AutoModelForImageTextToText.from_pretrained("chancharikm/CHAI_SFT_model_8b")
```
- Notebooks
- Google Colab
- Kaggle
Training in progress, step 741, checkpoint
Browse files
- last-checkpoint/global_step741/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step741/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +291 -4
last-checkpoint/global_step741/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fda13d029ab0f9c8edee071d87b09a7de5b5804e4fecda73db1e5afd454bc4d
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f31fd81fedd8a51b114d1ce307479f12286b843b8d0bf0218078fa38aa1e597
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3747894a963f5a7a402f64de90807dfb74ad2a9948ee1630297df5e5fa704a50
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6b1612c1e037b02a903d5c8f8d9c1bd8b50e9ee640fedd013404bb340f4732e
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcfea27c535ab3cd36d881ec247de2c72d6bdfb73796b1bffbb08730c519d24a
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25677e0605fc00cf4f9c0c667e34e22187c0b3909186e1864d3e602cb910d56f
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae0f271ecdbb8b5030de0b834c7942d9253857873739457f97d1961852a28333
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ec54bcd226b366ac40312b32230f8b9e70a90020625973a8b07f38510726399
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step741/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd56729d92ecff4d18c51f3585cdffc2fadb7e4e3094dd9bef8e99ce72e29f45
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9b430189b114033f0425c7954abfe335a78443ea8e21a184f882ed3399fd3ca
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46a10535d49418aa0458dd81a5114cdcccbd9b9b640eafb4f37de100b0eb5023
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:017dc1f9478fc98b96271b64c504487112c7ced098cd5d62d4f5fe6cc6076e8d
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22ab0b6d2d7b6c850a9c225498fb3c402921a4d40a72ae452f0c7671b61688d6
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89c5225ce5da36546c551d76404ba07edd5fa906dfab5b6e486b9b93a3f239ca
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2205bc88c456ff61beb21bc4ad6a3e8627bb9ad1d2875dd2dd3a0bb3fda4727
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step741/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37a2b835829b0e26aa8937b8c52f028b4e899def3bbf86088709661d1a727c74
|
| 3 |
+
size 14663005
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step741
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998056552
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49e7042302ff6a729c45385680fa4cb4a81f634dd19c11c9b11146b0c5077077
|
| 3 |
size 4998056552
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915962464
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea7e84e8433c1602ab31b03c77c594a11a03c5b96dbe716e1ea3be92cd66bf8e
|
| 3 |
size 4915962464
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915962496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:519c9fc303c91360165e09254c2a98449fa60ad64e0c0b5a61775dfbabe1ac1c
|
| 3 |
size 4915962496
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2704357976
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f963c93178755b9faccbda482f1d1bb31346e09b0a583ae7ea0cf2dcc99034d2
|
| 3 |
size 2704357976
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d33f1b5fcc0e2b43e77e522f70fde811afa6c0c979f0ac4bc190eb32c0e39007
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4908,6 +4908,293 @@
|
|
| 4908 |
"learning_rate": 2.6343508932521243e-07,
|
| 4909 |
"loss": 0.0086,
|
| 4910 |
"step": 700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4911 |
}
|
| 4912 |
],
|
| 4913 |
"logging_steps": 1,
|
|
@@ -4922,12 +5209,12 @@
|
|
| 4922 |
"should_evaluate": false,
|
| 4923 |
"should_log": false,
|
| 4924 |
"should_save": true,
|
| 4925 |
-
"should_training_stop":
|
| 4926 |
},
|
| 4927 |
"attributes": {}
|
| 4928 |
}
|
| 4929 |
},
|
| 4930 |
-
"total_flos":
|
| 4931 |
"train_batch_size": 10,
|
| 4932 |
"trial_name": null,
|
| 4933 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 741,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4908 |
"learning_rate": 2.6343508932521243e-07,
|
| 4909 |
"loss": 0.0086,
|
| 4910 |
"step": 700
|
| 4911 |
+
},
|
| 4912 |
+
{
|
| 4913 |
+
"epoch": 2.839756592292089,
|
| 4914 |
+
"grad_norm": 0.1847332447135413,
|
| 4915 |
+
"learning_rate": 2.510745991525992e-07,
|
| 4916 |
+
"loss": 0.0069,
|
| 4917 |
+
"step": 701
|
| 4918 |
+
},
|
| 4919 |
+
{
|
| 4920 |
+
"epoch": 2.843813387423935,
|
| 4921 |
+
"grad_norm": 0.21938966536504567,
|
| 4922 |
+
"learning_rate": 2.39008652010963e-07,
|
| 4923 |
+
"loss": 0.0086,
|
| 4924 |
+
"step": 702
|
| 4925 |
+
},
|
| 4926 |
+
{
|
| 4927 |
+
"epoch": 2.847870182555781,
|
| 4928 |
+
"grad_norm": 0.23134459803966625,
|
| 4929 |
+
"learning_rate": 2.2723748886298523e-07,
|
| 4930 |
+
"loss": 0.009,
|
| 4931 |
+
"step": 703
|
| 4932 |
+
},
|
| 4933 |
+
{
|
| 4934 |
+
"epoch": 2.8519269776876266,
|
| 4935 |
+
"grad_norm": 0.26561602719544714,
|
| 4936 |
+
"learning_rate": 2.1576134478437315e-07,
|
| 4937 |
+
"loss": 0.0102,
|
| 4938 |
+
"step": 704
|
| 4939 |
+
},
|
| 4940 |
+
{
|
| 4941 |
+
"epoch": 2.8559837728194726,
|
| 4942 |
+
"grad_norm": 1.1071488735425152,
|
| 4943 |
+
"learning_rate": 2.0458044895916516e-07,
|
| 4944 |
+
"loss": 0.0084,
|
| 4945 |
+
"step": 705
|
| 4946 |
+
},
|
| 4947 |
+
{
|
| 4948 |
+
"epoch": 2.8600405679513186,
|
| 4949 |
+
"grad_norm": 0.27265875970319914,
|
| 4950 |
+
"learning_rate": 1.9369502467514788e-07,
|
| 4951 |
+
"loss": 0.0111,
|
| 4952 |
+
"step": 706
|
| 4953 |
+
},
|
| 4954 |
+
{
|
| 4955 |
+
"epoch": 2.8640973630831645,
|
| 4956 |
+
"grad_norm": 0.22724233458925192,
|
| 4957 |
+
"learning_rate": 1.831052893194063e-07,
|
| 4958 |
+
"loss": 0.0089,
|
| 4959 |
+
"step": 707
|
| 4960 |
+
},
|
| 4961 |
+
{
|
| 4962 |
+
"epoch": 2.86815415821501,
|
| 4963 |
+
"grad_norm": 0.26572957456388885,
|
| 4964 |
+
"learning_rate": 1.7281145437397394e-07,
|
| 4965 |
+
"loss": 0.0102,
|
| 4966 |
+
"step": 708
|
| 4967 |
+
},
|
| 4968 |
+
{
|
| 4969 |
+
"epoch": 2.872210953346856,
|
| 4970 |
+
"grad_norm": 0.21131153496136418,
|
| 4971 |
+
"learning_rate": 1.62813725411613e-07,
|
| 4972 |
+
"loss": 0.0081,
|
| 4973 |
+
"step": 709
|
| 4974 |
+
},
|
| 4975 |
+
{
|
| 4976 |
+
"epoch": 2.8762677484787016,
|
| 4977 |
+
"grad_norm": 0.2164022001956463,
|
| 4978 |
+
"learning_rate": 1.5311230209171078e-07,
|
| 4979 |
+
"loss": 0.0115,
|
| 4980 |
+
"step": 710
|
| 4981 |
+
},
|
| 4982 |
+
{
|
| 4983 |
+
"epoch": 2.8803245436105476,
|
| 4984 |
+
"grad_norm": 0.18362776733451797,
|
| 4985 |
+
"learning_rate": 1.4370737815628809e-07,
|
| 4986 |
+
"loss": 0.0068,
|
| 4987 |
+
"step": 711
|
| 4988 |
+
},
|
| 4989 |
+
{
|
| 4990 |
+
"epoch": 2.8843813387423936,
|
| 4991 |
+
"grad_norm": 0.22201535643027853,
|
| 4992 |
+
"learning_rate": 1.3459914142613384e-07,
|
| 4993 |
+
"loss": 0.0093,
|
| 4994 |
+
"step": 712
|
| 4995 |
+
},
|
| 4996 |
+
{
|
| 4997 |
+
"epoch": 2.8884381338742395,
|
| 4998 |
+
"grad_norm": 0.23084858527126895,
|
| 4999 |
+
"learning_rate": 1.2578777379705476e-07,
|
| 5000 |
+
"loss": 0.0084,
|
| 5001 |
+
"step": 713
|
| 5002 |
+
},
|
| 5003 |
+
{
|
| 5004 |
+
"epoch": 2.892494929006085,
|
| 5005 |
+
"grad_norm": 0.33392265560180945,
|
| 5006 |
+
"learning_rate": 1.1727345123623667e-07,
|
| 5007 |
+
"loss": 0.0108,
|
| 5008 |
+
"step": 714
|
| 5009 |
+
},
|
| 5010 |
+
{
|
| 5011 |
+
"epoch": 2.896551724137931,
|
| 5012 |
+
"grad_norm": 0.2883283240758792,
|
| 5013 |
+
"learning_rate": 1.0905634377873563e-07,
|
| 5014 |
+
"loss": 0.0092,
|
| 5015 |
+
"step": 715
|
| 5016 |
+
},
|
| 5017 |
+
{
|
| 5018 |
+
"epoch": 2.900608519269777,
|
| 5019 |
+
"grad_norm": 0.2686572168243743,
|
| 5020 |
+
"learning_rate": 1.011366155240856e-07,
|
| 5021 |
+
"loss": 0.0131,
|
| 5022 |
+
"step": 716
|
| 5023 |
+
},
|
| 5024 |
+
{
|
| 5025 |
+
"epoch": 2.9046653144016226,
|
| 5026 |
+
"grad_norm": 0.2101145436417912,
|
| 5027 |
+
"learning_rate": 9.351442463301274e-08,
|
| 5028 |
+
"loss": 0.0073,
|
| 5029 |
+
"step": 717
|
| 5030 |
+
},
|
| 5031 |
+
{
|
| 5032 |
+
"epoch": 2.9087221095334685,
|
| 5033 |
+
"grad_norm": 0.18153151909382642,
|
| 5034 |
+
"learning_rate": 8.618992332427966e-08,
|
| 5035 |
+
"loss": 0.0054,
|
| 5036 |
+
"step": 718
|
| 5037 |
+
},
|
| 5038 |
+
{
|
| 5039 |
+
"epoch": 2.9127789046653145,
|
| 5040 |
+
"grad_norm": 0.1970262952306368,
|
| 5041 |
+
"learning_rate": 7.916325787164947e-08,
|
| 5042 |
+
"loss": 0.0073,
|
| 5043 |
+
"step": 719
|
| 5044 |
+
},
|
| 5045 |
+
{
|
| 5046 |
+
"epoch": 2.9168356997971605,
|
| 5047 |
+
"grad_norm": 0.23074904705177698,
|
| 5048 |
+
"learning_rate": 7.243456860096476e-08,
|
| 5049 |
+
"loss": 0.0079,
|
| 5050 |
+
"step": 720
|
| 5051 |
+
},
|
| 5052 |
+
{
|
| 5053 |
+
"epoch": 2.920892494929006,
|
| 5054 |
+
"grad_norm": 0.2628324630369001,
|
| 5055 |
+
"learning_rate": 6.600398988733824e-08,
|
| 5056 |
+
"loss": 0.0085,
|
| 5057 |
+
"step": 721
|
| 5058 |
+
},
|
| 5059 |
+
{
|
| 5060 |
+
"epoch": 2.924949290060852,
|
| 5061 |
+
"grad_norm": 0.28909539190720107,
|
| 5062 |
+
"learning_rate": 5.98716501524732e-08,
|
| 5063 |
+
"loss": 0.0126,
|
| 5064 |
+
"step": 722
|
| 5065 |
+
},
|
| 5066 |
+
{
|
| 5067 |
+
"epoch": 2.9290060851926976,
|
| 5068 |
+
"grad_norm": 0.27286922538855823,
|
| 5069 |
+
"learning_rate": 5.403767186210218e-08,
|
| 5070 |
+
"loss": 0.0093,
|
| 5071 |
+
"step": 723
|
| 5072 |
+
},
|
| 5073 |
+
{
|
| 5074 |
+
"epoch": 2.9330628803245435,
|
| 5075 |
+
"grad_norm": 0.2568428715793184,
|
| 5076 |
+
"learning_rate": 4.850217152353731e-08,
|
| 5077 |
+
"loss": 0.0074,
|
| 5078 |
+
"step": 724
|
| 5079 |
+
},
|
| 5080 |
+
{
|
| 5081 |
+
"epoch": 2.9371196754563895,
|
| 5082 |
+
"grad_norm": 0.4731785556263277,
|
| 5083 |
+
"learning_rate": 4.326525968334216e-08,
|
| 5084 |
+
"loss": 0.0076,
|
| 5085 |
+
"step": 725
|
| 5086 |
+
},
|
| 5087 |
+
{
|
| 5088 |
+
"epoch": 2.9411764705882355,
|
| 5089 |
+
"grad_norm": 0.2031164746992896,
|
| 5090 |
+
"learning_rate": 3.8327040925130175e-08,
|
| 5091 |
+
"loss": 0.0074,
|
| 5092 |
+
"step": 726
|
| 5093 |
+
},
|
| 5094 |
+
{
|
| 5095 |
+
"epoch": 2.945233265720081,
|
| 5096 |
+
"grad_norm": 0.4807857991510011,
|
| 5097 |
+
"learning_rate": 3.368761386746966e-08,
|
| 5098 |
+
"loss": 0.0072,
|
| 5099 |
+
"step": 727
|
| 5100 |
+
},
|
| 5101 |
+
{
|
| 5102 |
+
"epoch": 2.949290060851927,
|
| 5103 |
+
"grad_norm": 0.66364903613515,
|
| 5104 |
+
"learning_rate": 2.9347071161918703e-08,
|
| 5105 |
+
"loss": 0.0097,
|
| 5106 |
+
"step": 728
|
| 5107 |
+
},
|
| 5108 |
+
{
|
| 5109 |
+
"epoch": 2.9533468559837726,
|
| 5110 |
+
"grad_norm": 0.3120023778508287,
|
| 5111 |
+
"learning_rate": 2.530549949117167e-08,
|
| 5112 |
+
"loss": 0.0103,
|
| 5113 |
+
"step": 729
|
| 5114 |
+
},
|
| 5115 |
+
{
|
| 5116 |
+
"epoch": 2.9574036511156185,
|
| 5117 |
+
"grad_norm": 0.20907205106461102,
|
| 5118 |
+
"learning_rate": 2.1562979567330554e-08,
|
| 5119 |
+
"loss": 0.0069,
|
| 5120 |
+
"step": 730
|
| 5121 |
+
},
|
| 5122 |
+
{
|
| 5123 |
+
"epoch": 2.9614604462474645,
|
| 5124 |
+
"grad_norm": 0.23184300955403422,
|
| 5125 |
+
"learning_rate": 1.8119586130292964e-08,
|
| 5126 |
+
"loss": 0.0062,
|
| 5127 |
+
"step": 731
|
| 5128 |
+
},
|
| 5129 |
+
{
|
| 5130 |
+
"epoch": 2.9655172413793105,
|
| 5131 |
+
"grad_norm": 0.2240122101684402,
|
| 5132 |
+
"learning_rate": 1.4975387946256634e-08,
|
| 5133 |
+
"loss": 0.0083,
|
| 5134 |
+
"step": 732
|
| 5135 |
+
},
|
| 5136 |
+
{
|
| 5137 |
+
"epoch": 2.969574036511156,
|
| 5138 |
+
"grad_norm": 0.23638952694105592,
|
| 5139 |
+
"learning_rate": 1.213044780635053e-08,
|
| 5140 |
+
"loss": 0.0078,
|
| 5141 |
+
"step": 733
|
| 5142 |
+
},
|
| 5143 |
+
{
|
| 5144 |
+
"epoch": 2.973630831643002,
|
| 5145 |
+
"grad_norm": 0.23626023739586408,
|
| 5146 |
+
"learning_rate": 9.584822525377512e-09,
|
| 5147 |
+
"loss": 0.0081,
|
| 5148 |
+
"step": 734
|
| 5149 |
+
},
|
| 5150 |
+
{
|
| 5151 |
+
"epoch": 2.977687626774848,
|
| 5152 |
+
"grad_norm": 0.19999524138260236,
|
| 5153 |
+
"learning_rate": 7.338562940680249e-09,
|
| 5154 |
+
"loss": 0.0074,
|
| 5155 |
+
"step": 735
|
| 5156 |
+
},
|
| 5157 |
+
{
|
| 5158 |
+
"epoch": 2.9817444219066935,
|
| 5159 |
+
"grad_norm": 0.21364040585606983,
|
| 5160 |
+
"learning_rate": 5.391713911128693e-09,
|
| 5161 |
+
"loss": 0.0069,
|
| 5162 |
+
"step": 736
|
| 5163 |
+
},
|
| 5164 |
+
{
|
| 5165 |
+
"epoch": 2.9858012170385395,
|
| 5166 |
+
"grad_norm": 0.20996179998709147,
|
| 5167 |
+
"learning_rate": 3.744314316220798e-09,
|
| 5168 |
+
"loss": 0.0072,
|
| 5169 |
+
"step": 737
|
| 5170 |
+
},
|
| 5171 |
+
{
|
| 5172 |
+
"epoch": 2.9898580121703855,
|
| 5173 |
+
"grad_norm": 0.19358339065466926,
|
| 5174 |
+
"learning_rate": 2.396397055306476e-09,
|
| 5175 |
+
"loss": 0.0061,
|
| 5176 |
+
"step": 738
|
| 5177 |
+
},
|
| 5178 |
+
{
|
| 5179 |
+
"epoch": 2.9939148073022315,
|
| 5180 |
+
"grad_norm": 0.18667044909228617,
|
| 5181 |
+
"learning_rate": 1.3479890469314527e-09,
|
| 5182 |
+
"loss": 0.0075,
|
| 5183 |
+
"step": 739
|
| 5184 |
+
},
|
| 5185 |
+
{
|
| 5186 |
+
"epoch": 2.997971602434077,
|
| 5187 |
+
"grad_norm": 0.1920159799469308,
|
| 5188 |
+
"learning_rate": 5.991112283026956e-10,
|
| 5189 |
+
"loss": 0.0074,
|
| 5190 |
+
"step": 740
|
| 5191 |
+
},
|
| 5192 |
+
{
|
| 5193 |
+
"epoch": 3.0,
|
| 5194 |
+
"grad_norm": 0.1960028788559094,
|
| 5195 |
+
"learning_rate": 1.4977855486209002e-10,
|
| 5196 |
+
"loss": 0.0037,
|
| 5197 |
+
"step": 741
|
| 5198 |
}
|
| 5199 |
],
|
| 5200 |
"logging_steps": 1,
|
|
|
|
| 5209 |
"should_evaluate": false,
|
| 5210 |
"should_log": false,
|
| 5211 |
"should_save": true,
|
| 5212 |
+
"should_training_stop": true
|
| 5213 |
},
|
| 5214 |
"attributes": {}
|
| 5215 |
}
|
| 5216 |
},
|
| 5217 |
+
"total_flos": 5016006509985792.0,
|
| 5218 |
"train_batch_size": 10,
|
| 5219 |
"trial_name": null,
|
| 5220 |
"trial_params": null
|