Video-Text-to-Text
Transformers
Safetensors
qwen3_vl
image-text-to-text
llama-factory
full
Generated from Trainer
video-language-model
video-captioning
Instructions to use chancharikm/CHAI_SFT_model_8b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use chancharikm/CHAI_SFT_model_8b with Transformers:
# Load model directly from transformers import AutoProcessor, AutoModelForImageTextToText processor = AutoProcessor.from_pretrained("chancharikm/CHAI_SFT_model_8b") model = AutoModelForImageTextToText.from_pretrained("chancharikm/CHAI_SFT_model_8b") - Notebooks
- Google Colab
- Kaggle
Training in progress, step 600, checkpoint
Browse files- last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step600/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model-00001-of-00004.safetensors +1 -1
- last-checkpoint/model-00002-of-00004.safetensors +1 -1
- last-checkpoint/model-00003-of-00004.safetensors +1 -1
- last-checkpoint/model-00004-of-00004.safetensors +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:575afa6a1f625837deb97d36829cf83613db5bb28dc430891e46d774f162da21
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c4881d23fee62fd5b4a6cf936f0ebdefef57ec07e3dfa31282f2284359fa2aa
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f07e487580b850d30a35017e718a7c6a89f993a051d12afec2e13a1a44e9f50
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d510aed0ef34d764fd1ba01feea798f08fedba50778bdfde1270df4ea16faa5b
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:466f59c3f29bcd334552bfe6f82c293e1067254ddcbed8bfee74bf81ccb7726e
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4fa80af3fe7a16b69b321622bfdedc367ec98e6eb6f5e2e5846332cfb261ac5
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27d08a0327bd85ec61184f8d12fc0a2b6c18681f3604a1e1f2de2f66fa528c17
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b7e88fb8044af0641fdcc8aad302a9ba1cbc6ff04f364abf943bbd55c5815f1
|
| 3 |
+
size 1558836997
|
last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf1bef5b13beed9d51facb4d5f618f12d61db66d3498b160498e3ac414cd8606
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed405ce2ac5336d8b940632dc024427513763d5cc0808a368d1c00ebe343a75e
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74ab8baecfc881c3d04100f693bbb730b1956715b7553999d2f9cc9bee7a154a
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ce78c768cfc550ddb1e0e89d7c609e056e6b7f680fbd3ccbd7eb175772f37ce
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fdbcbdd3965c37a0136af46a2e2bc6e39f344de6d142fcd228e91217ca7d8555
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75a428081055ad9e47b3a7c5e927d063ff25362a3ac32b3ab5410bcf438a7f3e
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8619b0c57795776e245f2f2a334b994f3566ee67a81fbc2a1621d70d5dbaebaf
|
| 3 |
+
size 14663005
|
last-checkpoint/global_step600/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5e440df922365762744af5fde272e85434cc5189d153e06413c4cce034bd488
|
| 3 |
+
size 14663005
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step600
|
last-checkpoint/model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998056552
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03c7328d9e1f45351312f0574ab9b50cc0739c85c9a860a7ef8ec98e51a13393
|
| 3 |
size 4998056552
|
last-checkpoint/model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915962464
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bfb52488190ea3cf4ded5e45301f704ebd5bf10eb7b5b465f8d553e5090d505
|
| 3 |
size 4915962464
|
last-checkpoint/model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915962496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca3d879bfaf76c6064fd3dabe79ff3a9f374c38f77f4a00d18e34cedfbabfd55
|
| 3 |
size 4915962496
|
last-checkpoint/model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2704357976
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76fed592470092909399a550e3bf05f250cb2e02fe17c96e925b503becd41b31
|
| 3 |
size 2704357976
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e80278f55e8d70153299e2706453208b1be7c51ade602e3812c4d61736a1757b
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 2.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3858,6 +3858,356 @@
|
|
| 3858 |
"learning_rate": 5.190912599873818e-06,
|
| 3859 |
"loss": 0.0126,
|
| 3860 |
"step": 550
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3861 |
}
|
| 3862 |
],
|
| 3863 |
"logging_steps": 1,
|
|
@@ -3877,7 +4227,7 @@
|
|
| 3877 |
"attributes": {}
|
| 3878 |
}
|
| 3879 |
},
|
| 3880 |
-
"total_flos":
|
| 3881 |
"train_batch_size": 10,
|
| 3882 |
"trial_name": null,
|
| 3883 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.4300202839756593,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 600,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3858 |
"learning_rate": 5.190912599873818e-06,
|
| 3859 |
"loss": 0.0126,
|
| 3860 |
"step": 550
|
| 3861 |
+
},
|
| 3862 |
+
{
|
| 3863 |
+
"epoch": 2.231237322515213,
|
| 3864 |
+
"grad_norm": 0.24802505785116624,
|
| 3865 |
+
"learning_rate": 5.14029742304441e-06,
|
| 3866 |
+
"loss": 0.0108,
|
| 3867 |
+
"step": 551
|
| 3868 |
+
},
|
| 3869 |
+
{
|
| 3870 |
+
"epoch": 2.235294117647059,
|
| 3871 |
+
"grad_norm": 0.2595006809811974,
|
| 3872 |
+
"learning_rate": 5.089879149148781e-06,
|
| 3873 |
+
"loss": 0.0117,
|
| 3874 |
+
"step": 552
|
| 3875 |
+
},
|
| 3876 |
+
{
|
| 3877 |
+
"epoch": 2.239350912778905,
|
| 3878 |
+
"grad_norm": 0.25411628526886854,
|
| 3879 |
+
"learning_rate": 5.0396587850637554e-06,
|
| 3880 |
+
"loss": 0.0091,
|
| 3881 |
+
"step": 553
|
| 3882 |
+
},
|
| 3883 |
+
{
|
| 3884 |
+
"epoch": 2.2434077079107504,
|
| 3885 |
+
"grad_norm": 0.2612915952356164,
|
| 3886 |
+
"learning_rate": 4.989637333713814e-06,
|
| 3887 |
+
"loss": 0.01,
|
| 3888 |
+
"step": 554
|
| 3889 |
+
},
|
| 3890 |
+
{
|
| 3891 |
+
"epoch": 2.2474645030425964,
|
| 3892 |
+
"grad_norm": 0.26843182159434964,
|
| 3893 |
+
"learning_rate": 4.93981579405105e-06,
|
| 3894 |
+
"loss": 0.0111,
|
| 3895 |
+
"step": 555
|
| 3896 |
+
},
|
| 3897 |
+
{
|
| 3898 |
+
"epoch": 2.2515212981744424,
|
| 3899 |
+
"grad_norm": 0.21565821566578344,
|
| 3900 |
+
"learning_rate": 4.89019516103522e-06,
|
| 3901 |
+
"loss": 0.0093,
|
| 3902 |
+
"step": 556
|
| 3903 |
+
},
|
| 3904 |
+
{
|
| 3905 |
+
"epoch": 2.255578093306288,
|
| 3906 |
+
"grad_norm": 0.257015977515393,
|
| 3907 |
+
"learning_rate": 4.840776425613887e-06,
|
| 3908 |
+
"loss": 0.014,
|
| 3909 |
+
"step": 557
|
| 3910 |
+
},
|
| 3911 |
+
{
|
| 3912 |
+
"epoch": 2.259634888438134,
|
| 3913 |
+
"grad_norm": 0.20799379451848776,
|
| 3914 |
+
"learning_rate": 4.791560574702614e-06,
|
| 3915 |
+
"loss": 0.0082,
|
| 3916 |
+
"step": 558
|
| 3917 |
+
},
|
| 3918 |
+
{
|
| 3919 |
+
"epoch": 2.26369168356998,
|
| 3920 |
+
"grad_norm": 0.31771401066766947,
|
| 3921 |
+
"learning_rate": 4.742548591165289e-06,
|
| 3922 |
+
"loss": 0.0145,
|
| 3923 |
+
"step": 559
|
| 3924 |
+
},
|
| 3925 |
+
{
|
| 3926 |
+
"epoch": 2.2677484787018254,
|
| 3927 |
+
"grad_norm": 0.2951694121169533,
|
| 3928 |
+
"learning_rate": 4.693741453794433e-06,
|
| 3929 |
+
"loss": 0.0133,
|
| 3930 |
+
"step": 560
|
| 3931 |
+
},
|
| 3932 |
+
{
|
| 3933 |
+
"epoch": 2.2718052738336714,
|
| 3934 |
+
"grad_norm": 0.2628861243112249,
|
| 3935 |
+
"learning_rate": 4.6451401372917275e-06,
|
| 3936 |
+
"loss": 0.0099,
|
| 3937 |
+
"step": 561
|
| 3938 |
+
},
|
| 3939 |
+
{
|
| 3940 |
+
"epoch": 2.2758620689655173,
|
| 3941 |
+
"grad_norm": 0.22269435944973373,
|
| 3942 |
+
"learning_rate": 4.596745612248488e-06,
|
| 3943 |
+
"loss": 0.01,
|
| 3944 |
+
"step": 562
|
| 3945 |
+
},
|
| 3946 |
+
{
|
| 3947 |
+
"epoch": 2.279918864097363,
|
| 3948 |
+
"grad_norm": 0.28704577522446245,
|
| 3949 |
+
"learning_rate": 4.548558845126334e-06,
|
| 3950 |
+
"loss": 0.0133,
|
| 3951 |
+
"step": 563
|
| 3952 |
+
},
|
| 3953 |
+
{
|
| 3954 |
+
"epoch": 2.283975659229209,
|
| 3955 |
+
"grad_norm": 0.26309253454993753,
|
| 3956 |
+
"learning_rate": 4.500580798237831e-06,
|
| 3957 |
+
"loss": 0.0112,
|
| 3958 |
+
"step": 564
|
| 3959 |
+
},
|
| 3960 |
+
{
|
| 3961 |
+
"epoch": 2.288032454361055,
|
| 3962 |
+
"grad_norm": 0.36204770339604975,
|
| 3963 |
+
"learning_rate": 4.452812429727313e-06,
|
| 3964 |
+
"loss": 0.0102,
|
| 3965 |
+
"step": 565
|
| 3966 |
+
},
|
| 3967 |
+
{
|
| 3968 |
+
"epoch": 2.292089249492901,
|
| 3969 |
+
"grad_norm": 0.27129867464793533,
|
| 3970 |
+
"learning_rate": 4.405254693551754e-06,
|
| 3971 |
+
"loss": 0.0129,
|
| 3972 |
+
"step": 566
|
| 3973 |
+
},
|
| 3974 |
+
{
|
| 3975 |
+
"epoch": 2.2961460446247464,
|
| 3976 |
+
"grad_norm": 0.22000281062189,
|
| 3977 |
+
"learning_rate": 4.357908539461679e-06,
|
| 3978 |
+
"loss": 0.008,
|
| 3979 |
+
"step": 567
|
| 3980 |
+
},
|
| 3981 |
+
{
|
| 3982 |
+
"epoch": 2.3002028397565923,
|
| 3983 |
+
"grad_norm": 0.29624602171723224,
|
| 3984 |
+
"learning_rate": 4.310774912982227e-06,
|
| 3985 |
+
"loss": 0.0144,
|
| 3986 |
+
"step": 568
|
| 3987 |
+
},
|
| 3988 |
+
{
|
| 3989 |
+
"epoch": 2.3042596348884383,
|
| 3990 |
+
"grad_norm": 0.22414337252422092,
|
| 3991 |
+
"learning_rate": 4.263854755394256e-06,
|
| 3992 |
+
"loss": 0.0089,
|
| 3993 |
+
"step": 569
|
| 3994 |
+
},
|
| 3995 |
+
{
|
| 3996 |
+
"epoch": 2.308316430020284,
|
| 3997 |
+
"grad_norm": 0.28474418070215113,
|
| 3998 |
+
"learning_rate": 4.21714900371556e-06,
|
| 3999 |
+
"loss": 0.012,
|
| 4000 |
+
"step": 570
|
| 4001 |
+
},
|
| 4002 |
+
{
|
| 4003 |
+
"epoch": 2.31237322515213,
|
| 4004 |
+
"grad_norm": 0.210326196860165,
|
| 4005 |
+
"learning_rate": 4.170658590682134e-06,
|
| 4006 |
+
"loss": 0.0082,
|
| 4007 |
+
"step": 571
|
| 4008 |
+
},
|
| 4009 |
+
{
|
| 4010 |
+
"epoch": 2.316430020283976,
|
| 4011 |
+
"grad_norm": 0.25084743918519153,
|
| 4012 |
+
"learning_rate": 4.124384444729561e-06,
|
| 4013 |
+
"loss": 0.0089,
|
| 4014 |
+
"step": 572
|
| 4015 |
+
},
|
| 4016 |
+
{
|
| 4017 |
+
"epoch": 2.3204868154158214,
|
| 4018 |
+
"grad_norm": 0.28537259628253975,
|
| 4019 |
+
"learning_rate": 4.078327489974466e-06,
|
| 4020 |
+
"loss": 0.0113,
|
| 4021 |
+
"step": 573
|
| 4022 |
+
},
|
| 4023 |
+
{
|
| 4024 |
+
"epoch": 2.3245436105476673,
|
| 4025 |
+
"grad_norm": 0.22908276842060815,
|
| 4026 |
+
"learning_rate": 4.032488646196077e-06,
|
| 4027 |
+
"loss": 0.0101,
|
| 4028 |
+
"step": 574
|
| 4029 |
+
},
|
| 4030 |
+
{
|
| 4031 |
+
"epoch": 2.3286004056795133,
|
| 4032 |
+
"grad_norm": 0.23845424830115683,
|
| 4033 |
+
"learning_rate": 3.986868828817818e-06,
|
| 4034 |
+
"loss": 0.0082,
|
| 4035 |
+
"step": 575
|
| 4036 |
+
},
|
| 4037 |
+
{
|
| 4038 |
+
"epoch": 2.332657200811359,
|
| 4039 |
+
"grad_norm": 0.184143153098664,
|
| 4040 |
+
"learning_rate": 3.941468948889067e-06,
|
| 4041 |
+
"loss": 0.0068,
|
| 4042 |
+
"step": 576
|
| 4043 |
+
},
|
| 4044 |
+
{
|
| 4045 |
+
"epoch": 2.336713995943205,
|
| 4046 |
+
"grad_norm": 0.6122313601455306,
|
| 4047 |
+
"learning_rate": 3.8962899130669525e-06,
|
| 4048 |
+
"loss": 0.0108,
|
| 4049 |
+
"step": 577
|
| 4050 |
+
},
|
| 4051 |
+
{
|
| 4052 |
+
"epoch": 2.340770791075051,
|
| 4053 |
+
"grad_norm": 0.28166906523725416,
|
| 4054 |
+
"learning_rate": 3.851332623598227e-06,
|
| 4055 |
+
"loss": 0.0123,
|
| 4056 |
+
"step": 578
|
| 4057 |
+
},
|
| 4058 |
+
{
|
| 4059 |
+
"epoch": 2.344827586206897,
|
| 4060 |
+
"grad_norm": 0.25383591722677934,
|
| 4061 |
+
"learning_rate": 3.8065979783012746e-06,
|
| 4062 |
+
"loss": 0.0112,
|
| 4063 |
+
"step": 579
|
| 4064 |
+
},
|
| 4065 |
+
{
|
| 4066 |
+
"epoch": 2.3488843813387423,
|
| 4067 |
+
"grad_norm": 0.19603119074642442,
|
| 4068 |
+
"learning_rate": 3.7620868705481586e-06,
|
| 4069 |
+
"loss": 0.0079,
|
| 4070 |
+
"step": 580
|
| 4071 |
+
},
|
| 4072 |
+
{
|
| 4073 |
+
"epoch": 2.3529411764705883,
|
| 4074 |
+
"grad_norm": 0.6430341184106714,
|
| 4075 |
+
"learning_rate": 3.717800189246807e-06,
|
| 4076 |
+
"loss": 0.0079,
|
| 4077 |
+
"step": 581
|
| 4078 |
+
},
|
| 4079 |
+
{
|
| 4080 |
+
"epoch": 2.356997971602434,
|
| 4081 |
+
"grad_norm": 0.18811787242514347,
|
| 4082 |
+
"learning_rate": 3.6737388188232305e-06,
|
| 4083 |
+
"loss": 0.0083,
|
| 4084 |
+
"step": 582
|
| 4085 |
+
},
|
| 4086 |
+
{
|
| 4087 |
+
"epoch": 2.36105476673428,
|
| 4088 |
+
"grad_norm": 0.20181404498766184,
|
| 4089 |
+
"learning_rate": 3.629903639203884e-06,
|
| 4090 |
+
"loss": 0.0083,
|
| 4091 |
+
"step": 583
|
| 4092 |
+
},
|
| 4093 |
+
{
|
| 4094 |
+
"epoch": 2.365111561866126,
|
| 4095 |
+
"grad_norm": 0.277342849633144,
|
| 4096 |
+
"learning_rate": 3.5862955257980813e-06,
|
| 4097 |
+
"loss": 0.0117,
|
| 4098 |
+
"step": 584
|
| 4099 |
+
},
|
| 4100 |
+
{
|
| 4101 |
+
"epoch": 2.369168356997972,
|
| 4102 |
+
"grad_norm": 0.33333350892821556,
|
| 4103 |
+
"learning_rate": 3.5429153494805087e-06,
|
| 4104 |
+
"loss": 0.0123,
|
| 4105 |
+
"step": 585
|
| 4106 |
+
},
|
| 4107 |
+
{
|
| 4108 |
+
"epoch": 2.3732251521298173,
|
| 4109 |
+
"grad_norm": 0.23513638472728224,
|
| 4110 |
+
"learning_rate": 3.499763976573866e-06,
|
| 4111 |
+
"loss": 0.0108,
|
| 4112 |
+
"step": 586
|
| 4113 |
+
},
|
| 4114 |
+
{
|
| 4115 |
+
"epoch": 2.3772819472616633,
|
| 4116 |
+
"grad_norm": 0.9093270963617072,
|
| 4117 |
+
"learning_rate": 3.4568422688315027e-06,
|
| 4118 |
+
"loss": 0.0142,
|
| 4119 |
+
"step": 587
|
| 4120 |
+
},
|
| 4121 |
+
{
|
| 4122 |
+
"epoch": 2.3813387423935093,
|
| 4123 |
+
"grad_norm": 0.34746001689384526,
|
| 4124 |
+
"learning_rate": 3.41415108342028e-06,
|
| 4125 |
+
"loss": 0.0091,
|
| 4126 |
+
"step": 588
|
| 4127 |
+
},
|
| 4128 |
+
{
|
| 4129 |
+
"epoch": 2.385395537525355,
|
| 4130 |
+
"grad_norm": 0.2173220084157316,
|
| 4131 |
+
"learning_rate": 3.371691272903398e-06,
|
| 4132 |
+
"loss": 0.0072,
|
| 4133 |
+
"step": 589
|
| 4134 |
+
},
|
| 4135 |
+
{
|
| 4136 |
+
"epoch": 2.389452332657201,
|
| 4137 |
+
"grad_norm": 0.24334697959441584,
|
| 4138 |
+
"learning_rate": 3.329463685223411e-06,
|
| 4139 |
+
"loss": 0.01,
|
| 4140 |
+
"step": 590
|
| 4141 |
+
},
|
| 4142 |
+
{
|
| 4143 |
+
"epoch": 2.393509127789047,
|
| 4144 |
+
"grad_norm": 0.17003427758882395,
|
| 4145 |
+
"learning_rate": 3.287469163685241e-06,
|
| 4146 |
+
"loss": 0.0078,
|
| 4147 |
+
"step": 591
|
| 4148 |
+
},
|
| 4149 |
+
{
|
| 4150 |
+
"epoch": 2.3975659229208923,
|
| 4151 |
+
"grad_norm": 0.275983232519286,
|
| 4152 |
+
"learning_rate": 3.2457085469394015e-06,
|
| 4153 |
+
"loss": 0.0098,
|
| 4154 |
+
"step": 592
|
| 4155 |
+
},
|
| 4156 |
+
{
|
| 4157 |
+
"epoch": 2.4016227180527383,
|
| 4158 |
+
"grad_norm": 0.2634629301055357,
|
| 4159 |
+
"learning_rate": 3.204182668965198e-06,
|
| 4160 |
+
"loss": 0.0093,
|
| 4161 |
+
"step": 593
|
| 4162 |
+
},
|
| 4163 |
+
{
|
| 4164 |
+
"epoch": 2.4056795131845843,
|
| 4165 |
+
"grad_norm": 0.22325919432645464,
|
| 4166 |
+
"learning_rate": 3.162892359054098e-06,
|
| 4167 |
+
"loss": 0.0102,
|
| 4168 |
+
"step": 594
|
| 4169 |
+
},
|
| 4170 |
+
{
|
| 4171 |
+
"epoch": 2.40973630831643,
|
| 4172 |
+
"grad_norm": 0.18144358371062383,
|
| 4173 |
+
"learning_rate": 3.12183844179316e-06,
|
| 4174 |
+
"loss": 0.0072,
|
| 4175 |
+
"step": 595
|
| 4176 |
+
},
|
| 4177 |
+
{
|
| 4178 |
+
"epoch": 2.413793103448276,
|
| 4179 |
+
"grad_norm": 0.19726131763241822,
|
| 4180 |
+
"learning_rate": 3.081021737048565e-06,
|
| 4181 |
+
"loss": 0.0079,
|
| 4182 |
+
"step": 596
|
| 4183 |
+
},
|
| 4184 |
+
{
|
| 4185 |
+
"epoch": 2.417849898580122,
|
| 4186 |
+
"grad_norm": 0.2681563404805776,
|
| 4187 |
+
"learning_rate": 3.040443059949264e-06,
|
| 4188 |
+
"loss": 0.0124,
|
| 4189 |
+
"step": 597
|
| 4190 |
+
},
|
| 4191 |
+
{
|
| 4192 |
+
"epoch": 2.4219066937119678,
|
| 4193 |
+
"grad_norm": 2.0846202190030847,
|
| 4194 |
+
"learning_rate": 3.0001032208706653e-06,
|
| 4195 |
+
"loss": 0.0108,
|
| 4196 |
+
"step": 598
|
| 4197 |
+
},
|
| 4198 |
+
{
|
| 4199 |
+
"epoch": 2.4259634888438133,
|
| 4200 |
+
"grad_norm": 0.20806969883961424,
|
| 4201 |
+
"learning_rate": 2.960003025418478e-06,
|
| 4202 |
+
"loss": 0.0082,
|
| 4203 |
+
"step": 599
|
| 4204 |
+
},
|
| 4205 |
+
{
|
| 4206 |
+
"epoch": 2.4300202839756593,
|
| 4207 |
+
"grad_norm": 0.21726326567282914,
|
| 4208 |
+
"learning_rate": 2.9201432744126074e-06,
|
| 4209 |
+
"loss": 0.0114,
|
| 4210 |
+
"step": 600
|
| 4211 |
}
|
| 4212 |
],
|
| 4213 |
"logging_steps": 1,
|
|
|
|
| 4227 |
"attributes": {}
|
| 4228 |
}
|
| 4229 |
},
|
| 4230 |
+
"total_flos": 4063291632517120.0,
|
| 4231 |
"train_batch_size": 10,
|
| 4232 |
"trial_name": null,
|
| 4233 |
"trial_params": null
|