ByungOh-Ko
committed on
Commit
·
55df646
1
Parent(s):
3ec9009
update
Browse files- SimPO_Beta_2.5_Gamma_1.5/README.md +63 -0
- SimPO_Beta_2.5_Gamma_1.5/adapter_config.json +42 -0
- SimPO_Beta_2.5_Gamma_1.5/adapter_model.safetensors +3 -0
- SimPO_Beta_2.5_Gamma_1.5/added_tokens.json +16 -0
- SimPO_Beta_2.5_Gamma_1.5/all_results.json +8 -0
- SimPO_Beta_2.5_Gamma_1.5/chat_template.jinja +7 -0
- SimPO_Beta_2.5_Gamma_1.5/merges.txt +0 -0
- SimPO_Beta_2.5_Gamma_1.5/preprocessor_config.json +29 -0
- SimPO_Beta_2.5_Gamma_1.5/special_tokens_map.json +31 -0
- SimPO_Beta_2.5_Gamma_1.5/tokenizer_config.json +144 -0
- SimPO_Beta_2.5_Gamma_1.5/train_results.json +8 -0
- SimPO_Beta_2.5_Gamma_1.5/trainer_log.jsonl +189 -0
- SimPO_Beta_2.5_Gamma_1.5/trainer_state.json +0 -0
- SimPO_Beta_2.5_Gamma_1.5/training_args.bin +3 -0
- SimPO_Beta_2.5_Gamma_1.5/training_loss.png +0 -0
- SimPO_Beta_2.5_Gamma_1.5/training_rewards_accuracies.png +0 -0
- SimPO_Beta_2.5_Gamma_1.5/video_preprocessor_config.json +43 -0
- SimPO_Beta_2.5_Gamma_1.5/vocab.json +0 -0
SimPO_Beta_2.5_Gamma_1.5/README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: peft
|
| 3 |
+
license: other
|
| 4 |
+
base_model: Qwen/Qwen2-VL-2B-Instruct
|
| 5 |
+
tags:
|
| 6 |
+
- base_model:adapter:Qwen/Qwen2-VL-2B-Instruct
|
| 7 |
+
- llama-factory
|
| 8 |
+
- lora
|
| 9 |
+
- transformers
|
| 10 |
+
pipeline_tag: text-generation
|
| 11 |
+
model-index:
|
| 12 |
+
- name: SimPO_Beta_2.5_Gamma_1.5
|
| 13 |
+
results: []
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
| 17 |
+
should probably proofread and complete it, then remove this comment. -->
|
| 18 |
+
|
| 19 |
+
# SimPO_Beta_2.5_Gamma_1.5
|
| 20 |
+
|
| 21 |
+
This model is a fine-tuned version of [Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) on the Qwen2_VL_2B_Instruct_12k dataset.
|
| 22 |
+
|
| 23 |
+
## Model description
|
| 24 |
+
|
| 25 |
+
More information needed
|
| 26 |
+
|
| 27 |
+
## Intended uses & limitations
|
| 28 |
+
|
| 29 |
+
More information needed
|
| 30 |
+
|
| 31 |
+
## Training and evaluation data
|
| 32 |
+
|
| 33 |
+
More information needed
|
| 34 |
+
|
| 35 |
+
## Training procedure
|
| 36 |
+
|
| 37 |
+
### Training hyperparameters
|
| 38 |
+
|
| 39 |
+
The following hyperparameters were used during training:
|
| 40 |
+
- learning_rate: 2e-06
|
| 41 |
+
- train_batch_size: 1
|
| 42 |
+
- eval_batch_size: 8
|
| 43 |
+
- seed: 42
|
| 44 |
+
- distributed_type: multi-GPU
|
| 45 |
+
- num_devices: 4
|
| 46 |
+
- gradient_accumulation_steps: 16
|
| 47 |
+
- total_train_batch_size: 64
|
| 48 |
+
- total_eval_batch_size: 32
|
| 49 |
+
- optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08 (no additional optimizer arguments)
|
| 50 |
+
- lr_scheduler_type: cosine
|
| 51 |
+
- num_epochs: 1.0
|
| 52 |
+
|
| 53 |
+
### Training results
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
### Framework versions
|
| 58 |
+
|
| 59 |
+
- PEFT 0.17.1
|
| 60 |
+
- Transformers 4.57.1
|
| 61 |
+
- Pytorch 2.5.1+cu121
|
| 62 |
+
- Datasets 4.0.0
|
| 63 |
+
- Tokenizers 0.22.1
|
SimPO_Beta_2.5_Gamma_1.5/adapter_config.json
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 256,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.0,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 128,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"q_proj",
|
| 29 |
+
"k_proj",
|
| 30 |
+
"o_proj",
|
| 31 |
+
"gate_proj",
|
| 32 |
+
"up_proj",
|
| 33 |
+
"down_proj",
|
| 34 |
+
"v_proj"
|
| 35 |
+
],
|
| 36 |
+
"target_parameters": null,
|
| 37 |
+
"task_type": "CAUSAL_LM",
|
| 38 |
+
"trainable_token_indices": null,
|
| 39 |
+
"use_dora": false,
|
| 40 |
+
"use_qalora": false,
|
| 41 |
+
"use_rslora": false
|
| 42 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6361802ccefa899e67dca3a30d39402610f5c1e4a52359aaaf3e4607e519a267
|
| 3 |
+
size 295495600
|
SimPO_Beta_2.5_Gamma_1.5/added_tokens.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<|box_end|>": 151649,
|
| 3 |
+
"<|box_start|>": 151648,
|
| 4 |
+
"<|endoftext|>": 151643,
|
| 5 |
+
"<|im_end|>": 151645,
|
| 6 |
+
"<|im_start|>": 151644,
|
| 7 |
+
"<|image_pad|>": 151655,
|
| 8 |
+
"<|object_ref_end|>": 151647,
|
| 9 |
+
"<|object_ref_start|>": 151646,
|
| 10 |
+
"<|quad_end|>": 151651,
|
| 11 |
+
"<|quad_start|>": 151650,
|
| 12 |
+
"<|video_pad|>": 151656,
|
| 13 |
+
"<|vision_end|>": 151653,
|
| 14 |
+
"<|vision_pad|>": 151654,
|
| 15 |
+
"<|vision_start|>": 151652
|
| 16 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/all_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 1.392373855640617e+17,
|
| 4 |
+
"train_loss": 1.7203158742569862,
|
| 5 |
+
"train_runtime": 1743.476,
|
| 6 |
+
"train_samples_per_second": 6.883,
|
| 7 |
+
"train_steps_per_second": 0.108
|
| 8 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/chat_template.jinja
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
|
| 2 |
+
You are a helpful assistant.<|im_end|>
|
| 3 |
+
{% endif %}<|im_start|>{{ message['role'] }}
|
| 4 |
+
{% if message['content'] is string %}{{ message['content'] }}<|im_end|>
|
| 5 |
+
{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
|
| 6 |
+
{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
|
| 7 |
+
{% endif %}
|
SimPO_Beta_2.5_Gamma_1.5/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
SimPO_Beta_2.5_Gamma_1.5/preprocessor_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_rgb": true,
|
| 3 |
+
"do_normalize": true,
|
| 4 |
+
"do_rescale": true,
|
| 5 |
+
"do_resize": true,
|
| 6 |
+
"image_mean": [
|
| 7 |
+
0.48145466,
|
| 8 |
+
0.4578275,
|
| 9 |
+
0.40821073
|
| 10 |
+
],
|
| 11 |
+
"image_processor_type": "Qwen2VLImageProcessor",
|
| 12 |
+
"image_std": [
|
| 13 |
+
0.26862954,
|
| 14 |
+
0.26130258,
|
| 15 |
+
0.27577711
|
| 16 |
+
],
|
| 17 |
+
"max_pixels": 12845056,
|
| 18 |
+
"merge_size": 2,
|
| 19 |
+
"min_pixels": 3136,
|
| 20 |
+
"patch_size": 14,
|
| 21 |
+
"processor_class": "Qwen2VLProcessor",
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"longest_edge": 12845056,
|
| 26 |
+
"shortest_edge": 3136
|
| 27 |
+
},
|
| 28 |
+
"temporal_patch_size": 2
|
| 29 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/tokenizer_config.json
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"151643": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"151644": {
|
| 13 |
+
"content": "<|im_start|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"151645": {
|
| 21 |
+
"content": "<|im_end|>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"151646": {
|
| 29 |
+
"content": "<|object_ref_start|>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": false,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"151647": {
|
| 37 |
+
"content": "<|object_ref_end|>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
},
|
| 44 |
+
"151648": {
|
| 45 |
+
"content": "<|box_start|>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false,
|
| 50 |
+
"special": true
|
| 51 |
+
},
|
| 52 |
+
"151649": {
|
| 53 |
+
"content": "<|box_end|>",
|
| 54 |
+
"lstrip": false,
|
| 55 |
+
"normalized": false,
|
| 56 |
+
"rstrip": false,
|
| 57 |
+
"single_word": false,
|
| 58 |
+
"special": true
|
| 59 |
+
},
|
| 60 |
+
"151650": {
|
| 61 |
+
"content": "<|quad_start|>",
|
| 62 |
+
"lstrip": false,
|
| 63 |
+
"normalized": false,
|
| 64 |
+
"rstrip": false,
|
| 65 |
+
"single_word": false,
|
| 66 |
+
"special": true
|
| 67 |
+
},
|
| 68 |
+
"151651": {
|
| 69 |
+
"content": "<|quad_end|>",
|
| 70 |
+
"lstrip": false,
|
| 71 |
+
"normalized": false,
|
| 72 |
+
"rstrip": false,
|
| 73 |
+
"single_word": false,
|
| 74 |
+
"special": true
|
| 75 |
+
},
|
| 76 |
+
"151652": {
|
| 77 |
+
"content": "<|vision_start|>",
|
| 78 |
+
"lstrip": false,
|
| 79 |
+
"normalized": false,
|
| 80 |
+
"rstrip": false,
|
| 81 |
+
"single_word": false,
|
| 82 |
+
"special": true
|
| 83 |
+
},
|
| 84 |
+
"151653": {
|
| 85 |
+
"content": "<|vision_end|>",
|
| 86 |
+
"lstrip": false,
|
| 87 |
+
"normalized": false,
|
| 88 |
+
"rstrip": false,
|
| 89 |
+
"single_word": false,
|
| 90 |
+
"special": true
|
| 91 |
+
},
|
| 92 |
+
"151654": {
|
| 93 |
+
"content": "<|vision_pad|>",
|
| 94 |
+
"lstrip": false,
|
| 95 |
+
"normalized": false,
|
| 96 |
+
"rstrip": false,
|
| 97 |
+
"single_word": false,
|
| 98 |
+
"special": true
|
| 99 |
+
},
|
| 100 |
+
"151655": {
|
| 101 |
+
"content": "<|image_pad|>",
|
| 102 |
+
"lstrip": false,
|
| 103 |
+
"normalized": false,
|
| 104 |
+
"rstrip": false,
|
| 105 |
+
"single_word": false,
|
| 106 |
+
"special": true
|
| 107 |
+
},
|
| 108 |
+
"151656": {
|
| 109 |
+
"content": "<|video_pad|>",
|
| 110 |
+
"lstrip": false,
|
| 111 |
+
"normalized": false,
|
| 112 |
+
"rstrip": false,
|
| 113 |
+
"single_word": false,
|
| 114 |
+
"special": true
|
| 115 |
+
}
|
| 116 |
+
},
|
| 117 |
+
"additional_special_tokens": [
|
| 118 |
+
"<|im_start|>",
|
| 119 |
+
"<|im_end|>",
|
| 120 |
+
"<|object_ref_start|>",
|
| 121 |
+
"<|object_ref_end|>",
|
| 122 |
+
"<|box_start|>",
|
| 123 |
+
"<|box_end|>",
|
| 124 |
+
"<|quad_start|>",
|
| 125 |
+
"<|quad_end|>",
|
| 126 |
+
"<|vision_start|>",
|
| 127 |
+
"<|vision_end|>",
|
| 128 |
+
"<|vision_pad|>",
|
| 129 |
+
"<|image_pad|>",
|
| 130 |
+
"<|video_pad|>"
|
| 131 |
+
],
|
| 132 |
+
"bos_token": null,
|
| 133 |
+
"clean_up_tokenization_spaces": false,
|
| 134 |
+
"eos_token": "<|im_end|>",
|
| 135 |
+
"errors": "replace",
|
| 136 |
+
"extra_special_tokens": {},
|
| 137 |
+
"model_max_length": 32768,
|
| 138 |
+
"pad_token": "<|endoftext|>",
|
| 139 |
+
"padding_side": "right",
|
| 140 |
+
"processor_class": "Qwen2VLProcessor",
|
| 141 |
+
"split_special_tokens": false,
|
| 142 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 143 |
+
"unk_token": null
|
| 144 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/train_results.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"total_flos": 1.392373855640617e+17,
|
| 4 |
+
"train_loss": 1.7203158742569862,
|
| 5 |
+
"train_runtime": 1743.476,
|
| 6 |
+
"train_samples_per_second": 6.883,
|
| 7 |
+
"train_steps_per_second": 0.108
|
| 8 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/trainer_log.jsonl
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 1, "total_steps": 188, "loss": 1.8444, "accuracy": 0.484375, "lr": 2e-06, "epoch": 0.005333333333333333, "percentage": 0.53, "elapsed_time": "0:00:10", "remaining_time": "0:31:38"}
|
| 2 |
+
{"current_steps": 2, "total_steps": 188, "loss": 1.5274, "accuracy": 0.578125, "lr": 1.999860381185857e-06, "epoch": 0.010666666666666666, "percentage": 1.06, "elapsed_time": "0:00:19", "remaining_time": "0:30:36"}
|
| 3 |
+
{"current_steps": 3, "total_steps": 188, "loss": 1.5387, "accuracy": 0.671875, "lr": 1.9994415637302546e-06, "epoch": 0.016, "percentage": 1.6, "elapsed_time": "0:00:30", "remaining_time": "0:30:51"}
|
| 4 |
+
{"current_steps": 4, "total_steps": 188, "loss": 1.7716, "accuracy": 0.5625, "lr": 1.998743664582786e-06, "epoch": 0.021333333333333333, "percentage": 2.13, "elapsed_time": "0:00:38", "remaining_time": "0:29:15"}
|
| 5 |
+
{"current_steps": 5, "total_steps": 188, "loss": 2.1547, "accuracy": 0.359375, "lr": 1.9977668786231533e-06, "epoch": 0.02666666666666667, "percentage": 2.66, "elapsed_time": "0:00:47", "remaining_time": "0:28:43"}
|
| 6 |
+
{"current_steps": 6, "total_steps": 188, "loss": 1.6678, "accuracy": 0.484375, "lr": 1.9965114786067515e-06, "epoch": 0.032, "percentage": 3.19, "elapsed_time": "0:00:56", "remaining_time": "0:28:39"}
|
| 7 |
+
{"current_steps": 7, "total_steps": 188, "loss": 1.7993, "accuracy": 0.453125, "lr": 1.994977815088504e-06, "epoch": 0.037333333333333336, "percentage": 3.72, "elapsed_time": "0:01:05", "remaining_time": "0:28:03"}
|
| 8 |
+
{"current_steps": 8, "total_steps": 188, "loss": 1.8437, "accuracy": 0.421875, "lr": 1.993166316324974e-06, "epoch": 0.042666666666666665, "percentage": 4.26, "elapsed_time": "0:01:16", "remaining_time": "0:28:33"}
|
| 9 |
+
{"current_steps": 9, "total_steps": 188, "loss": 1.6577, "accuracy": 0.421875, "lr": 1.99107748815478e-06, "epoch": 0.048, "percentage": 4.79, "elapsed_time": "0:01:25", "remaining_time": "0:28:13"}
|
| 10 |
+
{"current_steps": 10, "total_steps": 188, "loss": 1.7743, "accuracy": 0.4375, "lr": 1.988711913857346e-06, "epoch": 0.05333333333333334, "percentage": 5.32, "elapsed_time": "0:01:33", "remaining_time": "0:27:46"}
|
| 11 |
+
{"current_steps": 11, "total_steps": 188, "loss": 1.892, "accuracy": 0.421875, "lr": 1.9860702539900285e-06, "epoch": 0.058666666666666666, "percentage": 5.85, "elapsed_time": "0:01:42", "remaining_time": "0:27:22"}
|
| 12 |
+
{"current_steps": 12, "total_steps": 188, "loss": 1.7764, "accuracy": 0.4375, "lr": 1.9831532462036635e-06, "epoch": 0.064, "percentage": 6.38, "elapsed_time": "0:01:51", "remaining_time": "0:27:17"}
|
| 13 |
+
{"current_steps": 13, "total_steps": 188, "loss": 1.7401, "accuracy": 0.46875, "lr": 1.9799617050365867e-06, "epoch": 0.06933333333333333, "percentage": 6.91, "elapsed_time": "0:02:02", "remaining_time": "0:27:29"}
|
| 14 |
+
{"current_steps": 14, "total_steps": 188, "loss": 1.9175, "accuracy": 0.421875, "lr": 1.9764965216871843e-06, "epoch": 0.07466666666666667, "percentage": 7.45, "elapsed_time": "0:02:12", "remaining_time": "0:27:20"}
|
| 15 |
+
{"current_steps": 15, "total_steps": 188, "loss": 1.6756, "accuracy": 0.46875, "lr": 1.9727586637650373e-06, "epoch": 0.08, "percentage": 7.98, "elapsed_time": "0:02:20", "remaining_time": "0:27:02"}
|
| 16 |
+
{"current_steps": 16, "total_steps": 188, "loss": 1.7047, "accuracy": 0.515625, "lr": 1.9687491750207253e-06, "epoch": 0.08533333333333333, "percentage": 8.51, "elapsed_time": "0:02:29", "remaining_time": "0:26:43"}
|
| 17 |
+
{"current_steps": 17, "total_steps": 188, "loss": 1.745, "accuracy": 0.46875, "lr": 1.9644691750543766e-06, "epoch": 0.09066666666666667, "percentage": 9.04, "elapsed_time": "0:02:38", "remaining_time": "0:26:38"}
|
| 18 |
+
{"current_steps": 18, "total_steps": 188, "loss": 1.5597, "accuracy": 0.609375, "lr": 1.9599198590030305e-06, "epoch": 0.096, "percentage": 9.57, "elapsed_time": "0:02:47", "remaining_time": "0:26:18"}
|
| 19 |
+
{"current_steps": 19, "total_steps": 188, "loss": 1.7817, "accuracy": 0.453125, "lr": 1.955102497206912e-06, "epoch": 0.10133333333333333, "percentage": 10.11, "elapsed_time": "0:02:55", "remaining_time": "0:26:05"}
|
| 20 |
+
{"current_steps": 20, "total_steps": 188, "loss": 1.8861, "accuracy": 0.359375, "lr": 1.950018434854704e-06, "epoch": 0.10666666666666667, "percentage": 10.64, "elapsed_time": "0:03:05", "remaining_time": "0:25:55"}
|
| 21 |
+
{"current_steps": 21, "total_steps": 188, "loss": 1.6677, "accuracy": 0.5, "lr": 1.9446690916079185e-06, "epoch": 0.112, "percentage": 11.17, "elapsed_time": "0:03:13", "remaining_time": "0:25:39"}
|
| 22 |
+
{"current_steps": 22, "total_steps": 188, "loss": 1.7143, "accuracy": 0.421875, "lr": 1.939055961204478e-06, "epoch": 0.11733333333333333, "percentage": 11.7, "elapsed_time": "0:03:23", "remaining_time": "0:25:32"}
|
| 23 |
+
{"current_steps": 23, "total_steps": 188, "loss": 1.8268, "accuracy": 0.453125, "lr": 1.9331806110416025e-06, "epoch": 0.12266666666666666, "percentage": 12.23, "elapsed_time": "0:03:31", "remaining_time": "0:25:18"}
|
| 24 |
+
{"current_steps": 24, "total_steps": 188, "loss": 1.6726, "accuracy": 0.5625, "lr": 1.9270446817381372e-06, "epoch": 0.128, "percentage": 12.77, "elapsed_time": "0:03:41", "remaining_time": "0:25:11"}
|
| 25 |
+
{"current_steps": 25, "total_steps": 188, "loss": 1.997, "accuracy": 0.4375, "lr": 1.920649886676429e-06, "epoch": 0.13333333333333333, "percentage": 13.3, "elapsed_time": "0:03:49", "remaining_time": "0:24:57"}
|
| 26 |
+
{"current_steps": 26, "total_steps": 188, "loss": 1.7471, "accuracy": 0.5, "lr": 1.9139980115238826e-06, "epoch": 0.13866666666666666, "percentage": 13.83, "elapsed_time": "0:03:58", "remaining_time": "0:24:45"}
|
| 27 |
+
{"current_steps": 27, "total_steps": 188, "loss": 1.8404, "accuracy": 0.390625, "lr": 1.9070909137343408e-06, "epoch": 0.144, "percentage": 14.36, "elapsed_time": "0:04:07", "remaining_time": "0:24:33"}
|
| 28 |
+
{"current_steps": 28, "total_steps": 188, "loss": 1.6294, "accuracy": 0.53125, "lr": 1.8999305220294077e-06, "epoch": 0.14933333333333335, "percentage": 14.89, "elapsed_time": "0:04:16", "remaining_time": "0:24:24"}
|
| 29 |
+
{"current_steps": 29, "total_steps": 188, "loss": 2.0411, "accuracy": 0.359375, "lr": 1.892518835859881e-06, "epoch": 0.15466666666666667, "percentage": 15.43, "elapsed_time": "0:04:25", "remaining_time": "0:24:13"}
|
| 30 |
+
{"current_steps": 30, "total_steps": 188, "loss": 1.8724, "accuracy": 0.421875, "lr": 1.8848579248474285e-06, "epoch": 0.16, "percentage": 15.96, "elapsed_time": "0:04:35", "remaining_time": "0:24:08"}
|
| 31 |
+
{"current_steps": 31, "total_steps": 188, "loss": 1.7089, "accuracy": 0.40625, "lr": 1.8769499282066714e-06, "epoch": 0.16533333333333333, "percentage": 16.49, "elapsed_time": "0:04:44", "remaining_time": "0:23:59"}
|
| 32 |
+
{"current_steps": 32, "total_steps": 188, "loss": 1.6992, "accuracy": 0.46875, "lr": 1.8687970541478364e-06, "epoch": 0.17066666666666666, "percentage": 17.02, "elapsed_time": "0:04:53", "remaining_time": "0:23:52"}
|
| 33 |
+
{"current_steps": 33, "total_steps": 188, "loss": 2.0114, "accuracy": 0.40625, "lr": 1.8604015792601394e-06, "epoch": 0.176, "percentage": 17.55, "elapsed_time": "0:05:03", "remaining_time": "0:23:43"}
|
| 34 |
+
{"current_steps": 34, "total_steps": 188, "loss": 1.8528, "accuracy": 0.46875, "lr": 1.8517658478760761e-06, "epoch": 0.18133333333333335, "percentage": 18.09, "elapsed_time": "0:05:11", "remaining_time": "0:23:32"}
|
| 35 |
+
{"current_steps": 35, "total_steps": 188, "loss": 1.9673, "accuracy": 0.46875, "lr": 1.842892271416797e-06, "epoch": 0.18666666666666668, "percentage": 18.62, "elapsed_time": "0:05:20", "remaining_time": "0:23:22"}
|
| 36 |
+
{"current_steps": 36, "total_steps": 188, "loss": 1.6934, "accuracy": 0.484375, "lr": 1.833783327718747e-06, "epoch": 0.192, "percentage": 19.15, "elapsed_time": "0:05:31", "remaining_time": "0:23:19"}
|
| 37 |
+
{"current_steps": 37, "total_steps": 188, "loss": 1.8209, "accuracy": 0.453125, "lr": 1.8244415603417603e-06, "epoch": 0.19733333333333333, "percentage": 19.68, "elapsed_time": "0:05:41", "remaining_time": "0:23:15"}
|
| 38 |
+
{"current_steps": 38, "total_steps": 188, "loss": 1.7404, "accuracy": 0.515625, "lr": 1.8148695778588032e-06, "epoch": 0.20266666666666666, "percentage": 20.21, "elapsed_time": "0:05:50", "remaining_time": "0:23:04"}
|
| 39 |
+
{"current_steps": 39, "total_steps": 188, "loss": 1.7009, "accuracy": 0.515625, "lr": 1.805070053127563e-06, "epoch": 0.208, "percentage": 20.74, "elapsed_time": "0:06:00", "remaining_time": "0:22:55"}
|
| 40 |
+
{"current_steps": 40, "total_steps": 188, "loss": 1.7118, "accuracy": 0.5625, "lr": 1.795045722544083e-06, "epoch": 0.21333333333333335, "percentage": 21.28, "elapsed_time": "0:06:08", "remaining_time": "0:22:44"}
|
| 41 |
+
{"current_steps": 41, "total_steps": 188, "loss": 1.5965, "accuracy": 0.5625, "lr": 1.784799385278661e-06, "epoch": 0.21866666666666668, "percentage": 21.81, "elapsed_time": "0:06:18", "remaining_time": "0:22:37"}
|
| 42 |
+
{"current_steps": 42, "total_steps": 188, "loss": 1.6453, "accuracy": 0.46875, "lr": 1.7743339024942135e-06, "epoch": 0.224, "percentage": 22.34, "elapsed_time": "0:06:27", "remaining_time": "0:22:28"}
|
| 43 |
+
{"current_steps": 43, "total_steps": 188, "loss": 1.8012, "accuracy": 0.453125, "lr": 1.7636521965473321e-06, "epoch": 0.22933333333333333, "percentage": 22.87, "elapsed_time": "0:06:36", "remaining_time": "0:22:16"}
|
| 44 |
+
{"current_steps": 44, "total_steps": 188, "loss": 2.0332, "accuracy": 0.421875, "lr": 1.7527572501722513e-06, "epoch": 0.23466666666666666, "percentage": 23.4, "elapsed_time": "0:06:46", "remaining_time": "0:22:10"}
|
| 45 |
+
{"current_steps": 45, "total_steps": 188, "loss": 1.5592, "accuracy": 0.5625, "lr": 1.7416521056479575e-06, "epoch": 0.24, "percentage": 23.94, "elapsed_time": "0:06:56", "remaining_time": "0:22:02"}
|
| 46 |
+
{"current_steps": 46, "total_steps": 188, "loss": 1.9044, "accuracy": 0.484375, "lr": 1.7303398639486693e-06, "epoch": 0.24533333333333332, "percentage": 24.47, "elapsed_time": "0:07:05", "remaining_time": "0:21:52"}
|
| 47 |
+
{"current_steps": 47, "total_steps": 188, "loss": 1.7462, "accuracy": 0.421875, "lr": 1.7188236838779293e-06, "epoch": 0.25066666666666665, "percentage": 25.0, "elapsed_time": "0:07:13", "remaining_time": "0:21:41"}
|
| 48 |
+
{"current_steps": 48, "total_steps": 188, "loss": 1.9812, "accuracy": 0.40625, "lr": 1.7071067811865474e-06, "epoch": 0.256, "percentage": 25.53, "elapsed_time": "0:07:22", "remaining_time": "0:21:31"}
|
| 49 |
+
{"current_steps": 49, "total_steps": 188, "loss": 1.884, "accuracy": 0.484375, "lr": 1.6951924276746423e-06, "epoch": 0.2613333333333333, "percentage": 26.06, "elapsed_time": "0:07:33", "remaining_time": "0:21:26"}
|
| 50 |
+
{"current_steps": 50, "total_steps": 188, "loss": 1.6118, "accuracy": 0.5625, "lr": 1.6830839502780308e-06, "epoch": 0.26666666666666666, "percentage": 26.6, "elapsed_time": "0:07:42", "remaining_time": "0:21:17"}
|
| 51 |
+
{"current_steps": 51, "total_steps": 188, "loss": 1.842, "accuracy": 0.5, "lr": 1.6707847301392235e-06, "epoch": 0.272, "percentage": 27.13, "elapsed_time": "0:07:51", "remaining_time": "0:21:06"}
|
| 52 |
+
{"current_steps": 52, "total_steps": 188, "loss": 1.7435, "accuracy": 0.515625, "lr": 1.6582982016632816e-06, "epoch": 0.2773333333333333, "percentage": 27.66, "elapsed_time": "0:08:01", "remaining_time": "0:20:58"}
|
| 53 |
+
{"current_steps": 53, "total_steps": 188, "loss": 2.0724, "accuracy": 0.40625, "lr": 1.6456278515588023e-06, "epoch": 0.2826666666666667, "percentage": 28.19, "elapsed_time": "0:08:10", "remaining_time": "0:20:49"}
|
| 54 |
+
{"current_steps": 54, "total_steps": 188, "loss": 1.7672, "accuracy": 0.484375, "lr": 1.6327772178642986e-06, "epoch": 0.288, "percentage": 28.72, "elapsed_time": "0:08:18", "remaining_time": "0:20:36"}
|
| 55 |
+
{"current_steps": 55, "total_steps": 188, "loss": 1.5599, "accuracy": 0.5625, "lr": 1.6197498889602449e-06, "epoch": 0.29333333333333333, "percentage": 29.26, "elapsed_time": "0:08:26", "remaining_time": "0:20:25"}
|
| 56 |
+
{"current_steps": 56, "total_steps": 188, "loss": 1.7711, "accuracy": 0.5, "lr": 1.6065495025670672e-06, "epoch": 0.2986666666666667, "percentage": 29.79, "elapsed_time": "0:08:35", "remaining_time": "0:20:15"}
|
| 57 |
+
{"current_steps": 57, "total_steps": 188, "loss": 1.8824, "accuracy": 0.46875, "lr": 1.5931797447293551e-06, "epoch": 0.304, "percentage": 30.32, "elapsed_time": "0:08:44", "remaining_time": "0:20:04"}
|
| 58 |
+
{"current_steps": 58, "total_steps": 188, "loss": 1.7345, "accuracy": 0.5625, "lr": 1.5796443487865775e-06, "epoch": 0.30933333333333335, "percentage": 30.85, "elapsed_time": "0:08:53", "remaining_time": "0:19:56"}
|
| 59 |
+
{"current_steps": 59, "total_steps": 188, "loss": 1.876, "accuracy": 0.453125, "lr": 1.5659470943305953e-06, "epoch": 0.31466666666666665, "percentage": 31.38, "elapsed_time": "0:09:03", "remaining_time": "0:19:47"}
|
| 60 |
+
{"current_steps": 60, "total_steps": 188, "loss": 1.9142, "accuracy": 0.453125, "lr": 1.5520918061502565e-06, "epoch": 0.32, "percentage": 31.91, "elapsed_time": "0:09:11", "remaining_time": "0:19:37"}
|
| 61 |
+
{"current_steps": 61, "total_steps": 188, "loss": 1.7306, "accuracy": 0.484375, "lr": 1.5380823531633727e-06, "epoch": 0.3253333333333333, "percentage": 32.45, "elapsed_time": "0:09:20", "remaining_time": "0:19:27"}
|
| 62 |
+
{"current_steps": 62, "total_steps": 188, "loss": 1.8758, "accuracy": 0.421875, "lr": 1.5239226473363687e-06, "epoch": 0.33066666666666666, "percentage": 32.98, "elapsed_time": "0:09:29", "remaining_time": "0:19:18"}
|
| 63 |
+
{"current_steps": 63, "total_steps": 188, "loss": 1.9443, "accuracy": 0.453125, "lr": 1.5096166425919174e-06, "epoch": 0.336, "percentage": 33.51, "elapsed_time": "0:09:39", "remaining_time": "0:19:10"}
|
| 64 |
+
{"current_steps": 64, "total_steps": 188, "loss": 1.5634, "accuracy": 0.625, "lr": 1.4951683337048535e-06, "epoch": 0.3413333333333333, "percentage": 34.04, "elapsed_time": "0:09:49", "remaining_time": "0:19:02"}
|
| 65 |
+
{"current_steps": 65, "total_steps": 188, "loss": 1.7835, "accuracy": 0.578125, "lr": 1.4805817551866838e-06, "epoch": 0.3466666666666667, "percentage": 34.57, "elapsed_time": "0:09:58", "remaining_time": "0:18:52"}
|
| 66 |
+
{"current_steps": 66, "total_steps": 188, "loss": 1.5983, "accuracy": 0.5625, "lr": 1.465860980158998e-06, "epoch": 0.352, "percentage": 35.11, "elapsed_time": "0:10:07", "remaining_time": "0:18:43"}
|
| 67 |
+
{"current_steps": 67, "total_steps": 188, "loss": 1.7847, "accuracy": 0.5, "lr": 1.4510101192161017e-06, "epoch": 0.35733333333333334, "percentage": 35.64, "elapsed_time": "0:10:18", "remaining_time": "0:18:36"}
|
| 68 |
+
{"current_steps": 68, "total_steps": 188, "loss": 1.6313, "accuracy": 0.484375, "lr": 1.4360333192771828e-06, "epoch": 0.3626666666666667, "percentage": 36.17, "elapsed_time": "0:10:27", "remaining_time": "0:18:27"}
|
| 69 |
+
{"current_steps": 69, "total_steps": 188, "loss": 1.9079, "accuracy": 0.421875, "lr": 1.420934762428335e-06, "epoch": 0.368, "percentage": 36.7, "elapsed_time": "0:10:37", "remaining_time": "0:18:19"}
|
| 70 |
+
{"current_steps": 70, "total_steps": 188, "loss": 1.7673, "accuracy": 0.515625, "lr": 1.4057186647547636e-06, "epoch": 0.37333333333333335, "percentage": 37.23, "elapsed_time": "0:10:47", "remaining_time": "0:18:10"}
|
| 71 |
+
{"current_steps": 71, "total_steps": 188, "loss": 1.815, "accuracy": 0.421875, "lr": 1.3903892751634947e-06, "epoch": 0.37866666666666665, "percentage": 37.77, "elapsed_time": "0:10:57", "remaining_time": "0:18:03"}
|
| 72 |
+
{"current_steps": 72, "total_steps": 188, "loss": 1.7516, "accuracy": 0.484375, "lr": 1.374950874196921e-06, "epoch": 0.384, "percentage": 38.3, "elapsed_time": "0:11:06", "remaining_time": "0:17:53"}
|
| 73 |
+
{"current_steps": 73, "total_steps": 188, "loss": 1.8338, "accuracy": 0.5, "lr": 1.3594077728375126e-06, "epoch": 0.3893333333333333, "percentage": 38.83, "elapsed_time": "0:11:15", "remaining_time": "0:17:44"}
|
| 74 |
+
{"current_steps": 74, "total_steps": 188, "loss": 1.9166, "accuracy": 0.53125, "lr": 1.34376431130403e-06, "epoch": 0.39466666666666667, "percentage": 39.36, "elapsed_time": "0:11:24", "remaining_time": "0:17:34"}
|
| 75 |
+
{"current_steps": 75, "total_steps": 188, "loss": 1.5836, "accuracy": 0.515625, "lr": 1.328024857839569e-06, "epoch": 0.4, "percentage": 39.89, "elapsed_time": "0:11:33", "remaining_time": "0:17:24"}
|
| 76 |
+
{"current_steps": 76, "total_steps": 188, "loss": 1.7019, "accuracy": 0.515625, "lr": 1.3121938074917865e-06, "epoch": 0.4053333333333333, "percentage": 40.43, "elapsed_time": "0:11:41", "remaining_time": "0:17:14"}
|
| 77 |
+
{"current_steps": 77, "total_steps": 188, "loss": 1.8143, "accuracy": 0.484375, "lr": 1.296275580885634e-06, "epoch": 0.4106666666666667, "percentage": 40.96, "elapsed_time": "0:11:51", "remaining_time": "0:17:05"}
|
| 78 |
+
{"current_steps": 78, "total_steps": 188, "loss": 1.6793, "accuracy": 0.515625, "lr": 1.280274622988956e-06, "epoch": 0.416, "percentage": 41.49, "elapsed_time": "0:12:01", "remaining_time": "0:16:57"}
|
| 79 |
+
{"current_steps": 79, "total_steps": 188, "loss": 1.7353, "accuracy": 0.484375, "lr": 1.264195401871286e-06, "epoch": 0.42133333333333334, "percentage": 42.02, "elapsed_time": "0:12:09", "remaining_time": "0:16:47"}
|
| 80 |
+
{"current_steps": 80, "total_steps": 188, "loss": 1.6677, "accuracy": 0.5, "lr": 1.2480424074561933e-06, "epoch": 0.4266666666666667, "percentage": 42.55, "elapsed_time": "0:12:19", "remaining_time": "0:16:37"}
|
| 81 |
+
{"current_steps": 81, "total_steps": 188, "loss": 1.5844, "accuracy": 0.578125, "lr": 1.2318201502675282e-06, "epoch": 0.432, "percentage": 43.09, "elapsed_time": "0:12:29", "remaining_time": "0:16:30"}
|
| 82 |
+
{"current_steps": 82, "total_steps": 188, "loss": 1.5824, "accuracy": 0.53125, "lr": 1.2155331601699134e-06, "epoch": 0.43733333333333335, "percentage": 43.62, "elapsed_time": "0:12:38", "remaining_time": "0:16:21"}
|
| 83 |
+
{"current_steps": 83, "total_steps": 188, "loss": 1.3487, "accuracy": 0.609375, "lr": 1.199185985103836e-06, "epoch": 0.44266666666666665, "percentage": 44.15, "elapsed_time": "0:12:47", "remaining_time": "0:16:10"}
|
| 84 |
+
{"current_steps": 84, "total_steps": 188, "loss": 1.803, "accuracy": 0.53125, "lr": 1.1827831898156904e-06, "epoch": 0.448, "percentage": 44.68, "elapsed_time": "0:12:56", "remaining_time": "0:16:01"}
|
| 85 |
+
{"current_steps": 85, "total_steps": 188, "loss": 1.8717, "accuracy": 0.5, "lr": 1.16632935458313e-06, "epoch": 0.4533333333333333, "percentage": 45.21, "elapsed_time": "0:13:06", "remaining_time": "0:15:52"}
|
| 86 |
+
{"current_steps": 86, "total_steps": 188, "loss": 1.67, "accuracy": 0.5, "lr": 1.1498290739360814e-06, "epoch": 0.45866666666666667, "percentage": 45.74, "elapsed_time": "0:13:15", "remaining_time": "0:15:43"}
|
| 87 |
+
{"current_steps": 87, "total_steps": 188, "loss": 1.8179, "accuracy": 0.546875, "lr": 1.133286955373779e-06, "epoch": 0.464, "percentage": 46.28, "elapsed_time": "0:13:25", "remaining_time": "0:15:34"}
|
| 88 |
+
{"current_steps": 88, "total_steps": 188, "loss": 1.6856, "accuracy": 0.53125, "lr": 1.1167076180781762e-06, "epoch": 0.4693333333333333, "percentage": 46.81, "elapsed_time": "0:13:34", "remaining_time": "0:15:25"}
|
| 89 |
+
{"current_steps": 89, "total_steps": 188, "loss": 1.5637, "accuracy": 0.578125, "lr": 1.1000956916240985e-06, "epoch": 0.4746666666666667, "percentage": 47.34, "elapsed_time": "0:13:44", "remaining_time": "0:15:17"}
|
| 90 |
+
{"current_steps": 90, "total_steps": 188, "loss": 1.5542, "accuracy": 0.625, "lr": 1.0834558146864898e-06, "epoch": 0.48, "percentage": 47.87, "elapsed_time": "0:13:54", "remaining_time": "0:15:08"}
|
| 91 |
+
{"current_steps": 91, "total_steps": 188, "loss": 1.5519, "accuracy": 0.609375, "lr": 1.0667926337451217e-06, "epoch": 0.48533333333333334, "percentage": 48.4, "elapsed_time": "0:14:02", "remaining_time": "0:14:57"}
|
| 92 |
+
{"current_steps": 92, "total_steps": 188, "loss": 1.6359, "accuracy": 0.59375, "lr": 1.0501108017871191e-06, "epoch": 0.49066666666666664, "percentage": 48.94, "elapsed_time": "0:14:11", "remaining_time": "0:14:48"}
|
| 93 |
+
{"current_steps": 93, "total_steps": 188, "loss": 1.6573, "accuracy": 0.5625, "lr": 1.0334149770076745e-06, "epoch": 0.496, "percentage": 49.47, "elapsed_time": "0:14:20", "remaining_time": "0:14:39"}
|
| 94 |
+
{"current_steps": 94, "total_steps": 188, "loss": 1.762, "accuracy": 0.5625, "lr": 1.0167098215093009e-06, "epoch": 0.5013333333333333, "percentage": 50.0, "elapsed_time": "0:14:30", "remaining_time": "0:14:30"}
|
| 95 |
+
{"current_steps": 95, "total_steps": 188, "loss": 1.7451, "accuracy": 0.4375, "lr": 1e-06, "epoch": 0.5066666666666667, "percentage": 50.53, "elapsed_time": "0:14:38", "remaining_time": "0:14:19"}
|
| 96 |
+
{"current_steps": 96, "total_steps": 188, "loss": 1.6563, "accuracy": 0.546875, "lr": 9.83290178490699e-07, "epoch": 0.512, "percentage": 51.06, "elapsed_time": "0:14:47", "remaining_time": "0:14:10"}
|
| 97 |
+
{"current_steps": 97, "total_steps": 188, "loss": 2.0151, "accuracy": 0.46875, "lr": 9.665850229923256e-07, "epoch": 0.5173333333333333, "percentage": 51.6, "elapsed_time": "0:14:56", "remaining_time": "0:14:01"}
|
| 98 |
+
{"current_steps": 98, "total_steps": 188, "loss": 1.6086, "accuracy": 0.59375, "lr": 9.498891982128809e-07, "epoch": 0.5226666666666666, "percentage": 52.13, "elapsed_time": "0:15:06", "remaining_time": "0:13:52"}
|
| 99 |
+
{"current_steps": 99, "total_steps": 188, "loss": 1.7378, "accuracy": 0.53125, "lr": 9.332073662548784e-07, "epoch": 0.528, "percentage": 52.66, "elapsed_time": "0:15:15", "remaining_time": "0:13:42"}
|
| 100 |
+
{"current_steps": 100, "total_steps": 188, "loss": 1.8311, "accuracy": 0.453125, "lr": 9.165441853135103e-07, "epoch": 0.5333333333333333, "percentage": 53.19, "elapsed_time": "0:15:24", "remaining_time": "0:13:33"}
|
| 101 |
+
{"current_steps": 101, "total_steps": 188, "loss": 1.8546, "accuracy": 0.53125, "lr": 8.999043083759016e-07, "epoch": 0.5386666666666666, "percentage": 53.72, "elapsed_time": "0:15:33", "remaining_time": "0:13:24"}
|
| 102 |
+
{"current_steps": 102, "total_steps": 188, "loss": 1.8358, "accuracy": 0.46875, "lr": 8.832923819218238e-07, "epoch": 0.544, "percentage": 54.26, "elapsed_time": "0:15:43", "remaining_time": "0:13:15"}
|
| 103 |
+
{"current_steps": 103, "total_steps": 188, "loss": 1.7745, "accuracy": 0.515625, "lr": 8.667130446262214e-07, "epoch": 0.5493333333333333, "percentage": 54.79, "elapsed_time": "0:15:52", "remaining_time": "0:13:05"}
|
| 104 |
+
{"current_steps": 104, "total_steps": 188, "loss": 1.4798, "accuracy": 0.53125, "lr": 8.501709260639185e-07, "epoch": 0.5546666666666666, "percentage": 55.32, "elapsed_time": "0:16:02", "remaining_time": "0:12:57"}
|
| 105 |
+
{"current_steps": 105, "total_steps": 188, "loss": 1.7661, "accuracy": 0.46875, "lr": 8.336706454168699e-07, "epoch": 0.56, "percentage": 55.85, "elapsed_time": "0:16:11", "remaining_time": "0:12:47"}
|
| 106 |
+
{"current_steps": 106, "total_steps": 188, "loss": 1.5891, "accuracy": 0.5625, "lr": 8.172168101843099e-07, "epoch": 0.5653333333333334, "percentage": 56.38, "elapsed_time": "0:16:20", "remaining_time": "0:12:38"}
|
| 107 |
+
{"current_steps": 107, "total_steps": 188, "loss": 1.6186, "accuracy": 0.5, "lr": 8.008140148961641e-07, "epoch": 0.5706666666666667, "percentage": 56.91, "elapsed_time": "0:16:30", "remaining_time": "0:12:29"}
|
| 108 |
+
{"current_steps": 108, "total_steps": 188, "loss": 1.5772, "accuracy": 0.546875, "lr": 7.844668398300864e-07, "epoch": 0.576, "percentage": 57.45, "elapsed_time": "0:16:41", "remaining_time": "0:12:22"}
|
| 109 |
+
{"current_steps": 109, "total_steps": 188, "loss": 1.5248, "accuracy": 0.515625, "lr": 7.681798497324716e-07, "epoch": 0.5813333333333334, "percentage": 57.98, "elapsed_time": "0:16:50", "remaining_time": "0:12:12"}
|
| 110 |
+
{"current_steps": 110, "total_steps": 188, "loss": 1.8438, "accuracy": 0.453125, "lr": 7.519575925438067e-07, "epoch": 0.5866666666666667, "percentage": 58.51, "elapsed_time": "0:16:58", "remaining_time": "0:12:02"}
|
| 111 |
+
{"current_steps": 111, "total_steps": 188, "loss": 1.6551, "accuracy": 0.546875, "lr": 7.35804598128714e-07, "epoch": 0.592, "percentage": 59.04, "elapsed_time": "0:17:08", "remaining_time": "0:11:53"}
|
| 112 |
+
{"current_steps": 112, "total_steps": 188, "loss": 1.7068, "accuracy": 0.578125, "lr": 7.197253770110437e-07, "epoch": 0.5973333333333334, "percentage": 59.57, "elapsed_time": "0:17:17", "remaining_time": "0:11:43"}
|
| 113 |
+
{"current_steps": 113, "total_steps": 188, "loss": 1.8298, "accuracy": 0.4375, "lr": 7.037244191143661e-07, "epoch": 0.6026666666666667, "percentage": 60.11, "elapsed_time": "0:17:25", "remaining_time": "0:11:34"}
|
| 114 |
+
{"current_steps": 114, "total_steps": 188, "loss": 1.4577, "accuracy": 0.625, "lr": 6.878061925082138e-07, "epoch": 0.608, "percentage": 60.64, "elapsed_time": "0:17:35", "remaining_time": "0:11:25"}
|
| 115 |
+
{"current_steps": 115, "total_steps": 188, "loss": 1.4686, "accuracy": 0.578125, "lr": 6.719751421604308e-07, "epoch": 0.6133333333333333, "percentage": 61.17, "elapsed_time": "0:17:46", "remaining_time": "0:11:16"}
|
| 116 |
+
{"current_steps": 116, "total_steps": 188, "loss": 1.7617, "accuracy": 0.515625, "lr": 6.562356886959704e-07, "epoch": 0.6186666666666667, "percentage": 61.7, "elapsed_time": "0:17:55", "remaining_time": "0:11:07"}
|
| 117 |
+
{"current_steps": 117, "total_steps": 188, "loss": 1.8709, "accuracy": 0.453125, "lr": 6.405922271624873e-07, "epoch": 0.624, "percentage": 62.23, "elapsed_time": "0:18:06", "remaining_time": "0:10:59"}
|
| 118 |
+
{"current_steps": 118, "total_steps": 188, "loss": 1.7076, "accuracy": 0.578125, "lr": 6.25049125803079e-07, "epoch": 0.6293333333333333, "percentage": 62.77, "elapsed_time": "0:18:16", "remaining_time": "0:10:50"}
|
| 119 |
+
{"current_steps": 119, "total_steps": 188, "loss": 1.7807, "accuracy": 0.5, "lr": 6.096107248365052e-07, "epoch": 0.6346666666666667, "percentage": 63.3, "elapsed_time": "0:18:25", "remaining_time": "0:10:40"}
|
| 120 |
+
{"current_steps": 120, "total_steps": 188, "loss": 1.5245, "accuracy": 0.640625, "lr": 5.942813352452364e-07, "epoch": 0.64, "percentage": 63.83, "elapsed_time": "0:18:34", "remaining_time": "0:10:31"}
|
| 121 |
+
{"current_steps": 121, "total_steps": 188, "loss": 1.7885, "accuracy": 0.453125, "lr": 5.790652375716652e-07, "epoch": 0.6453333333333333, "percentage": 64.36, "elapsed_time": "0:18:42", "remaining_time": "0:10:21"}
|
| 122 |
+
{"current_steps": 122, "total_steps": 188, "loss": 1.7987, "accuracy": 0.453125, "lr": 5.639666807228174e-07, "epoch": 0.6506666666666666, "percentage": 64.89, "elapsed_time": "0:18:52", "remaining_time": "0:10:12"}
|
| 123 |
+
{"current_steps": 123, "total_steps": 188, "loss": 1.7203, "accuracy": 0.515625, "lr": 5.48989880783898e-07, "epoch": 0.656, "percentage": 65.43, "elapsed_time": "0:19:02", "remaining_time": "0:10:03"}
|
| 124 |
+
{"current_steps": 124, "total_steps": 188, "loss": 1.8513, "accuracy": 0.484375, "lr": 5.341390198410018e-07, "epoch": 0.6613333333333333, "percentage": 65.96, "elapsed_time": "0:19:11", "remaining_time": "0:09:54"}
|
| 125 |
+
{"current_steps": 125, "total_steps": 188, "loss": 1.6818, "accuracy": 0.5625, "lr": 5.194182448133162e-07, "epoch": 0.6666666666666666, "percentage": 66.49, "elapsed_time": "0:19:19", "remaining_time": "0:09:44"}
|
| 126 |
+
{"current_steps": 126, "total_steps": 188, "loss": 1.53, "accuracy": 0.609375, "lr": 5.048316662951465e-07, "epoch": 0.672, "percentage": 67.02, "elapsed_time": "0:19:28", "remaining_time": "0:09:35"}
|
| 127 |
+
{"current_steps": 127, "total_steps": 188, "loss": 1.6266, "accuracy": 0.609375, "lr": 4.903833574080825e-07, "epoch": 0.6773333333333333, "percentage": 67.55, "elapsed_time": "0:19:37", "remaining_time": "0:09:25"}
|
| 128 |
+
{"current_steps": 128, "total_steps": 188, "loss": 1.6502, "accuracy": 0.53125, "lr": 4.7607735266363146e-07, "epoch": 0.6826666666666666, "percentage": 68.09, "elapsed_time": "0:19:47", "remaining_time": "0:09:16"}
|
| 129 |
+
{"current_steps": 129, "total_steps": 188, "loss": 1.392, "accuracy": 0.53125, "lr": 4.619176468366274e-07, "epoch": 0.688, "percentage": 68.62, "elapsed_time": "0:19:57", "remaining_time": "0:09:07"}
|
| 130 |
+
{"current_steps": 130, "total_steps": 188, "loss": 1.6432, "accuracy": 0.578125, "lr": 4.4790819384974345e-07, "epoch": 0.6933333333333334, "percentage": 69.15, "elapsed_time": "0:20:05", "remaining_time": "0:08:58"}
|
| 131 |
+
{"current_steps": 131, "total_steps": 188, "loss": 1.7429, "accuracy": 0.484375, "lr": 4.340529056694047e-07, "epoch": 0.6986666666666667, "percentage": 69.68, "elapsed_time": "0:20:14", "remaining_time": "0:08:48"}
|
| 132 |
+
{"current_steps": 132, "total_steps": 188, "loss": 1.5342, "accuracy": 0.59375, "lr": 4.2035565121342243e-07, "epoch": 0.704, "percentage": 70.21, "elapsed_time": "0:20:22", "remaining_time": "0:08:38"}
|
| 133 |
+
{"current_steps": 133, "total_steps": 188, "loss": 1.4928, "accuracy": 0.640625, "lr": 4.0682025527064477e-07, "epoch": 0.7093333333333334, "percentage": 70.74, "elapsed_time": "0:20:32", "remaining_time": "0:08:29"}
|
| 134 |
+
{"current_steps": 134, "total_steps": 188, "loss": 1.4017, "accuracy": 0.703125, "lr": 3.934504974329326e-07, "epoch": 0.7146666666666667, "percentage": 71.28, "elapsed_time": "0:20:41", "remaining_time": "0:08:20"}
|
| 135 |
+
{"current_steps": 135, "total_steps": 188, "loss": 1.7, "accuracy": 0.5, "lr": 3.8025011103975524e-07, "epoch": 0.72, "percentage": 71.81, "elapsed_time": "0:20:50", "remaining_time": "0:08:10"}
|
| 136 |
+
{"current_steps": 136, "total_steps": 188, "loss": 1.7133, "accuracy": 0.453125, "lr": 3.6722278213570136e-07, "epoch": 0.7253333333333334, "percentage": 72.34, "elapsed_time": "0:20:59", "remaining_time": "0:08:01"}
|
| 137 |
+
{"current_steps": 137, "total_steps": 188, "loss": 1.9142, "accuracy": 0.46875, "lr": 3.5437214844119757e-07, "epoch": 0.7306666666666667, "percentage": 72.87, "elapsed_time": "0:21:08", "remaining_time": "0:07:52"}
|
| 138 |
+
{"current_steps": 138, "total_steps": 188, "loss": 1.5914, "accuracy": 0.5, "lr": 3.417017983367184e-07, "epoch": 0.736, "percentage": 73.4, "elapsed_time": "0:21:17", "remaining_time": "0:07:42"}
|
| 139 |
+
{"current_steps": 139, "total_steps": 188, "loss": 1.6486, "accuracy": 0.640625, "lr": 3.2921526986077677e-07, "epoch": 0.7413333333333333, "percentage": 73.94, "elapsed_time": "0:21:26", "remaining_time": "0:07:33"}
|
| 140 |
+
{"current_steps": 140, "total_steps": 188, "loss": 1.6391, "accuracy": 0.578125, "lr": 3.169160497219692e-07, "epoch": 0.7466666666666667, "percentage": 74.47, "elapsed_time": "0:21:33", "remaining_time": "0:07:23"}
|
| 141 |
+
{"current_steps": 141, "total_steps": 188, "loss": 1.687, "accuracy": 0.546875, "lr": 3.048075723253577e-07, "epoch": 0.752, "percentage": 75.0, "elapsed_time": "0:21:43", "remaining_time": "0:07:14"}
|
| 142 |
+
{"current_steps": 142, "total_steps": 188, "loss": 1.9397, "accuracy": 0.484375, "lr": 2.9289321881345254e-07, "epoch": 0.7573333333333333, "percentage": 75.53, "elapsed_time": "0:21:52", "remaining_time": "0:07:05"}
|
| 143 |
+
{"current_steps": 143, "total_steps": 188, "loss": 1.6817, "accuracy": 0.515625, "lr": 2.811763161220708e-07, "epoch": 0.7626666666666667, "percentage": 76.06, "elapsed_time": "0:22:02", "remaining_time": "0:06:56"}
|
| 144 |
+
{"current_steps": 144, "total_steps": 188, "loss": 1.5122, "accuracy": 0.578125, "lr": 2.6966013605133084e-07, "epoch": 0.768, "percentage": 76.6, "elapsed_time": "0:22:12", "remaining_time": "0:06:47"}
|
| 145 |
+
{"current_steps": 145, "total_steps": 188, "loss": 1.6209, "accuracy": 0.578125, "lr": 2.583478943520424e-07, "epoch": 0.7733333333333333, "percentage": 77.13, "elapsed_time": "0:22:21", "remaining_time": "0:06:37"}
|
| 146 |
+
{"current_steps": 146, "total_steps": 188, "loss": 1.3693, "accuracy": 0.703125, "lr": 2.472427498277486e-07, "epoch": 0.7786666666666666, "percentage": 77.66, "elapsed_time": "0:22:30", "remaining_time": "0:06:28"}
|
| 147 |
+
{"current_steps": 147, "total_steps": 188, "loss": 1.6764, "accuracy": 0.546875, "lr": 2.3634780345266803e-07, "epoch": 0.784, "percentage": 78.19, "elapsed_time": "0:22:39", "remaining_time": "0:06:19"}
|
| 148 |
+
{"current_steps": 148, "total_steps": 188, "loss": 1.6071, "accuracy": 0.53125, "lr": 2.2566609750578668e-07, "epoch": 0.7893333333333333, "percentage": 78.72, "elapsed_time": "0:22:48", "remaining_time": "0:06:09"}
|
| 149 |
+
{"current_steps": 149, "total_steps": 188, "loss": 1.6005, "accuracy": 0.53125, "lr": 2.15200614721339e-07, "epoch": 0.7946666666666666, "percentage": 79.26, "elapsed_time": "0:22:57", "remaining_time": "0:06:00"}
|
| 150 |
+
{"current_steps": 150, "total_steps": 188, "loss": 1.5617, "accuracy": 0.546875, "lr": 2.04954277455917e-07, "epoch": 0.8, "percentage": 79.79, "elapsed_time": "0:23:06", "remaining_time": "0:05:51"}
|
| 151 |
+
{"current_steps": 151, "total_steps": 188, "loss": 1.566, "accuracy": 0.625, "lr": 1.9492994687243713e-07, "epoch": 0.8053333333333333, "percentage": 80.32, "elapsed_time": "0:23:16", "remaining_time": "0:05:42"}
|
| 152 |
+
{"current_steps": 152, "total_steps": 188, "loss": 1.7248, "accuracy": 0.53125, "lr": 1.8513042214119667e-07, "epoch": 0.8106666666666666, "percentage": 80.85, "elapsed_time": "0:23:25", "remaining_time": "0:05:32"}
|
| 153 |
+
{"current_steps": 153, "total_steps": 188, "loss": 1.7366, "accuracy": 0.46875, "lr": 1.755584396582399e-07, "epoch": 0.816, "percentage": 81.38, "elapsed_time": "0:23:34", "remaining_time": "0:05:23"}
|
| 154 |
+
{"current_steps": 154, "total_steps": 188, "loss": 1.6937, "accuracy": 0.546875, "lr": 1.6621667228125302e-07, "epoch": 0.8213333333333334, "percentage": 81.91, "elapsed_time": "0:23:43", "remaining_time": "0:05:14"}
|
| 155 |
+
{"current_steps": 155, "total_steps": 188, "loss": 1.8771, "accuracy": 0.4375, "lr": 1.57107728583203e-07, "epoch": 0.8266666666666667, "percentage": 82.45, "elapsed_time": "0:23:53", "remaining_time": "0:05:05"}
|
| 156 |
+
{"current_steps": 156, "total_steps": 188, "loss": 1.6005, "accuracy": 0.515625, "lr": 1.4823415212392375e-07, "epoch": 0.832, "percentage": 82.98, "elapsed_time": "0:24:03", "remaining_time": "0:04:56"}
|
| 157 |
+
{"current_steps": 157, "total_steps": 188, "loss": 1.4883, "accuracy": 0.71875, "lr": 1.3959842073986083e-07, "epoch": 0.8373333333333334, "percentage": 83.51, "elapsed_time": "0:24:13", "remaining_time": "0:04:46"}
|
| 158 |
+
{"current_steps": 158, "total_steps": 188, "loss": 1.5267, "accuracy": 0.625, "lr": 1.312029458521635e-07, "epoch": 0.8426666666666667, "percentage": 84.04, "elapsed_time": "0:24:23", "remaining_time": "0:04:37"}
|
| 159 |
+
{"current_steps": 159, "total_steps": 188, "loss": 1.6305, "accuracy": 0.484375, "lr": 1.230500717933285e-07, "epoch": 0.848, "percentage": 84.57, "elapsed_time": "0:24:32", "remaining_time": "0:04:28"}
|
| 160 |
+
{"current_steps": 160, "total_steps": 188, "loss": 2.0496, "accuracy": 0.53125, "lr": 1.1514207515257146e-07, "epoch": 0.8533333333333334, "percentage": 85.11, "elapsed_time": "0:24:41", "remaining_time": "0:04:19"}
|
| 161 |
+
{"current_steps": 161, "total_steps": 188, "loss": 1.9803, "accuracy": 0.484375, "lr": 1.0748116414011887e-07, "epoch": 0.8586666666666667, "percentage": 85.64, "elapsed_time": "0:24:50", "remaining_time": "0:04:10"}
|
| 162 |
+
{"current_steps": 162, "total_steps": 188, "loss": 1.7001, "accuracy": 0.484375, "lr": 1.0006947797059218e-07, "epoch": 0.864, "percentage": 86.17, "elapsed_time": "0:24:59", "remaining_time": "0:04:00"}
|
| 163 |
+
{"current_steps": 163, "total_steps": 188, "loss": 1.4682, "accuracy": 0.5625, "lr": 9.29090862656593e-08, "epoch": 0.8693333333333333, "percentage": 86.7, "elapsed_time": "0:25:08", "remaining_time": "0:03:51"}
|
| 164 |
+
{"current_steps": 164, "total_steps": 188, "loss": 2.0651, "accuracy": 0.40625, "lr": 8.600198847611729e-08, "epoch": 0.8746666666666667, "percentage": 87.23, "elapsed_time": "0:25:18", "remaining_time": "0:03:42"}
|
| 165 |
+
{"current_steps": 165, "total_steps": 188, "loss": 1.828, "accuracy": 0.53125, "lr": 7.93501133235711e-08, "epoch": 0.88, "percentage": 87.77, "elapsed_time": "0:25:26", "remaining_time": "0:03:32"}
|
| 166 |
+
{"current_steps": 166, "total_steps": 188, "loss": 1.8165, "accuracy": 0.515625, "lr": 7.295531826186263e-08, "epoch": 0.8853333333333333, "percentage": 88.3, "elapsed_time": "0:25:34", "remaining_time": "0:03:23"}
|
| 167 |
+
{"current_steps": 167, "total_steps": 188, "loss": 1.8147, "accuracy": 0.515625, "lr": 6.681938895839745e-08, "epoch": 0.8906666666666667, "percentage": 88.83, "elapsed_time": "0:25:43", "remaining_time": "0:03:14"}
|
| 168 |
+
{"current_steps": 168, "total_steps": 188, "loss": 1.8423, "accuracy": 0.484375, "lr": 6.094403879552212e-08, "epoch": 0.896, "percentage": 89.36, "elapsed_time": "0:25:52", "remaining_time": "0:03:04"}
|
| 169 |
+
{"current_steps": 169, "total_steps": 188, "loss": 1.7592, "accuracy": 0.46875, "lr": 5.533090839208132e-08, "epoch": 0.9013333333333333, "percentage": 89.89, "elapsed_time": "0:26:00", "remaining_time": "0:02:55"}
|
| 170 |
+
{"current_steps": 170, "total_steps": 188, "loss": 1.652, "accuracy": 0.609375, "lr": 4.998156514529594e-08, "epoch": 0.9066666666666666, "percentage": 90.43, "elapsed_time": "0:26:08", "remaining_time": "0:02:46"}
|
| 171 |
+
{"current_steps": 171, "total_steps": 188, "loss": 1.8636, "accuracy": 0.46875, "lr": 4.489750279308757e-08, "epoch": 0.912, "percentage": 90.96, "elapsed_time": "0:26:18", "remaining_time": "0:02:36"}
|
| 172 |
+
{"current_steps": 172, "total_steps": 188, "loss": 1.6156, "accuracy": 0.5625, "lr": 4.008014099696921e-08, "epoch": 0.9173333333333333, "percentage": 91.49, "elapsed_time": "0:26:27", "remaining_time": "0:02:27"}
|
| 173 |
+
{"current_steps": 173, "total_steps": 188, "loss": 1.4643, "accuracy": 0.625, "lr": 3.553082494562354e-08, "epoch": 0.9226666666666666, "percentage": 92.02, "elapsed_time": "0:26:37", "remaining_time": "0:02:18"}
|
| 174 |
+
{"current_steps": 174, "total_steps": 188, "loss": 1.6145, "accuracy": 0.46875, "lr": 3.125082497927467e-08, "epoch": 0.928, "percentage": 92.55, "elapsed_time": "0:26:47", "remaining_time": "0:02:09"}
|
| 175 |
+
{"current_steps": 175, "total_steps": 188, "loss": 1.2331, "accuracy": 0.640625, "lr": 2.7241336234962943e-08, "epoch": 0.9333333333333333, "percentage": 93.09, "elapsed_time": "0:26:58", "remaining_time": "0:02:00"}
|
| 176 |
+
{"current_steps": 176, "total_steps": 188, "loss": 1.467, "accuracy": 0.59375, "lr": 2.3503478312815295e-08, "epoch": 0.9386666666666666, "percentage": 93.62, "elapsed_time": "0:27:07", "remaining_time": "0:01:50"}
|
| 177 |
+
{"current_steps": 177, "total_steps": 188, "loss": 1.886, "accuracy": 0.515625, "lr": 2.003829496341325e-08, "epoch": 0.944, "percentage": 94.15, "elapsed_time": "0:27:16", "remaining_time": "0:01:41"}
|
| 178 |
+
{"current_steps": 178, "total_steps": 188, "loss": 1.699, "accuracy": 0.515625, "lr": 1.684675379633649e-08, "epoch": 0.9493333333333334, "percentage": 94.68, "elapsed_time": "0:27:26", "remaining_time": "0:01:32"}
|
| 179 |
+
{"current_steps": 179, "total_steps": 188, "loss": 1.5248, "accuracy": 0.53125, "lr": 1.3929746009971432e-08, "epoch": 0.9546666666666667, "percentage": 95.21, "elapsed_time": "0:27:35", "remaining_time": "0:01:23"}
|
| 180 |
+
{"current_steps": 180, "total_steps": 188, "loss": 1.4797, "accuracy": 0.578125, "lr": 1.1288086142653864e-08, "epoch": 0.96, "percentage": 95.74, "elapsed_time": "0:27:44", "remaining_time": "0:01:13"}
|
| 181 |
+
{"current_steps": 181, "total_steps": 188, "loss": 1.8814, "accuracy": 0.515625, "lr": 8.92251184521997e-09, "epoch": 0.9653333333333334, "percentage": 96.28, "elapsed_time": "0:27:54", "remaining_time": "0:01:04"}
|
| 182 |
+
{"current_steps": 182, "total_steps": 188, "loss": 1.9529, "accuracy": 0.5625, "lr": 6.833683675025903e-09, "epoch": 0.9706666666666667, "percentage": 96.81, "elapsed_time": "0:28:03", "remaining_time": "0:00:55"}
|
| 183 |
+
{"current_steps": 183, "total_steps": 188, "loss": 1.406, "accuracy": 0.65625, "lr": 5.022184911495864e-09, "epoch": 0.976, "percentage": 97.34, "elapsed_time": "0:28:12", "remaining_time": "0:00:46"}
|
| 184 |
+
{"current_steps": 184, "total_steps": 188, "loss": 1.8277, "accuracy": 0.53125, "lr": 3.4885213932484004e-09, "epoch": 0.9813333333333333, "percentage": 97.87, "elapsed_time": "0:28:21", "remaining_time": "0:00:36"}
|
| 185 |
+
{"current_steps": 185, "total_steps": 188, "loss": 1.7893, "accuracy": 0.515625, "lr": 2.233121376846836e-09, "epoch": 0.9866666666666667, "percentage": 98.4, "elapsed_time": "0:28:30", "remaining_time": "0:00:27"}
|
| 186 |
+
{"current_steps": 186, "total_steps": 188, "loss": 1.6867, "accuracy": 0.578125, "lr": 1.2563354172142603e-09, "epoch": 0.992, "percentage": 98.94, "elapsed_time": "0:28:39", "remaining_time": "0:00:18"}
|
| 187 |
+
{"current_steps": 187, "total_steps": 188, "loss": 1.7193, "accuracy": 0.53125, "lr": 5.584362697453881e-10, "epoch": 0.9973333333333333, "percentage": 99.47, "elapsed_time": "0:28:49", "remaining_time": "0:00:09"}
|
| 188 |
+
{"current_steps": 188, "total_steps": 188, "loss": 1.6741, "accuracy": 0.5, "lr": 1.3961881414292774e-10, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:28:54", "remaining_time": "0:00:00"}
|
| 189 |
+
{"current_steps": 188, "total_steps": 188, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:29:00", "remaining_time": "0:00:00"}
|
SimPO_Beta_2.5_Gamma_1.5/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
SimPO_Beta_2.5_Gamma_1.5/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:072dc1577b49f316c877d47cf323accc4220b8bfe183d5066fa7fba65f1d6d9e
|
| 3 |
+
size 7800
|
SimPO_Beta_2.5_Gamma_1.5/training_loss.png
ADDED
|
SimPO_Beta_2.5_Gamma_1.5/training_rewards_accuracies.png
ADDED
|
SimPO_Beta_2.5_Gamma_1.5/video_preprocessor_config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"crop_size": null,
|
| 3 |
+
"data_format": "channels_first",
|
| 4 |
+
"default_to_square": true,
|
| 5 |
+
"device": null,
|
| 6 |
+
"do_center_crop": null,
|
| 7 |
+
"do_convert_rgb": true,
|
| 8 |
+
"do_normalize": true,
|
| 9 |
+
"do_rescale": true,
|
| 10 |
+
"do_resize": true,
|
| 11 |
+
"do_sample_frames": false,
|
| 12 |
+
"fps": null,
|
| 13 |
+
"image_mean": [
|
| 14 |
+
0.48145466,
|
| 15 |
+
0.4578275,
|
| 16 |
+
0.40821073
|
| 17 |
+
],
|
| 18 |
+
"image_std": [
|
| 19 |
+
0.26862954,
|
| 20 |
+
0.26130258,
|
| 21 |
+
0.27577711
|
| 22 |
+
],
|
| 23 |
+
"input_data_format": null,
|
| 24 |
+
"max_frames": 768,
|
| 25 |
+
"max_pixels": 12845056,
|
| 26 |
+
"merge_size": 2,
|
| 27 |
+
"min_frames": 4,
|
| 28 |
+
"min_pixels": 3136,
|
| 29 |
+
"num_frames": null,
|
| 30 |
+
"pad_size": null,
|
| 31 |
+
"patch_size": 14,
|
| 32 |
+
"processor_class": "Qwen2VLProcessor",
|
| 33 |
+
"resample": 3,
|
| 34 |
+
"rescale_factor": 0.00392156862745098,
|
| 35 |
+
"return_metadata": false,
|
| 36 |
+
"size": {
|
| 37 |
+
"longest_edge": 12845056,
|
| 38 |
+
"shortest_edge": 3136
|
| 39 |
+
},
|
| 40 |
+
"temporal_patch_size": 2,
|
| 41 |
+
"video_metadata": null,
|
| 42 |
+
"video_processor_type": "Qwen2VLVideoProcessor"
|
| 43 |
+
}
|
SimPO_Beta_2.5_Gamma_1.5/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|