ByungOh-Ko committed on
Commit
55df646
·
1 Parent(s): 3ec9009
SimPO_Beta_2.5_Gamma_1.5/README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ license: other
4
+ base_model: Qwen/Qwen2-VL-2B-Instruct
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen2-VL-2B-Instruct
7
+ - llama-factory
8
+ - lora
9
+ - transformers
10
+ pipeline_tag: text-generation
11
+ model-index:
12
+ - name: SimPO_Beta_2.5_Gamma_1.5
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ # SimPO_Beta_2.5_Gamma_1.5
20
+
21
+ This model is a LoRA adapter (rank 128, alpha 256) fine-tuned from [Qwen/Qwen2-VL-2B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct) on the Qwen2_VL_2B_Instruct_12k dataset.
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 2e-06
41
+ - train_batch_size: 1
42
+ - eval_batch_size: 8
43
+ - seed: 42
44
+ - distributed_type: multi-GPU
45
+ - num_devices: 4
46
+ - gradient_accumulation_steps: 16
47
+ - total_train_batch_size: 64
48
+ - total_eval_batch_size: 32
49
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
50
+ - lr_scheduler_type: cosine
51
+ - num_epochs: 1.0
52
+
53
+ ### Training results
54
+
55
+
56
+
57
+ ### Framework versions
58
+
59
+ - PEFT 0.17.1
60
+ - Transformers 4.57.1
61
+ - Pytorch 2.5.1+cu121
62
+ - Datasets 4.0.0
63
+ - Tokenizers 0.22.1
SimPO_Beta_2.5_Gamma_1.5/adapter_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 256,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.0,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 128,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "q_proj",
29
+ "k_proj",
30
+ "o_proj",
31
+ "gate_proj",
32
+ "up_proj",
33
+ "down_proj",
34
+ "v_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_qalora": false,
41
+ "use_rslora": false
42
+ }
SimPO_Beta_2.5_Gamma_1.5/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6361802ccefa899e67dca3a30d39402610f5c1e4a52359aaaf3e4607e519a267
3
+ size 295495600
SimPO_Beta_2.5_Gamma_1.5/added_tokens.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|box_end|>": 151649,
3
+ "<|box_start|>": 151648,
4
+ "<|endoftext|>": 151643,
5
+ "<|im_end|>": 151645,
6
+ "<|im_start|>": 151644,
7
+ "<|image_pad|>": 151655,
8
+ "<|object_ref_end|>": 151647,
9
+ "<|object_ref_start|>": 151646,
10
+ "<|quad_end|>": 151651,
11
+ "<|quad_start|>": 151650,
12
+ "<|video_pad|>": 151656,
13
+ "<|vision_end|>": 151653,
14
+ "<|vision_pad|>": 151654,
15
+ "<|vision_start|>": 151652
16
+ }
SimPO_Beta_2.5_Gamma_1.5/all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 1.392373855640617e+17,
4
+ "train_loss": 1.7203158742569862,
5
+ "train_runtime": 1743.476,
6
+ "train_samples_per_second": 6.883,
7
+ "train_steps_per_second": 0.108
8
+ }
SimPO_Beta_2.5_Gamma_1.5/chat_template.jinja ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system
2
+ You are a helpful assistant.<|im_end|>
3
+ {% endif %}<|im_start|>{{ message['role'] }}
4
+ {% if message['content'] is string %}{{ message['content'] }}<|im_end|>
5
+ {% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>
6
+ {% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant
7
+ {% endif %}
SimPO_Beta_2.5_Gamma_1.5/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
SimPO_Beta_2.5_Gamma_1.5/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 12845056,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 14,
21
+ "processor_class": "Qwen2VLProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "longest_edge": 12845056,
26
+ "shortest_edge": 3136
27
+ },
28
+ "temporal_patch_size": 2
29
+ }
SimPO_Beta_2.5_Gamma_1.5/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
SimPO_Beta_2.5_Gamma_1.5/tokenizer_config.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<|object_ref_start|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "<|object_ref_end|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "<|box_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "<|box_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "<|quad_start|>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "<|quad_end|>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "<|vision_start|>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "<|vision_end|>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "<|vision_pad|>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "<|image_pad|>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "<|video_pad|>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ }
116
+ },
117
+ "additional_special_tokens": [
118
+ "<|im_start|>",
119
+ "<|im_end|>",
120
+ "<|object_ref_start|>",
121
+ "<|object_ref_end|>",
122
+ "<|box_start|>",
123
+ "<|box_end|>",
124
+ "<|quad_start|>",
125
+ "<|quad_end|>",
126
+ "<|vision_start|>",
127
+ "<|vision_end|>",
128
+ "<|vision_pad|>",
129
+ "<|image_pad|>",
130
+ "<|video_pad|>"
131
+ ],
132
+ "bos_token": null,
133
+ "clean_up_tokenization_spaces": false,
134
+ "eos_token": "<|im_end|>",
135
+ "errors": "replace",
136
+ "extra_special_tokens": {},
137
+ "model_max_length": 32768,
138
+ "pad_token": "<|endoftext|>",
139
+ "padding_side": "right",
140
+ "processor_class": "Qwen2VLProcessor",
141
+ "split_special_tokens": false,
142
+ "tokenizer_class": "Qwen2Tokenizer",
143
+ "unk_token": null
144
+ }
SimPO_Beta_2.5_Gamma_1.5/train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 1.392373855640617e+17,
4
+ "train_loss": 1.7203158742569862,
5
+ "train_runtime": 1743.476,
6
+ "train_samples_per_second": 6.883,
7
+ "train_steps_per_second": 0.108
8
+ }
SimPO_Beta_2.5_Gamma_1.5/trainer_log.jsonl ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 188, "loss": 1.8444, "accuracy": 0.484375, "lr": 2e-06, "epoch": 0.005333333333333333, "percentage": 0.53, "elapsed_time": "0:00:10", "remaining_time": "0:31:38"}
2
+ {"current_steps": 2, "total_steps": 188, "loss": 1.5274, "accuracy": 0.578125, "lr": 1.999860381185857e-06, "epoch": 0.010666666666666666, "percentage": 1.06, "elapsed_time": "0:00:19", "remaining_time": "0:30:36"}
3
+ {"current_steps": 3, "total_steps": 188, "loss": 1.5387, "accuracy": 0.671875, "lr": 1.9994415637302546e-06, "epoch": 0.016, "percentage": 1.6, "elapsed_time": "0:00:30", "remaining_time": "0:30:51"}
4
+ {"current_steps": 4, "total_steps": 188, "loss": 1.7716, "accuracy": 0.5625, "lr": 1.998743664582786e-06, "epoch": 0.021333333333333333, "percentage": 2.13, "elapsed_time": "0:00:38", "remaining_time": "0:29:15"}
5
+ {"current_steps": 5, "total_steps": 188, "loss": 2.1547, "accuracy": 0.359375, "lr": 1.9977668786231533e-06, "epoch": 0.02666666666666667, "percentage": 2.66, "elapsed_time": "0:00:47", "remaining_time": "0:28:43"}
6
+ {"current_steps": 6, "total_steps": 188, "loss": 1.6678, "accuracy": 0.484375, "lr": 1.9965114786067515e-06, "epoch": 0.032, "percentage": 3.19, "elapsed_time": "0:00:56", "remaining_time": "0:28:39"}
7
+ {"current_steps": 7, "total_steps": 188, "loss": 1.7993, "accuracy": 0.453125, "lr": 1.994977815088504e-06, "epoch": 0.037333333333333336, "percentage": 3.72, "elapsed_time": "0:01:05", "remaining_time": "0:28:03"}
8
+ {"current_steps": 8, "total_steps": 188, "loss": 1.8437, "accuracy": 0.421875, "lr": 1.993166316324974e-06, "epoch": 0.042666666666666665, "percentage": 4.26, "elapsed_time": "0:01:16", "remaining_time": "0:28:33"}
9
+ {"current_steps": 9, "total_steps": 188, "loss": 1.6577, "accuracy": 0.421875, "lr": 1.99107748815478e-06, "epoch": 0.048, "percentage": 4.79, "elapsed_time": "0:01:25", "remaining_time": "0:28:13"}
10
+ {"current_steps": 10, "total_steps": 188, "loss": 1.7743, "accuracy": 0.4375, "lr": 1.988711913857346e-06, "epoch": 0.05333333333333334, "percentage": 5.32, "elapsed_time": "0:01:33", "remaining_time": "0:27:46"}
11
+ {"current_steps": 11, "total_steps": 188, "loss": 1.892, "accuracy": 0.421875, "lr": 1.9860702539900285e-06, "epoch": 0.058666666666666666, "percentage": 5.85, "elapsed_time": "0:01:42", "remaining_time": "0:27:22"}
12
+ {"current_steps": 12, "total_steps": 188, "loss": 1.7764, "accuracy": 0.4375, "lr": 1.9831532462036635e-06, "epoch": 0.064, "percentage": 6.38, "elapsed_time": "0:01:51", "remaining_time": "0:27:17"}
13
+ {"current_steps": 13, "total_steps": 188, "loss": 1.7401, "accuracy": 0.46875, "lr": 1.9799617050365867e-06, "epoch": 0.06933333333333333, "percentage": 6.91, "elapsed_time": "0:02:02", "remaining_time": "0:27:29"}
14
+ {"current_steps": 14, "total_steps": 188, "loss": 1.9175, "accuracy": 0.421875, "lr": 1.9764965216871843e-06, "epoch": 0.07466666666666667, "percentage": 7.45, "elapsed_time": "0:02:12", "remaining_time": "0:27:20"}
15
+ {"current_steps": 15, "total_steps": 188, "loss": 1.6756, "accuracy": 0.46875, "lr": 1.9727586637650373e-06, "epoch": 0.08, "percentage": 7.98, "elapsed_time": "0:02:20", "remaining_time": "0:27:02"}
16
+ {"current_steps": 16, "total_steps": 188, "loss": 1.7047, "accuracy": 0.515625, "lr": 1.9687491750207253e-06, "epoch": 0.08533333333333333, "percentage": 8.51, "elapsed_time": "0:02:29", "remaining_time": "0:26:43"}
17
+ {"current_steps": 17, "total_steps": 188, "loss": 1.745, "accuracy": 0.46875, "lr": 1.9644691750543766e-06, "epoch": 0.09066666666666667, "percentage": 9.04, "elapsed_time": "0:02:38", "remaining_time": "0:26:38"}
18
+ {"current_steps": 18, "total_steps": 188, "loss": 1.5597, "accuracy": 0.609375, "lr": 1.9599198590030305e-06, "epoch": 0.096, "percentage": 9.57, "elapsed_time": "0:02:47", "remaining_time": "0:26:18"}
19
+ {"current_steps": 19, "total_steps": 188, "loss": 1.7817, "accuracy": 0.453125, "lr": 1.955102497206912e-06, "epoch": 0.10133333333333333, "percentage": 10.11, "elapsed_time": "0:02:55", "remaining_time": "0:26:05"}
20
+ {"current_steps": 20, "total_steps": 188, "loss": 1.8861, "accuracy": 0.359375, "lr": 1.950018434854704e-06, "epoch": 0.10666666666666667, "percentage": 10.64, "elapsed_time": "0:03:05", "remaining_time": "0:25:55"}
21
+ {"current_steps": 21, "total_steps": 188, "loss": 1.6677, "accuracy": 0.5, "lr": 1.9446690916079185e-06, "epoch": 0.112, "percentage": 11.17, "elapsed_time": "0:03:13", "remaining_time": "0:25:39"}
22
+ {"current_steps": 22, "total_steps": 188, "loss": 1.7143, "accuracy": 0.421875, "lr": 1.939055961204478e-06, "epoch": 0.11733333333333333, "percentage": 11.7, "elapsed_time": "0:03:23", "remaining_time": "0:25:32"}
23
+ {"current_steps": 23, "total_steps": 188, "loss": 1.8268, "accuracy": 0.453125, "lr": 1.9331806110416025e-06, "epoch": 0.12266666666666666, "percentage": 12.23, "elapsed_time": "0:03:31", "remaining_time": "0:25:18"}
24
+ {"current_steps": 24, "total_steps": 188, "loss": 1.6726, "accuracy": 0.5625, "lr": 1.9270446817381372e-06, "epoch": 0.128, "percentage": 12.77, "elapsed_time": "0:03:41", "remaining_time": "0:25:11"}
25
+ {"current_steps": 25, "total_steps": 188, "loss": 1.997, "accuracy": 0.4375, "lr": 1.920649886676429e-06, "epoch": 0.13333333333333333, "percentage": 13.3, "elapsed_time": "0:03:49", "remaining_time": "0:24:57"}
26
+ {"current_steps": 26, "total_steps": 188, "loss": 1.7471, "accuracy": 0.5, "lr": 1.9139980115238826e-06, "epoch": 0.13866666666666666, "percentage": 13.83, "elapsed_time": "0:03:58", "remaining_time": "0:24:45"}
27
+ {"current_steps": 27, "total_steps": 188, "loss": 1.8404, "accuracy": 0.390625, "lr": 1.9070909137343408e-06, "epoch": 0.144, "percentage": 14.36, "elapsed_time": "0:04:07", "remaining_time": "0:24:33"}
28
+ {"current_steps": 28, "total_steps": 188, "loss": 1.6294, "accuracy": 0.53125, "lr": 1.8999305220294077e-06, "epoch": 0.14933333333333335, "percentage": 14.89, "elapsed_time": "0:04:16", "remaining_time": "0:24:24"}
29
+ {"current_steps": 29, "total_steps": 188, "loss": 2.0411, "accuracy": 0.359375, "lr": 1.892518835859881e-06, "epoch": 0.15466666666666667, "percentage": 15.43, "elapsed_time": "0:04:25", "remaining_time": "0:24:13"}
30
+ {"current_steps": 30, "total_steps": 188, "loss": 1.8724, "accuracy": 0.421875, "lr": 1.8848579248474285e-06, "epoch": 0.16, "percentage": 15.96, "elapsed_time": "0:04:35", "remaining_time": "0:24:08"}
31
+ {"current_steps": 31, "total_steps": 188, "loss": 1.7089, "accuracy": 0.40625, "lr": 1.8769499282066714e-06, "epoch": 0.16533333333333333, "percentage": 16.49, "elapsed_time": "0:04:44", "remaining_time": "0:23:59"}
32
+ {"current_steps": 32, "total_steps": 188, "loss": 1.6992, "accuracy": 0.46875, "lr": 1.8687970541478364e-06, "epoch": 0.17066666666666666, "percentage": 17.02, "elapsed_time": "0:04:53", "remaining_time": "0:23:52"}
33
+ {"current_steps": 33, "total_steps": 188, "loss": 2.0114, "accuracy": 0.40625, "lr": 1.8604015792601394e-06, "epoch": 0.176, "percentage": 17.55, "elapsed_time": "0:05:03", "remaining_time": "0:23:43"}
34
+ {"current_steps": 34, "total_steps": 188, "loss": 1.8528, "accuracy": 0.46875, "lr": 1.8517658478760761e-06, "epoch": 0.18133333333333335, "percentage": 18.09, "elapsed_time": "0:05:11", "remaining_time": "0:23:32"}
35
+ {"current_steps": 35, "total_steps": 188, "loss": 1.9673, "accuracy": 0.46875, "lr": 1.842892271416797e-06, "epoch": 0.18666666666666668, "percentage": 18.62, "elapsed_time": "0:05:20", "remaining_time": "0:23:22"}
36
+ {"current_steps": 36, "total_steps": 188, "loss": 1.6934, "accuracy": 0.484375, "lr": 1.833783327718747e-06, "epoch": 0.192, "percentage": 19.15, "elapsed_time": "0:05:31", "remaining_time": "0:23:19"}
37
+ {"current_steps": 37, "total_steps": 188, "loss": 1.8209, "accuracy": 0.453125, "lr": 1.8244415603417603e-06, "epoch": 0.19733333333333333, "percentage": 19.68, "elapsed_time": "0:05:41", "remaining_time": "0:23:15"}
38
+ {"current_steps": 38, "total_steps": 188, "loss": 1.7404, "accuracy": 0.515625, "lr": 1.8148695778588032e-06, "epoch": 0.20266666666666666, "percentage": 20.21, "elapsed_time": "0:05:50", "remaining_time": "0:23:04"}
39
+ {"current_steps": 39, "total_steps": 188, "loss": 1.7009, "accuracy": 0.515625, "lr": 1.805070053127563e-06, "epoch": 0.208, "percentage": 20.74, "elapsed_time": "0:06:00", "remaining_time": "0:22:55"}
40
+ {"current_steps": 40, "total_steps": 188, "loss": 1.7118, "accuracy": 0.5625, "lr": 1.795045722544083e-06, "epoch": 0.21333333333333335, "percentage": 21.28, "elapsed_time": "0:06:08", "remaining_time": "0:22:44"}
41
+ {"current_steps": 41, "total_steps": 188, "loss": 1.5965, "accuracy": 0.5625, "lr": 1.784799385278661e-06, "epoch": 0.21866666666666668, "percentage": 21.81, "elapsed_time": "0:06:18", "remaining_time": "0:22:37"}
42
+ {"current_steps": 42, "total_steps": 188, "loss": 1.6453, "accuracy": 0.46875, "lr": 1.7743339024942135e-06, "epoch": 0.224, "percentage": 22.34, "elapsed_time": "0:06:27", "remaining_time": "0:22:28"}
43
+ {"current_steps": 43, "total_steps": 188, "loss": 1.8012, "accuracy": 0.453125, "lr": 1.7636521965473321e-06, "epoch": 0.22933333333333333, "percentage": 22.87, "elapsed_time": "0:06:36", "remaining_time": "0:22:16"}
44
+ {"current_steps": 44, "total_steps": 188, "loss": 2.0332, "accuracy": 0.421875, "lr": 1.7527572501722513e-06, "epoch": 0.23466666666666666, "percentage": 23.4, "elapsed_time": "0:06:46", "remaining_time": "0:22:10"}
45
+ {"current_steps": 45, "total_steps": 188, "loss": 1.5592, "accuracy": 0.5625, "lr": 1.7416521056479575e-06, "epoch": 0.24, "percentage": 23.94, "elapsed_time": "0:06:56", "remaining_time": "0:22:02"}
46
+ {"current_steps": 46, "total_steps": 188, "loss": 1.9044, "accuracy": 0.484375, "lr": 1.7303398639486693e-06, "epoch": 0.24533333333333332, "percentage": 24.47, "elapsed_time": "0:07:05", "remaining_time": "0:21:52"}
47
+ {"current_steps": 47, "total_steps": 188, "loss": 1.7462, "accuracy": 0.421875, "lr": 1.7188236838779293e-06, "epoch": 0.25066666666666665, "percentage": 25.0, "elapsed_time": "0:07:13", "remaining_time": "0:21:41"}
48
+ {"current_steps": 48, "total_steps": 188, "loss": 1.9812, "accuracy": 0.40625, "lr": 1.7071067811865474e-06, "epoch": 0.256, "percentage": 25.53, "elapsed_time": "0:07:22", "remaining_time": "0:21:31"}
49
+ {"current_steps": 49, "total_steps": 188, "loss": 1.884, "accuracy": 0.484375, "lr": 1.6951924276746423e-06, "epoch": 0.2613333333333333, "percentage": 26.06, "elapsed_time": "0:07:33", "remaining_time": "0:21:26"}
50
+ {"current_steps": 50, "total_steps": 188, "loss": 1.6118, "accuracy": 0.5625, "lr": 1.6830839502780308e-06, "epoch": 0.26666666666666666, "percentage": 26.6, "elapsed_time": "0:07:42", "remaining_time": "0:21:17"}
51
+ {"current_steps": 51, "total_steps": 188, "loss": 1.842, "accuracy": 0.5, "lr": 1.6707847301392235e-06, "epoch": 0.272, "percentage": 27.13, "elapsed_time": "0:07:51", "remaining_time": "0:21:06"}
52
+ {"current_steps": 52, "total_steps": 188, "loss": 1.7435, "accuracy": 0.515625, "lr": 1.6582982016632816e-06, "epoch": 0.2773333333333333, "percentage": 27.66, "elapsed_time": "0:08:01", "remaining_time": "0:20:58"}
53
+ {"current_steps": 53, "total_steps": 188, "loss": 2.0724, "accuracy": 0.40625, "lr": 1.6456278515588023e-06, "epoch": 0.2826666666666667, "percentage": 28.19, "elapsed_time": "0:08:10", "remaining_time": "0:20:49"}
54
+ {"current_steps": 54, "total_steps": 188, "loss": 1.7672, "accuracy": 0.484375, "lr": 1.6327772178642986e-06, "epoch": 0.288, "percentage": 28.72, "elapsed_time": "0:08:18", "remaining_time": "0:20:36"}
55
+ {"current_steps": 55, "total_steps": 188, "loss": 1.5599, "accuracy": 0.5625, "lr": 1.6197498889602449e-06, "epoch": 0.29333333333333333, "percentage": 29.26, "elapsed_time": "0:08:26", "remaining_time": "0:20:25"}
56
+ {"current_steps": 56, "total_steps": 188, "loss": 1.7711, "accuracy": 0.5, "lr": 1.6065495025670672e-06, "epoch": 0.2986666666666667, "percentage": 29.79, "elapsed_time": "0:08:35", "remaining_time": "0:20:15"}
57
+ {"current_steps": 57, "total_steps": 188, "loss": 1.8824, "accuracy": 0.46875, "lr": 1.5931797447293551e-06, "epoch": 0.304, "percentage": 30.32, "elapsed_time": "0:08:44", "remaining_time": "0:20:04"}
58
+ {"current_steps": 58, "total_steps": 188, "loss": 1.7345, "accuracy": 0.5625, "lr": 1.5796443487865775e-06, "epoch": 0.30933333333333335, "percentage": 30.85, "elapsed_time": "0:08:53", "remaining_time": "0:19:56"}
59
+ {"current_steps": 59, "total_steps": 188, "loss": 1.876, "accuracy": 0.453125, "lr": 1.5659470943305953e-06, "epoch": 0.31466666666666665, "percentage": 31.38, "elapsed_time": "0:09:03", "remaining_time": "0:19:47"}
60
+ {"current_steps": 60, "total_steps": 188, "loss": 1.9142, "accuracy": 0.453125, "lr": 1.5520918061502565e-06, "epoch": 0.32, "percentage": 31.91, "elapsed_time": "0:09:11", "remaining_time": "0:19:37"}
61
+ {"current_steps": 61, "total_steps": 188, "loss": 1.7306, "accuracy": 0.484375, "lr": 1.5380823531633727e-06, "epoch": 0.3253333333333333, "percentage": 32.45, "elapsed_time": "0:09:20", "remaining_time": "0:19:27"}
62
+ {"current_steps": 62, "total_steps": 188, "loss": 1.8758, "accuracy": 0.421875, "lr": 1.5239226473363687e-06, "epoch": 0.33066666666666666, "percentage": 32.98, "elapsed_time": "0:09:29", "remaining_time": "0:19:18"}
63
+ {"current_steps": 63, "total_steps": 188, "loss": 1.9443, "accuracy": 0.453125, "lr": 1.5096166425919174e-06, "epoch": 0.336, "percentage": 33.51, "elapsed_time": "0:09:39", "remaining_time": "0:19:10"}
64
+ {"current_steps": 64, "total_steps": 188, "loss": 1.5634, "accuracy": 0.625, "lr": 1.4951683337048535e-06, "epoch": 0.3413333333333333, "percentage": 34.04, "elapsed_time": "0:09:49", "remaining_time": "0:19:02"}
65
+ {"current_steps": 65, "total_steps": 188, "loss": 1.7835, "accuracy": 0.578125, "lr": 1.4805817551866838e-06, "epoch": 0.3466666666666667, "percentage": 34.57, "elapsed_time": "0:09:58", "remaining_time": "0:18:52"}
66
+ {"current_steps": 66, "total_steps": 188, "loss": 1.5983, "accuracy": 0.5625, "lr": 1.465860980158998e-06, "epoch": 0.352, "percentage": 35.11, "elapsed_time": "0:10:07", "remaining_time": "0:18:43"}
67
+ {"current_steps": 67, "total_steps": 188, "loss": 1.7847, "accuracy": 0.5, "lr": 1.4510101192161017e-06, "epoch": 0.35733333333333334, "percentage": 35.64, "elapsed_time": "0:10:18", "remaining_time": "0:18:36"}
68
+ {"current_steps": 68, "total_steps": 188, "loss": 1.6313, "accuracy": 0.484375, "lr": 1.4360333192771828e-06, "epoch": 0.3626666666666667, "percentage": 36.17, "elapsed_time": "0:10:27", "remaining_time": "0:18:27"}
69
+ {"current_steps": 69, "total_steps": 188, "loss": 1.9079, "accuracy": 0.421875, "lr": 1.420934762428335e-06, "epoch": 0.368, "percentage": 36.7, "elapsed_time": "0:10:37", "remaining_time": "0:18:19"}
70
+ {"current_steps": 70, "total_steps": 188, "loss": 1.7673, "accuracy": 0.515625, "lr": 1.4057186647547636e-06, "epoch": 0.37333333333333335, "percentage": 37.23, "elapsed_time": "0:10:47", "remaining_time": "0:18:10"}
71
+ {"current_steps": 71, "total_steps": 188, "loss": 1.815, "accuracy": 0.421875, "lr": 1.3903892751634947e-06, "epoch": 0.37866666666666665, "percentage": 37.77, "elapsed_time": "0:10:57", "remaining_time": "0:18:03"}
72
+ {"current_steps": 72, "total_steps": 188, "loss": 1.7516, "accuracy": 0.484375, "lr": 1.374950874196921e-06, "epoch": 0.384, "percentage": 38.3, "elapsed_time": "0:11:06", "remaining_time": "0:17:53"}
73
+ {"current_steps": 73, "total_steps": 188, "loss": 1.8338, "accuracy": 0.5, "lr": 1.3594077728375126e-06, "epoch": 0.3893333333333333, "percentage": 38.83, "elapsed_time": "0:11:15", "remaining_time": "0:17:44"}
74
+ {"current_steps": 74, "total_steps": 188, "loss": 1.9166, "accuracy": 0.53125, "lr": 1.34376431130403e-06, "epoch": 0.39466666666666667, "percentage": 39.36, "elapsed_time": "0:11:24", "remaining_time": "0:17:34"}
75
+ {"current_steps": 75, "total_steps": 188, "loss": 1.5836, "accuracy": 0.515625, "lr": 1.328024857839569e-06, "epoch": 0.4, "percentage": 39.89, "elapsed_time": "0:11:33", "remaining_time": "0:17:24"}
76
+ {"current_steps": 76, "total_steps": 188, "loss": 1.7019, "accuracy": 0.515625, "lr": 1.3121938074917865e-06, "epoch": 0.4053333333333333, "percentage": 40.43, "elapsed_time": "0:11:41", "remaining_time": "0:17:14"}
77
+ {"current_steps": 77, "total_steps": 188, "loss": 1.8143, "accuracy": 0.484375, "lr": 1.296275580885634e-06, "epoch": 0.4106666666666667, "percentage": 40.96, "elapsed_time": "0:11:51", "remaining_time": "0:17:05"}
78
+ {"current_steps": 78, "total_steps": 188, "loss": 1.6793, "accuracy": 0.515625, "lr": 1.280274622988956e-06, "epoch": 0.416, "percentage": 41.49, "elapsed_time": "0:12:01", "remaining_time": "0:16:57"}
79
+ {"current_steps": 79, "total_steps": 188, "loss": 1.7353, "accuracy": 0.484375, "lr": 1.264195401871286e-06, "epoch": 0.42133333333333334, "percentage": 42.02, "elapsed_time": "0:12:09", "remaining_time": "0:16:47"}
80
+ {"current_steps": 80, "total_steps": 188, "loss": 1.6677, "accuracy": 0.5, "lr": 1.2480424074561933e-06, "epoch": 0.4266666666666667, "percentage": 42.55, "elapsed_time": "0:12:19", "remaining_time": "0:16:37"}
81
+ {"current_steps": 81, "total_steps": 188, "loss": 1.5844, "accuracy": 0.578125, "lr": 1.2318201502675282e-06, "epoch": 0.432, "percentage": 43.09, "elapsed_time": "0:12:29", "remaining_time": "0:16:30"}
82
+ {"current_steps": 82, "total_steps": 188, "loss": 1.5824, "accuracy": 0.53125, "lr": 1.2155331601699134e-06, "epoch": 0.43733333333333335, "percentage": 43.62, "elapsed_time": "0:12:38", "remaining_time": "0:16:21"}
83
+ {"current_steps": 83, "total_steps": 188, "loss": 1.3487, "accuracy": 0.609375, "lr": 1.199185985103836e-06, "epoch": 0.44266666666666665, "percentage": 44.15, "elapsed_time": "0:12:47", "remaining_time": "0:16:10"}
84
+ {"current_steps": 84, "total_steps": 188, "loss": 1.803, "accuracy": 0.53125, "lr": 1.1827831898156904e-06, "epoch": 0.448, "percentage": 44.68, "elapsed_time": "0:12:56", "remaining_time": "0:16:01"}
85
+ {"current_steps": 85, "total_steps": 188, "loss": 1.8717, "accuracy": 0.5, "lr": 1.16632935458313e-06, "epoch": 0.4533333333333333, "percentage": 45.21, "elapsed_time": "0:13:06", "remaining_time": "0:15:52"}
86
+ {"current_steps": 86, "total_steps": 188, "loss": 1.67, "accuracy": 0.5, "lr": 1.1498290739360814e-06, "epoch": 0.45866666666666667, "percentage": 45.74, "elapsed_time": "0:13:15", "remaining_time": "0:15:43"}
87
+ {"current_steps": 87, "total_steps": 188, "loss": 1.8179, "accuracy": 0.546875, "lr": 1.133286955373779e-06, "epoch": 0.464, "percentage": 46.28, "elapsed_time": "0:13:25", "remaining_time": "0:15:34"}
88
+ {"current_steps": 88, "total_steps": 188, "loss": 1.6856, "accuracy": 0.53125, "lr": 1.1167076180781762e-06, "epoch": 0.4693333333333333, "percentage": 46.81, "elapsed_time": "0:13:34", "remaining_time": "0:15:25"}
89
+ {"current_steps": 89, "total_steps": 188, "loss": 1.5637, "accuracy": 0.578125, "lr": 1.1000956916240985e-06, "epoch": 0.4746666666666667, "percentage": 47.34, "elapsed_time": "0:13:44", "remaining_time": "0:15:17"}
90
+ {"current_steps": 90, "total_steps": 188, "loss": 1.5542, "accuracy": 0.625, "lr": 1.0834558146864898e-06, "epoch": 0.48, "percentage": 47.87, "elapsed_time": "0:13:54", "remaining_time": "0:15:08"}
91
+ {"current_steps": 91, "total_steps": 188, "loss": 1.5519, "accuracy": 0.609375, "lr": 1.0667926337451217e-06, "epoch": 0.48533333333333334, "percentage": 48.4, "elapsed_time": "0:14:02", "remaining_time": "0:14:57"}
92
+ {"current_steps": 92, "total_steps": 188, "loss": 1.6359, "accuracy": 0.59375, "lr": 1.0501108017871191e-06, "epoch": 0.49066666666666664, "percentage": 48.94, "elapsed_time": "0:14:11", "remaining_time": "0:14:48"}
93
+ {"current_steps": 93, "total_steps": 188, "loss": 1.6573, "accuracy": 0.5625, "lr": 1.0334149770076745e-06, "epoch": 0.496, "percentage": 49.47, "elapsed_time": "0:14:20", "remaining_time": "0:14:39"}
94
+ {"current_steps": 94, "total_steps": 188, "loss": 1.762, "accuracy": 0.5625, "lr": 1.0167098215093009e-06, "epoch": 0.5013333333333333, "percentage": 50.0, "elapsed_time": "0:14:30", "remaining_time": "0:14:30"}
95
+ {"current_steps": 95, "total_steps": 188, "loss": 1.7451, "accuracy": 0.4375, "lr": 1e-06, "epoch": 0.5066666666666667, "percentage": 50.53, "elapsed_time": "0:14:38", "remaining_time": "0:14:19"}
96
+ {"current_steps": 96, "total_steps": 188, "loss": 1.6563, "accuracy": 0.546875, "lr": 9.83290178490699e-07, "epoch": 0.512, "percentage": 51.06, "elapsed_time": "0:14:47", "remaining_time": "0:14:10"}
97
+ {"current_steps": 97, "total_steps": 188, "loss": 2.0151, "accuracy": 0.46875, "lr": 9.665850229923256e-07, "epoch": 0.5173333333333333, "percentage": 51.6, "elapsed_time": "0:14:56", "remaining_time": "0:14:01"}
98
+ {"current_steps": 98, "total_steps": 188, "loss": 1.6086, "accuracy": 0.59375, "lr": 9.498891982128809e-07, "epoch": 0.5226666666666666, "percentage": 52.13, "elapsed_time": "0:15:06", "remaining_time": "0:13:52"}
99
+ {"current_steps": 99, "total_steps": 188, "loss": 1.7378, "accuracy": 0.53125, "lr": 9.332073662548784e-07, "epoch": 0.528, "percentage": 52.66, "elapsed_time": "0:15:15", "remaining_time": "0:13:42"}
100
+ {"current_steps": 100, "total_steps": 188, "loss": 1.8311, "accuracy": 0.453125, "lr": 9.165441853135103e-07, "epoch": 0.5333333333333333, "percentage": 53.19, "elapsed_time": "0:15:24", "remaining_time": "0:13:33"}
101
+ {"current_steps": 101, "total_steps": 188, "loss": 1.8546, "accuracy": 0.53125, "lr": 8.999043083759016e-07, "epoch": 0.5386666666666666, "percentage": 53.72, "elapsed_time": "0:15:33", "remaining_time": "0:13:24"}
102
+ {"current_steps": 102, "total_steps": 188, "loss": 1.8358, "accuracy": 0.46875, "lr": 8.832923819218238e-07, "epoch": 0.544, "percentage": 54.26, "elapsed_time": "0:15:43", "remaining_time": "0:13:15"}
103
+ {"current_steps": 103, "total_steps": 188, "loss": 1.7745, "accuracy": 0.515625, "lr": 8.667130446262214e-07, "epoch": 0.5493333333333333, "percentage": 54.79, "elapsed_time": "0:15:52", "remaining_time": "0:13:05"}
104
+ {"current_steps": 104, "total_steps": 188, "loss": 1.4798, "accuracy": 0.53125, "lr": 8.501709260639185e-07, "epoch": 0.5546666666666666, "percentage": 55.32, "elapsed_time": "0:16:02", "remaining_time": "0:12:57"}
105
+ {"current_steps": 105, "total_steps": 188, "loss": 1.7661, "accuracy": 0.46875, "lr": 8.336706454168699e-07, "epoch": 0.56, "percentage": 55.85, "elapsed_time": "0:16:11", "remaining_time": "0:12:47"}
106
+ {"current_steps": 106, "total_steps": 188, "loss": 1.5891, "accuracy": 0.5625, "lr": 8.172168101843099e-07, "epoch": 0.5653333333333334, "percentage": 56.38, "elapsed_time": "0:16:20", "remaining_time": "0:12:38"}
107
+ {"current_steps": 107, "total_steps": 188, "loss": 1.6186, "accuracy": 0.5, "lr": 8.008140148961641e-07, "epoch": 0.5706666666666667, "percentage": 56.91, "elapsed_time": "0:16:30", "remaining_time": "0:12:29"}
108
+ {"current_steps": 108, "total_steps": 188, "loss": 1.5772, "accuracy": 0.546875, "lr": 7.844668398300864e-07, "epoch": 0.576, "percentage": 57.45, "elapsed_time": "0:16:41", "remaining_time": "0:12:22"}
109
+ {"current_steps": 109, "total_steps": 188, "loss": 1.5248, "accuracy": 0.515625, "lr": 7.681798497324716e-07, "epoch": 0.5813333333333334, "percentage": 57.98, "elapsed_time": "0:16:50", "remaining_time": "0:12:12"}
110
+ {"current_steps": 110, "total_steps": 188, "loss": 1.8438, "accuracy": 0.453125, "lr": 7.519575925438067e-07, "epoch": 0.5866666666666667, "percentage": 58.51, "elapsed_time": "0:16:58", "remaining_time": "0:12:02"}
111
+ {"current_steps": 111, "total_steps": 188, "loss": 1.6551, "accuracy": 0.546875, "lr": 7.35804598128714e-07, "epoch": 0.592, "percentage": 59.04, "elapsed_time": "0:17:08", "remaining_time": "0:11:53"}
112
+ {"current_steps": 112, "total_steps": 188, "loss": 1.7068, "accuracy": 0.578125, "lr": 7.197253770110437e-07, "epoch": 0.5973333333333334, "percentage": 59.57, "elapsed_time": "0:17:17", "remaining_time": "0:11:43"}
113
+ {"current_steps": 113, "total_steps": 188, "loss": 1.8298, "accuracy": 0.4375, "lr": 7.037244191143661e-07, "epoch": 0.6026666666666667, "percentage": 60.11, "elapsed_time": "0:17:25", "remaining_time": "0:11:34"}
114
+ {"current_steps": 114, "total_steps": 188, "loss": 1.4577, "accuracy": 0.625, "lr": 6.878061925082138e-07, "epoch": 0.608, "percentage": 60.64, "elapsed_time": "0:17:35", "remaining_time": "0:11:25"}
115
+ {"current_steps": 115, "total_steps": 188, "loss": 1.4686, "accuracy": 0.578125, "lr": 6.719751421604308e-07, "epoch": 0.6133333333333333, "percentage": 61.17, "elapsed_time": "0:17:46", "remaining_time": "0:11:16"}
116
+ {"current_steps": 116, "total_steps": 188, "loss": 1.7617, "accuracy": 0.515625, "lr": 6.562356886959704e-07, "epoch": 0.6186666666666667, "percentage": 61.7, "elapsed_time": "0:17:55", "remaining_time": "0:11:07"}
117
+ {"current_steps": 117, "total_steps": 188, "loss": 1.8709, "accuracy": 0.453125, "lr": 6.405922271624873e-07, "epoch": 0.624, "percentage": 62.23, "elapsed_time": "0:18:06", "remaining_time": "0:10:59"}
118
+ {"current_steps": 118, "total_steps": 188, "loss": 1.7076, "accuracy": 0.578125, "lr": 6.25049125803079e-07, "epoch": 0.6293333333333333, "percentage": 62.77, "elapsed_time": "0:18:16", "remaining_time": "0:10:50"}
119
+ {"current_steps": 119, "total_steps": 188, "loss": 1.7807, "accuracy": 0.5, "lr": 6.096107248365052e-07, "epoch": 0.6346666666666667, "percentage": 63.3, "elapsed_time": "0:18:25", "remaining_time": "0:10:40"}
120
+ {"current_steps": 120, "total_steps": 188, "loss": 1.5245, "accuracy": 0.640625, "lr": 5.942813352452364e-07, "epoch": 0.64, "percentage": 63.83, "elapsed_time": "0:18:34", "remaining_time": "0:10:31"}
121
+ {"current_steps": 121, "total_steps": 188, "loss": 1.7885, "accuracy": 0.453125, "lr": 5.790652375716652e-07, "epoch": 0.6453333333333333, "percentage": 64.36, "elapsed_time": "0:18:42", "remaining_time": "0:10:21"}
122
+ {"current_steps": 122, "total_steps": 188, "loss": 1.7987, "accuracy": 0.453125, "lr": 5.639666807228174e-07, "epoch": 0.6506666666666666, "percentage": 64.89, "elapsed_time": "0:18:52", "remaining_time": "0:10:12"}
123
+ {"current_steps": 123, "total_steps": 188, "loss": 1.7203, "accuracy": 0.515625, "lr": 5.48989880783898e-07, "epoch": 0.656, "percentage": 65.43, "elapsed_time": "0:19:02", "remaining_time": "0:10:03"}
124
+ {"current_steps": 124, "total_steps": 188, "loss": 1.8513, "accuracy": 0.484375, "lr": 5.341390198410018e-07, "epoch": 0.6613333333333333, "percentage": 65.96, "elapsed_time": "0:19:11", "remaining_time": "0:09:54"}
125
+ {"current_steps": 125, "total_steps": 188, "loss": 1.6818, "accuracy": 0.5625, "lr": 5.194182448133162e-07, "epoch": 0.6666666666666666, "percentage": 66.49, "elapsed_time": "0:19:19", "remaining_time": "0:09:44"}
126
+ {"current_steps": 126, "total_steps": 188, "loss": 1.53, "accuracy": 0.609375, "lr": 5.048316662951465e-07, "epoch": 0.672, "percentage": 67.02, "elapsed_time": "0:19:28", "remaining_time": "0:09:35"}
127
+ {"current_steps": 127, "total_steps": 188, "loss": 1.6266, "accuracy": 0.609375, "lr": 4.903833574080825e-07, "epoch": 0.6773333333333333, "percentage": 67.55, "elapsed_time": "0:19:37", "remaining_time": "0:09:25"}
128
+ {"current_steps": 128, "total_steps": 188, "loss": 1.6502, "accuracy": 0.53125, "lr": 4.7607735266363146e-07, "epoch": 0.6826666666666666, "percentage": 68.09, "elapsed_time": "0:19:47", "remaining_time": "0:09:16"}
129
+ {"current_steps": 129, "total_steps": 188, "loss": 1.392, "accuracy": 0.53125, "lr": 4.619176468366274e-07, "epoch": 0.688, "percentage": 68.62, "elapsed_time": "0:19:57", "remaining_time": "0:09:07"}
130
+ {"current_steps": 130, "total_steps": 188, "loss": 1.6432, "accuracy": 0.578125, "lr": 4.4790819384974345e-07, "epoch": 0.6933333333333334, "percentage": 69.15, "elapsed_time": "0:20:05", "remaining_time": "0:08:58"}
131
+ {"current_steps": 131, "total_steps": 188, "loss": 1.7429, "accuracy": 0.484375, "lr": 4.340529056694047e-07, "epoch": 0.6986666666666667, "percentage": 69.68, "elapsed_time": "0:20:14", "remaining_time": "0:08:48"}
132
+ {"current_steps": 132, "total_steps": 188, "loss": 1.5342, "accuracy": 0.59375, "lr": 4.2035565121342243e-07, "epoch": 0.704, "percentage": 70.21, "elapsed_time": "0:20:22", "remaining_time": "0:08:38"}
133
+ {"current_steps": 133, "total_steps": 188, "loss": 1.4928, "accuracy": 0.640625, "lr": 4.0682025527064477e-07, "epoch": 0.7093333333333334, "percentage": 70.74, "elapsed_time": "0:20:32", "remaining_time": "0:08:29"}
134
+ {"current_steps": 134, "total_steps": 188, "loss": 1.4017, "accuracy": 0.703125, "lr": 3.934504974329326e-07, "epoch": 0.7146666666666667, "percentage": 71.28, "elapsed_time": "0:20:41", "remaining_time": "0:08:20"}
135
+ {"current_steps": 135, "total_steps": 188, "loss": 1.7, "accuracy": 0.5, "lr": 3.8025011103975524e-07, "epoch": 0.72, "percentage": 71.81, "elapsed_time": "0:20:50", "remaining_time": "0:08:10"}
136
+ {"current_steps": 136, "total_steps": 188, "loss": 1.7133, "accuracy": 0.453125, "lr": 3.6722278213570136e-07, "epoch": 0.7253333333333334, "percentage": 72.34, "elapsed_time": "0:20:59", "remaining_time": "0:08:01"}
137
+ {"current_steps": 137, "total_steps": 188, "loss": 1.9142, "accuracy": 0.46875, "lr": 3.5437214844119757e-07, "epoch": 0.7306666666666667, "percentage": 72.87, "elapsed_time": "0:21:08", "remaining_time": "0:07:52"}
138
+ {"current_steps": 138, "total_steps": 188, "loss": 1.5914, "accuracy": 0.5, "lr": 3.417017983367184e-07, "epoch": 0.736, "percentage": 73.4, "elapsed_time": "0:21:17", "remaining_time": "0:07:42"}
139
+ {"current_steps": 139, "total_steps": 188, "loss": 1.6486, "accuracy": 0.640625, "lr": 3.2921526986077677e-07, "epoch": 0.7413333333333333, "percentage": 73.94, "elapsed_time": "0:21:26", "remaining_time": "0:07:33"}
140
+ {"current_steps": 140, "total_steps": 188, "loss": 1.6391, "accuracy": 0.578125, "lr": 3.169160497219692e-07, "epoch": 0.7466666666666667, "percentage": 74.47, "elapsed_time": "0:21:33", "remaining_time": "0:07:23"}
141
+ {"current_steps": 141, "total_steps": 188, "loss": 1.687, "accuracy": 0.546875, "lr": 3.048075723253577e-07, "epoch": 0.752, "percentage": 75.0, "elapsed_time": "0:21:43", "remaining_time": "0:07:14"}
142
+ {"current_steps": 142, "total_steps": 188, "loss": 1.9397, "accuracy": 0.484375, "lr": 2.9289321881345254e-07, "epoch": 0.7573333333333333, "percentage": 75.53, "elapsed_time": "0:21:52", "remaining_time": "0:07:05"}
143
+ {"current_steps": 143, "total_steps": 188, "loss": 1.6817, "accuracy": 0.515625, "lr": 2.811763161220708e-07, "epoch": 0.7626666666666667, "percentage": 76.06, "elapsed_time": "0:22:02", "remaining_time": "0:06:56"}
144
+ {"current_steps": 144, "total_steps": 188, "loss": 1.5122, "accuracy": 0.578125, "lr": 2.6966013605133084e-07, "epoch": 0.768, "percentage": 76.6, "elapsed_time": "0:22:12", "remaining_time": "0:06:47"}
145
+ {"current_steps": 145, "total_steps": 188, "loss": 1.6209, "accuracy": 0.578125, "lr": 2.583478943520424e-07, "epoch": 0.7733333333333333, "percentage": 77.13, "elapsed_time": "0:22:21", "remaining_time": "0:06:37"}
146
+ {"current_steps": 146, "total_steps": 188, "loss": 1.3693, "accuracy": 0.703125, "lr": 2.472427498277486e-07, "epoch": 0.7786666666666666, "percentage": 77.66, "elapsed_time": "0:22:30", "remaining_time": "0:06:28"}
147
+ {"current_steps": 147, "total_steps": 188, "loss": 1.6764, "accuracy": 0.546875, "lr": 2.3634780345266803e-07, "epoch": 0.784, "percentage": 78.19, "elapsed_time": "0:22:39", "remaining_time": "0:06:19"}
148
+ {"current_steps": 148, "total_steps": 188, "loss": 1.6071, "accuracy": 0.53125, "lr": 2.2566609750578668e-07, "epoch": 0.7893333333333333, "percentage": 78.72, "elapsed_time": "0:22:48", "remaining_time": "0:06:09"}
149
+ {"current_steps": 149, "total_steps": 188, "loss": 1.6005, "accuracy": 0.53125, "lr": 2.15200614721339e-07, "epoch": 0.7946666666666666, "percentage": 79.26, "elapsed_time": "0:22:57", "remaining_time": "0:06:00"}
150
+ {"current_steps": 150, "total_steps": 188, "loss": 1.5617, "accuracy": 0.546875, "lr": 2.04954277455917e-07, "epoch": 0.8, "percentage": 79.79, "elapsed_time": "0:23:06", "remaining_time": "0:05:51"}
151
+ {"current_steps": 151, "total_steps": 188, "loss": 1.566, "accuracy": 0.625, "lr": 1.9492994687243713e-07, "epoch": 0.8053333333333333, "percentage": 80.32, "elapsed_time": "0:23:16", "remaining_time": "0:05:42"}
152
+ {"current_steps": 152, "total_steps": 188, "loss": 1.7248, "accuracy": 0.53125, "lr": 1.8513042214119667e-07, "epoch": 0.8106666666666666, "percentage": 80.85, "elapsed_time": "0:23:25", "remaining_time": "0:05:32"}
153
+ {"current_steps": 153, "total_steps": 188, "loss": 1.7366, "accuracy": 0.46875, "lr": 1.755584396582399e-07, "epoch": 0.816, "percentage": 81.38, "elapsed_time": "0:23:34", "remaining_time": "0:05:23"}
154
+ {"current_steps": 154, "total_steps": 188, "loss": 1.6937, "accuracy": 0.546875, "lr": 1.6621667228125302e-07, "epoch": 0.8213333333333334, "percentage": 81.91, "elapsed_time": "0:23:43", "remaining_time": "0:05:14"}
155
+ {"current_steps": 155, "total_steps": 188, "loss": 1.8771, "accuracy": 0.4375, "lr": 1.57107728583203e-07, "epoch": 0.8266666666666667, "percentage": 82.45, "elapsed_time": "0:23:53", "remaining_time": "0:05:05"}
156
+ {"current_steps": 156, "total_steps": 188, "loss": 1.6005, "accuracy": 0.515625, "lr": 1.4823415212392375e-07, "epoch": 0.832, "percentage": 82.98, "elapsed_time": "0:24:03", "remaining_time": "0:04:56"}
157
+ {"current_steps": 157, "total_steps": 188, "loss": 1.4883, "accuracy": 0.71875, "lr": 1.3959842073986083e-07, "epoch": 0.8373333333333334, "percentage": 83.51, "elapsed_time": "0:24:13", "remaining_time": "0:04:46"}
158
+ {"current_steps": 158, "total_steps": 188, "loss": 1.5267, "accuracy": 0.625, "lr": 1.312029458521635e-07, "epoch": 0.8426666666666667, "percentage": 84.04, "elapsed_time": "0:24:23", "remaining_time": "0:04:37"}
159
+ {"current_steps": 159, "total_steps": 188, "loss": 1.6305, "accuracy": 0.484375, "lr": 1.230500717933285e-07, "epoch": 0.848, "percentage": 84.57, "elapsed_time": "0:24:32", "remaining_time": "0:04:28"}
160
+ {"current_steps": 160, "total_steps": 188, "loss": 2.0496, "accuracy": 0.53125, "lr": 1.1514207515257146e-07, "epoch": 0.8533333333333334, "percentage": 85.11, "elapsed_time": "0:24:41", "remaining_time": "0:04:19"}
161
+ {"current_steps": 161, "total_steps": 188, "loss": 1.9803, "accuracy": 0.484375, "lr": 1.0748116414011887e-07, "epoch": 0.8586666666666667, "percentage": 85.64, "elapsed_time": "0:24:50", "remaining_time": "0:04:10"}
162
+ {"current_steps": 162, "total_steps": 188, "loss": 1.7001, "accuracy": 0.484375, "lr": 1.0006947797059218e-07, "epoch": 0.864, "percentage": 86.17, "elapsed_time": "0:24:59", "remaining_time": "0:04:00"}
163
+ {"current_steps": 163, "total_steps": 188, "loss": 1.4682, "accuracy": 0.5625, "lr": 9.29090862656593e-08, "epoch": 0.8693333333333333, "percentage": 86.7, "elapsed_time": "0:25:08", "remaining_time": "0:03:51"}
164
+ {"current_steps": 164, "total_steps": 188, "loss": 2.0651, "accuracy": 0.40625, "lr": 8.600198847611729e-08, "epoch": 0.8746666666666667, "percentage": 87.23, "elapsed_time": "0:25:18", "remaining_time": "0:03:42"}
165
+ {"current_steps": 165, "total_steps": 188, "loss": 1.828, "accuracy": 0.53125, "lr": 7.93501133235711e-08, "epoch": 0.88, "percentage": 87.77, "elapsed_time": "0:25:26", "remaining_time": "0:03:32"}
166
+ {"current_steps": 166, "total_steps": 188, "loss": 1.8165, "accuracy": 0.515625, "lr": 7.295531826186263e-08, "epoch": 0.8853333333333333, "percentage": 88.3, "elapsed_time": "0:25:34", "remaining_time": "0:03:23"}
167
+ {"current_steps": 167, "total_steps": 188, "loss": 1.8147, "accuracy": 0.515625, "lr": 6.681938895839745e-08, "epoch": 0.8906666666666667, "percentage": 88.83, "elapsed_time": "0:25:43", "remaining_time": "0:03:14"}
168
+ {"current_steps": 168, "total_steps": 188, "loss": 1.8423, "accuracy": 0.484375, "lr": 6.094403879552212e-08, "epoch": 0.896, "percentage": 89.36, "elapsed_time": "0:25:52", "remaining_time": "0:03:04"}
169
+ {"current_steps": 169, "total_steps": 188, "loss": 1.7592, "accuracy": 0.46875, "lr": 5.533090839208132e-08, "epoch": 0.9013333333333333, "percentage": 89.89, "elapsed_time": "0:26:00", "remaining_time": "0:02:55"}
170
+ {"current_steps": 170, "total_steps": 188, "loss": 1.652, "accuracy": 0.609375, "lr": 4.998156514529594e-08, "epoch": 0.9066666666666666, "percentage": 90.43, "elapsed_time": "0:26:08", "remaining_time": "0:02:46"}
171
+ {"current_steps": 171, "total_steps": 188, "loss": 1.8636, "accuracy": 0.46875, "lr": 4.489750279308757e-08, "epoch": 0.912, "percentage": 90.96, "elapsed_time": "0:26:18", "remaining_time": "0:02:36"}
172
+ {"current_steps": 172, "total_steps": 188, "loss": 1.6156, "accuracy": 0.5625, "lr": 4.008014099696921e-08, "epoch": 0.9173333333333333, "percentage": 91.49, "elapsed_time": "0:26:27", "remaining_time": "0:02:27"}
173
+ {"current_steps": 173, "total_steps": 188, "loss": 1.4643, "accuracy": 0.625, "lr": 3.553082494562354e-08, "epoch": 0.9226666666666666, "percentage": 92.02, "elapsed_time": "0:26:37", "remaining_time": "0:02:18"}
174
+ {"current_steps": 174, "total_steps": 188, "loss": 1.6145, "accuracy": 0.46875, "lr": 3.125082497927467e-08, "epoch": 0.928, "percentage": 92.55, "elapsed_time": "0:26:47", "remaining_time": "0:02:09"}
175
+ {"current_steps": 175, "total_steps": 188, "loss": 1.2331, "accuracy": 0.640625, "lr": 2.7241336234962943e-08, "epoch": 0.9333333333333333, "percentage": 93.09, "elapsed_time": "0:26:58", "remaining_time": "0:02:00"}
176
+ {"current_steps": 176, "total_steps": 188, "loss": 1.467, "accuracy": 0.59375, "lr": 2.3503478312815295e-08, "epoch": 0.9386666666666666, "percentage": 93.62, "elapsed_time": "0:27:07", "remaining_time": "0:01:50"}
177
+ {"current_steps": 177, "total_steps": 188, "loss": 1.886, "accuracy": 0.515625, "lr": 2.003829496341325e-08, "epoch": 0.944, "percentage": 94.15, "elapsed_time": "0:27:16", "remaining_time": "0:01:41"}
178
+ {"current_steps": 178, "total_steps": 188, "loss": 1.699, "accuracy": 0.515625, "lr": 1.684675379633649e-08, "epoch": 0.9493333333333334, "percentage": 94.68, "elapsed_time": "0:27:26", "remaining_time": "0:01:32"}
179
+ {"current_steps": 179, "total_steps": 188, "loss": 1.5248, "accuracy": 0.53125, "lr": 1.3929746009971432e-08, "epoch": 0.9546666666666667, "percentage": 95.21, "elapsed_time": "0:27:35", "remaining_time": "0:01:23"}
180
+ {"current_steps": 180, "total_steps": 188, "loss": 1.4797, "accuracy": 0.578125, "lr": 1.1288086142653864e-08, "epoch": 0.96, "percentage": 95.74, "elapsed_time": "0:27:44", "remaining_time": "0:01:13"}
181
+ {"current_steps": 181, "total_steps": 188, "loss": 1.8814, "accuracy": 0.515625, "lr": 8.92251184521997e-09, "epoch": 0.9653333333333334, "percentage": 96.28, "elapsed_time": "0:27:54", "remaining_time": "0:01:04"}
182
+ {"current_steps": 182, "total_steps": 188, "loss": 1.9529, "accuracy": 0.5625, "lr": 6.833683675025903e-09, "epoch": 0.9706666666666667, "percentage": 96.81, "elapsed_time": "0:28:03", "remaining_time": "0:00:55"}
183
+ {"current_steps": 183, "total_steps": 188, "loss": 1.406, "accuracy": 0.65625, "lr": 5.022184911495864e-09, "epoch": 0.976, "percentage": 97.34, "elapsed_time": "0:28:12", "remaining_time": "0:00:46"}
184
+ {"current_steps": 184, "total_steps": 188, "loss": 1.8277, "accuracy": 0.53125, "lr": 3.4885213932484004e-09, "epoch": 0.9813333333333333, "percentage": 97.87, "elapsed_time": "0:28:21", "remaining_time": "0:00:36"}
185
+ {"current_steps": 185, "total_steps": 188, "loss": 1.7893, "accuracy": 0.515625, "lr": 2.233121376846836e-09, "epoch": 0.9866666666666667, "percentage": 98.4, "elapsed_time": "0:28:30", "remaining_time": "0:00:27"}
186
+ {"current_steps": 186, "total_steps": 188, "loss": 1.6867, "accuracy": 0.578125, "lr": 1.2563354172142603e-09, "epoch": 0.992, "percentage": 98.94, "elapsed_time": "0:28:39", "remaining_time": "0:00:18"}
187
+ {"current_steps": 187, "total_steps": 188, "loss": 1.7193, "accuracy": 0.53125, "lr": 5.584362697453881e-10, "epoch": 0.9973333333333333, "percentage": 99.47, "elapsed_time": "0:28:49", "remaining_time": "0:00:09"}
188
+ {"current_steps": 188, "total_steps": 188, "loss": 1.6741, "accuracy": 0.5, "lr": 1.3961881414292774e-10, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:28:54", "remaining_time": "0:00:00"}
189
+ {"current_steps": 188, "total_steps": 188, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "0:29:00", "remaining_time": "0:00:00"}
SimPO_Beta_2.5_Gamma_1.5/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
SimPO_Beta_2.5_Gamma_1.5/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:072dc1577b49f316c877d47cf323accc4220b8bfe183d5066fa7fba65f1d6d9e
3
+ size 7800
SimPO_Beta_2.5_Gamma_1.5/training_loss.png ADDED
SimPO_Beta_2.5_Gamma_1.5/training_rewards_accuracies.png ADDED
SimPO_Beta_2.5_Gamma_1.5/video_preprocessor_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "do_center_crop": null,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "do_sample_frames": false,
12
+ "fps": null,
13
+ "image_mean": [
14
+ 0.48145466,
15
+ 0.4578275,
16
+ 0.40821073
17
+ ],
18
+ "image_std": [
19
+ 0.26862954,
20
+ 0.26130258,
21
+ 0.27577711
22
+ ],
23
+ "input_data_format": null,
24
+ "max_frames": 768,
25
+ "max_pixels": 12845056,
26
+ "merge_size": 2,
27
+ "min_frames": 4,
28
+ "min_pixels": 3136,
29
+ "num_frames": null,
30
+ "pad_size": null,
31
+ "patch_size": 14,
32
+ "processor_class": "Qwen2VLProcessor",
33
+ "resample": 3,
34
+ "rescale_factor": 0.00392156862745098,
35
+ "return_metadata": false,
36
+ "size": {
37
+ "longest_edge": 12845056,
38
+ "shortest_edge": 3136
39
+ },
40
+ "temporal_patch_size": 2,
41
+ "video_metadata": null,
42
+ "video_processor_type": "Qwen2VLVideoProcessor"
43
+ }
SimPO_Beta_2.5_Gamma_1.5/vocab.json ADDED
The diff for this file is too large to render. See raw diff