furproxy committed on
Commit
5fa1c56
·
verified ·
1 Parent(s): f1626bb

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: Qwen3.5-9B
5
+ tags:
6
+ - llama-factory
7
+ - full
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: qwen35_caption_galore
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # qwen35_caption_galore
18
+
19
+ This model is a fine-tuned version of Qwen3.5-9B (loaded from the local checkpoint path `/workspace/models/Qwen3.5-9B`) on the my_caption dataset.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - family_to_muon_lr = {
39
+ "language": _fallback(getattr(training_args, "language_muon_lr", 1e-1), language_lr),
40
+ "vision": _fallback(getattr(training_args, "vision_muon_lr", 4e-5), vision_lr),
41
+ "merger": _fallback(getattr(training_args, "merger_muon_lr", 4e-5), merger_lr),
42
+ }
43
+
44
+ family_to_adamw_lr = {
45
+ "language": _fallback(getattr(training_args, "language_adamw_lr", 2e-6), language_lr),
46
+ "vision": _fallback(getattr(training_args, "vision_adamw_lr", 3e-6), vision_lr),
47
+ "merger": _fallback(getattr(training_args, "merger_adamw_lr", 1e-5), merger_lr),
48
+ }
49
+ - train_batch_size: 2
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 2
54
+ - gradient_accumulation_steps: 4
55
+ - total_train_batch_size: 16
56
+ - total_eval_batch_size: 16
57
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
58
+ - lr_scheduler_type: cosine_with_min_lr
59
+ - lr_scheduler_warmup_steps: 0.05
60
+ - num_epochs: 3
61
+
62
+ ### Training results
63
+
64
+
65
+
66
+ ### Framework versions
67
+
68
+ - Transformers 5.5.3
69
+ - Pytorch 2.11.0+cu130
70
+ - Datasets 4.0.0
71
+ - Tokenizers 0.22.2
README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: Qwen3.5-9B
5
+ tags:
6
+ - llama-factory
7
+ - full
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: qwen35_caption_galore
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # qwen35_caption_galore
18
+
19
+ This model is a fine-tuned version of Qwen3.5-9B (loaded from the local checkpoint path `/workspace/models/Qwen3.5-9B`) on the my_caption dataset.
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - family_to_muon_lr = {
39
+ "language": _fallback(getattr(training_args, "language_muon_lr", 1e-1), language_lr),
40
+ "vision": _fallback(getattr(training_args, "vision_muon_lr", 4e-5), vision_lr),
41
+ "merger": _fallback(getattr(training_args, "merger_muon_lr", 4e-5), merger_lr),
42
+ }
43
+
44
+ family_to_adamw_lr = {
45
+ "language": _fallback(getattr(training_args, "language_adamw_lr", 2e-6), language_lr),
46
+ "vision": _fallback(getattr(training_args, "vision_adamw_lr", 3e-6), vision_lr),
47
+ "merger": _fallback(getattr(training_args, "merger_adamw_lr", 1e-5), merger_lr),
48
+ }
49
+ - train_batch_size: 2
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - distributed_type: multi-GPU
53
+ - num_devices: 2
54
+ - gradient_accumulation_steps: 4
55
+ - total_train_batch_size: 16
56
+ - total_eval_batch_size: 16
57
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
58
+ - lr_scheduler_type: cosine_with_min_lr
59
+ - lr_scheduler_warmup_steps: 0.05
60
+ - num_epochs: 3
61
+
62
+ ### Training results
63
+
64
+
65
+
66
+ ### Framework versions
67
+
68
+ - Transformers 5.5.3
69
+ - Pytorch 2.11.0+cu130
70
+ - Datasets 4.0.0
71
+ - Tokenizers 0.22.2
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "effective_tokens_per_sec": 6675.040163681728,
3
+ "epoch": 3.0,
4
+ "total_flos": 4.2988160857187287e+18,
5
+ "train_loss": 0.7978645538875685,
6
+ "train_runtime": 6311.8591,
7
+ "train_samples_per_second": 9.034,
8
+ "train_steps_per_second": 0.565
9
+ }
chat_template.jinja ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set enable_thinking = false %}
2
+
3
+ {%- set image_count = namespace(value=0) %}
4
+ {%- set video_count = namespace(value=0) %}
5
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
6
+ {%- if content is string %}
7
+ {{- content }}
8
+ {%- elif content is iterable and content is not mapping %}
9
+ {%- for item in content %}
10
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
11
+ {%- if is_system_content %}
12
+ {{- raise_exception('System message cannot contain images.') }}
13
+ {%- endif %}
14
+ {%- if do_vision_count %}
15
+ {%- set image_count.value = image_count.value + 1 %}
16
+ {%- endif %}
17
+ {%- if add_vision_id %}
18
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
19
+ {%- endif %}
20
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
21
+ {%- elif 'video' in item or item.type == 'video' %}
22
+ {%- if is_system_content %}
23
+ {{- raise_exception('System message cannot contain videos.') }}
24
+ {%- endif %}
25
+ {%- if do_vision_count %}
26
+ {%- set video_count.value = video_count.value + 1 %}
27
+ {%- endif %}
28
+ {%- if add_vision_id %}
29
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
30
+ {%- endif %}
31
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
32
+ {%- elif 'text' in item %}
33
+ {{- item.text }}
34
+ {%- else %}
35
+ {{- raise_exception('Unexpected item type in content.') }}
36
+ {%- endif %}
37
+ {%- endfor %}
38
+ {%- elif content is none or content is undefined %}
39
+ {{- '' }}
40
+ {%- else %}
41
+ {{- raise_exception('Unexpected content type.') }}
42
+ {%- endif %}
43
+ {%- endmacro %}
44
+ {%- if not messages %}
45
+ {{- raise_exception('No messages provided.') }}
46
+ {%- endif %}
47
+ {%- if tools and tools is iterable and tools is not mapping %}
48
+ {{- '<|im_start|>system\n' }}
49
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
50
+ {%- for tool in tools %}
51
+ {{- "\n" }}
52
+ {{- tool | tojson }}
53
+ {%- endfor %}
54
+ {{- "\n</tools>" }}
55
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
56
+ {%- if messages[0].role == 'system' %}
57
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
58
+ {%- if content %}
59
+ {{- '\n\n' + content }}
60
+ {%- endif %}
61
+ {%- endif %}
62
+ {{- '<|im_end|>\n' }}
63
+ {%- else %}
64
+ {%- if messages[0].role == 'system' %}
65
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
66
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
67
+ {%- endif %}
68
+ {%- endif %}
69
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
70
+ {%- for message in messages[::-1] %}
71
+ {%- set index = (messages|length - 1) - loop.index0 %}
72
+ {%- if ns.multi_step_tool and message.role == "user" %}
73
+ {%- set content = render_content(message.content, false)|trim %}
74
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
75
+ {%- set ns.multi_step_tool = false %}
76
+ {%- set ns.last_query_index = index %}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if ns.multi_step_tool %}
81
+ {{- raise_exception('No user query found in messages.') }}
82
+ {%- endif %}
83
+ {%- for message in messages %}
84
+ {%- set content = render_content(message.content, true)|trim %}
85
+ {%- if message.role == "system" %}
86
+ {%- if not loop.first %}
87
+ {{- raise_exception('System message must be at the beginning.') }}
88
+ {%- endif %}
89
+ {%- elif message.role == "user" %}
90
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
91
+ {%- elif message.role == "assistant" %}
92
+ {%- set reasoning_content = '' %}
93
+ {%- if message.reasoning_content is string %}
94
+ {%- set reasoning_content = message.reasoning_content %}
95
+ {%- else %}
96
+ {%- if '</think>' in content %}
97
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
98
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
99
+ {%- endif %}
100
+ {%- endif %}
101
+ {%- set reasoning_content = reasoning_content|trim %}
102
+ {%- if loop.index0 > ns.last_query_index %}
103
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
104
+ {%- else %}
105
+ {{- '<|im_start|>' + message.role + '\n' + content }}
106
+ {%- endif %}
107
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
108
+ {%- for tool_call in message.tool_calls %}
109
+ {%- if tool_call.function is defined %}
110
+ {%- set tool_call = tool_call.function %}
111
+ {%- endif %}
112
+ {%- if loop.first %}
113
+ {%- if content|trim %}
114
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- else %}
116
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
117
+ {%- endif %}
118
+ {%- else %}
119
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
120
+ {%- endif %}
121
+ {%- if tool_call.arguments is defined %}
122
+ {%- for args_name, args_value in tool_call.arguments|items %}
123
+ {{- '<parameter=' + args_name + '>\n' }}
124
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
125
+ {{- args_value }}
126
+ {{- '\n</parameter>\n' }}
127
+ {%- endfor %}
128
+ {%- endif %}
129
+ {{- '</function>\n</tool_call>' }}
130
+ {%- endfor %}
131
+ {%- endif %}
132
+ {{- '<|im_end|>\n' }}
133
+ {%- elif message.role == "tool" %}
134
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
135
+ {{- '<|im_start|>user' }}
136
+ {%- endif %}
137
+ {{- '\n<tool_response>\n' }}
138
+ {{- content }}
139
+ {{- '\n</tool_response>' }}
140
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
141
+ {{- '<|im_end|>\n' }}
142
+ {%- elif loop.last %}
143
+ {{- '<|im_end|>\n' }}
144
+ {%- endif %}
145
+ {%- else %}
146
+ {{- raise_exception('Unexpected message role.') }}
147
+ {%- endif %}
148
+ {%- endfor %}
149
+ {%- if add_generation_prompt %}
150
+ {{- '<|im_start|>assistant\n' }}
151
+ {%- if enable_thinking is defined and enable_thinking is false %}
152
+ {{- '<think>\n\n</think>\n\n' }}
153
+ {%- else %}
154
+ {{- '<think>\n' }}
155
+ {%- endif %}
156
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5ForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "eos_token_id": 248046,
7
+ "hidden_size": 4096,
8
+ "image_token_id": 248056,
9
+ "model_type": "qwen3_5",
10
+ "pad_token_id": 248044,
11
+ "text_config": {
12
+ "attention_bias": false,
13
+ "attention_dropout": 0.0,
14
+ "attn_output_gate": true,
15
+ "bos_token_id": null,
16
+ "dtype": "bfloat16",
17
+ "eos_token_id": 248044,
18
+ "full_attention_interval": 4,
19
+ "head_dim": 256,
20
+ "hidden_act": "silu",
21
+ "hidden_size": 4096,
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 12288,
24
+ "layer_types": [
25
+ "linear_attention",
26
+ "linear_attention",
27
+ "linear_attention",
28
+ "full_attention",
29
+ "linear_attention",
30
+ "linear_attention",
31
+ "linear_attention",
32
+ "full_attention",
33
+ "linear_attention",
34
+ "linear_attention",
35
+ "linear_attention",
36
+ "full_attention",
37
+ "linear_attention",
38
+ "linear_attention",
39
+ "linear_attention",
40
+ "full_attention",
41
+ "linear_attention",
42
+ "linear_attention",
43
+ "linear_attention",
44
+ "full_attention",
45
+ "linear_attention",
46
+ "linear_attention",
47
+ "linear_attention",
48
+ "full_attention",
49
+ "linear_attention",
50
+ "linear_attention",
51
+ "linear_attention",
52
+ "full_attention",
53
+ "linear_attention",
54
+ "linear_attention",
55
+ "linear_attention",
56
+ "full_attention"
57
+ ],
58
+ "linear_conv_kernel_dim": 4,
59
+ "linear_key_head_dim": 128,
60
+ "linear_num_key_heads": 16,
61
+ "linear_num_value_heads": 32,
62
+ "linear_value_head_dim": 128,
63
+ "mamba_ssm_dtype": "float32",
64
+ "max_position_embeddings": 262144,
65
+ "mlp_only_layers": [],
66
+ "model_type": "qwen3_5_text",
67
+ "mtp_num_hidden_layers": 1,
68
+ "mtp_use_dedicated_embeddings": false,
69
+ "num_attention_heads": 16,
70
+ "num_hidden_layers": 32,
71
+ "num_key_value_heads": 4,
72
+ "pad_token_id": null,
73
+ "partial_rotary_factor": 0.25,
74
+ "rms_norm_eps": 1e-06,
75
+ "rope_parameters": {
76
+ "mrope_interleaved": true,
77
+ "mrope_section": [
78
+ 11,
79
+ 11,
80
+ 10
81
+ ],
82
+ "partial_rotary_factor": 0.25,
83
+ "rope_theta": 10000000,
84
+ "rope_type": "default"
85
+ },
86
+ "tie_word_embeddings": false,
87
+ "use_cache": false,
88
+ "vocab_size": 248320
89
+ },
90
+ "tie_word_embeddings": false,
91
+ "transformers_version": "5.5.3",
92
+ "use_cache": false,
93
+ "video_token_id": 248057,
94
+ "vision_config": {
95
+ "deepstack_visual_indexes": [],
96
+ "depth": 27,
97
+ "dtype": "bfloat16",
98
+ "hidden_act": "gelu_pytorch_tanh",
99
+ "hidden_size": 1152,
100
+ "in_channels": 3,
101
+ "initializer_range": 0.02,
102
+ "intermediate_size": 4304,
103
+ "model_type": "qwen3_5",
104
+ "num_heads": 16,
105
+ "num_position_embeddings": 2304,
106
+ "out_hidden_size": 4096,
107
+ "patch_size": 16,
108
+ "spatial_merge_size": 2,
109
+ "temporal_patch_size": 2
110
+ },
111
+ "vision_end_token_id": 248054,
112
+ "vision_start_token_id": 248053
113
+ }
generation_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 248046,
5
+ 248044
6
+ ],
7
+ "pad_token_id": 248044,
8
+ "transformers_version": "5.5.3",
9
+ "use_cache": true
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e552b3ad68ae708a8fd442ecec0b39e433995d919c9bbe2bb10ca3c9f918e128
3
+ size 20859268376
processor_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "Qwen2VLImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 16,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 16777216,
25
+ "shortest_edge": 65536
26
+ },
27
+ "temporal_patch_size": 2
28
+ },
29
+ "processor_class": "Qwen3VLProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "fps": 2,
39
+ "image_mean": [
40
+ 0.5,
41
+ 0.5,
42
+ 0.5
43
+ ],
44
+ "image_std": [
45
+ 0.5,
46
+ 0.5,
47
+ 0.5
48
+ ],
49
+ "max_frames": 768,
50
+ "merge_size": 2,
51
+ "min_frames": 4,
52
+ "patch_size": 16,
53
+ "resample": 3,
54
+ "rescale_factor": 0.00392156862745098,
55
+ "return_metadata": false,
56
+ "size": {
57
+ "longest_edge": 25165824,
58
+ "shortest_edge": 4096
59
+ },
60
+ "temporal_patch_size": 2,
61
+ "video_processor_type": "Qwen3VLVideoProcessor"
62
+ }
63
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
3
+ size 19989343
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": true,
13
+ "model_max_length": 262144,
14
+ "model_specific_special_tokens": {
15
+ "audio_bos_token": "<|audio_start|>",
16
+ "audio_eos_token": "<|audio_end|>",
17
+ "audio_token": "<|audio_pad|>",
18
+ "image_token": "<|image_pad|>",
19
+ "video_token": "<|video_pad|>",
20
+ "vision_bos_token": "<|vision_start|>",
21
+ "vision_eos_token": "<|vision_end|>"
22
+ },
23
+ "pad_token": "<|endoftext|>",
24
+ "padding_side": "right",
25
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
+ "processor_class": "Qwen3VLProcessor",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "TokenizersBackend",
29
+ "unk_token": null,
30
+ "video_token": "<|video_pad|>",
31
+ "vision_bos_token": "<|vision_start|>",
32
+ "vision_eos_token": "<|vision_end|>"
33
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "effective_tokens_per_sec": 6675.040163681728,
3
+ "epoch": 3.0,
4
+ "total_flos": 4.2988160857187287e+18,
5
+ "train_loss": 0.7978645538875685,
6
+ "train_runtime": 6311.8591,
7
+ "train_samples_per_second": 9.034,
8
+ "train_steps_per_second": 0.565
9
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 2638, "total_steps": 3564, "loss": 1.07930326461792, "lr": 5.13039318915663e-07, "epoch": 2.2205387205387206, "percentage": 74.02, "elapsed_time": "1:17:36", "remaining_time": "0:27:14"}
2
+ {"current_steps": 2640, "total_steps": 3564, "loss": 0.982938289642334, "lr": 5.117739323198067e-07, "epoch": 2.2222222222222223, "percentage": 74.07, "elapsed_time": "1:17:40", "remaining_time": "0:27:11"}
3
+ {"current_steps": 2642, "total_steps": 3564, "loss": 0.5647614002227783, "lr": 5.105105724102547e-07, "epoch": 2.223905723905724, "percentage": 74.13, "elapsed_time": "1:17:44", "remaining_time": "0:27:07"}
4
+ {"current_steps": 2644, "total_steps": 3564, "loss": 0.5829119086265564, "lr": 5.092492435398137e-07, "epoch": 2.225589225589226, "percentage": 74.19, "elapsed_time": "1:17:47", "remaining_time": "0:27:04"}
5
+ {"current_steps": 2646, "total_steps": 3564, "loss": 0.5897196531295776, "lr": 5.079899500542917e-07, "epoch": 2.227272727272727, "percentage": 74.24, "elapsed_time": "1:17:51", "remaining_time": "0:27:00"}
6
+ {"current_steps": 2648, "total_steps": 3564, "loss": 0.2728573977947235, "lr": 5.067326962924848e-07, "epoch": 2.228956228956229, "percentage": 74.3, "elapsed_time": "1:17:54", "remaining_time": "0:26:57"}
7
+ {"current_steps": 2650, "total_steps": 3564, "loss": 0.9227702617645264, "lr": 5.054774865861617e-07, "epoch": 2.2306397306397305, "percentage": 74.35, "elapsed_time": "1:17:58", "remaining_time": "0:26:53"}
8
+ {"current_steps": 2652, "total_steps": 3564, "loss": 0.5031465888023376, "lr": 5.042243252600475e-07, "epoch": 2.2323232323232323, "percentage": 74.41, "elapsed_time": "1:18:02", "remaining_time": "0:26:50"}
9
+ {"current_steps": 2654, "total_steps": 3564, "loss": 0.49748843908309937, "lr": 5.029732166318106e-07, "epoch": 2.234006734006734, "percentage": 74.47, "elapsed_time": "1:18:06", "remaining_time": "0:26:46"}
10
+ {"current_steps": 2656, "total_steps": 3564, "loss": 0.585181713104248, "lr": 5.017241650120462e-07, "epoch": 2.2356902356902357, "percentage": 74.52, "elapsed_time": "1:18:10", "remaining_time": "0:26:43"}
11
+ {"current_steps": 2658, "total_steps": 3564, "loss": 0.7983870506286621, "lr": 5.004771747042631e-07, "epoch": 2.2373737373737375, "percentage": 74.58, "elapsed_time": "1:18:14", "remaining_time": "0:26:40"}
12
+ {"current_steps": 2660, "total_steps": 3564, "loss": 0.6713172197341919, "lr": 4.992322500048673e-07, "epoch": 2.239057239057239, "percentage": 74.64, "elapsed_time": "1:18:17", "remaining_time": "0:26:36"}
13
+ {"current_steps": 2662, "total_steps": 3564, "loss": 0.7296475768089294, "lr": 4.979893952031483e-07, "epoch": 2.240740740740741, "percentage": 74.69, "elapsed_time": "1:18:21", "remaining_time": "0:26:33"}
14
+ {"current_steps": 2664, "total_steps": 3564, "loss": 0.3102848529815674, "lr": 4.96748614581264e-07, "epoch": 2.242424242424242, "percentage": 74.75, "elapsed_time": "1:18:24", "remaining_time": "0:26:29"}
15
+ {"current_steps": 2666, "total_steps": 3564, "loss": 0.712740421295166, "lr": 4.955099124142251e-07, "epoch": 2.244107744107744, "percentage": 74.8, "elapsed_time": "1:18:28", "remaining_time": "0:26:26"}
16
+ {"current_steps": 2668, "total_steps": 3564, "loss": 0.5821852684020996, "lr": 4.942732929698827e-07, "epoch": 2.2457912457912457, "percentage": 74.86, "elapsed_time": "1:18:32", "remaining_time": "0:26:22"}
17
+ {"current_steps": 2670, "total_steps": 3564, "loss": 0.4474225640296936, "lr": 4.930387605089104e-07, "epoch": 2.2474747474747474, "percentage": 74.92, "elapsed_time": "1:18:35", "remaining_time": "0:26:18"}
18
+ {"current_steps": 2672, "total_steps": 3564, "loss": 0.33651861548423767, "lr": 4.918063192847921e-07, "epoch": 2.249158249158249, "percentage": 74.97, "elapsed_time": "1:18:39", "remaining_time": "0:26:15"}
19
+ {"current_steps": 2674, "total_steps": 3564, "loss": 0.5961496829986572, "lr": 4.905759735438068e-07, "epoch": 2.250841750841751, "percentage": 75.03, "elapsed_time": "1:18:42", "remaining_time": "0:26:11"}
20
+ {"current_steps": 2676, "total_steps": 3564, "loss": 0.6518359184265137, "lr": 4.893477275250127e-07, "epoch": 2.2525252525252526, "percentage": 75.08, "elapsed_time": "1:18:46", "remaining_time": "0:26:08"}
21
+ {"current_steps": 2678, "total_steps": 3564, "loss": 0.4896303117275238, "lr": 4.881215854602342e-07, "epoch": 2.2542087542087543, "percentage": 75.14, "elapsed_time": "1:18:50", "remaining_time": "0:26:04"}
22
+ {"current_steps": 2680, "total_steps": 3564, "loss": 0.8590680956840515, "lr": 4.868975515740471e-07, "epoch": 2.255892255892256, "percentage": 75.2, "elapsed_time": "1:18:53", "remaining_time": "0:26:01"}
23
+ {"current_steps": 2682, "total_steps": 3564, "loss": 0.18953704833984375, "lr": 4.856756300837625e-07, "epoch": 2.257575757575758, "percentage": 75.25, "elapsed_time": "1:18:57", "remaining_time": "0:25:57"}
24
+ {"current_steps": 2684, "total_steps": 3564, "loss": 0.12749773263931274, "lr": 4.844558251994146e-07, "epoch": 2.259259259259259, "percentage": 75.31, "elapsed_time": "1:19:01", "remaining_time": "0:25:54"}
25
+ {"current_steps": 2686, "total_steps": 3564, "loss": 0.6111665964126587, "lr": 4.832381411237444e-07, "epoch": 2.260942760942761, "percentage": 75.36, "elapsed_time": "1:19:05", "remaining_time": "0:25:51"}
26
+ {"current_steps": 2688, "total_steps": 3564, "loss": 0.36922651529312134, "lr": 4.820225820521855e-07, "epoch": 2.2626262626262625, "percentage": 75.42, "elapsed_time": "1:19:08", "remaining_time": "0:25:47"}
27
+ {"current_steps": 2690, "total_steps": 3564, "loss": 0.9025669097900391, "lr": 4.808091521728506e-07, "epoch": 2.2643097643097643, "percentage": 75.48, "elapsed_time": "1:19:12", "remaining_time": "0:25:44"}
28
+ {"current_steps": 2692, "total_steps": 3564, "loss": 0.8429475426673889, "lr": 4.795978556665165e-07, "epoch": 2.265993265993266, "percentage": 75.53, "elapsed_time": "1:19:16", "remaining_time": "0:25:40"}
29
+ {"current_steps": 2694, "total_steps": 3564, "loss": 0.6566574573516846, "lr": 4.783886967066088e-07, "epoch": 2.2676767676767677, "percentage": 75.59, "elapsed_time": "1:19:20", "remaining_time": "0:25:37"}
30
+ {"current_steps": 2696, "total_steps": 3564, "loss": 0.5327779054641724, "lr": 4.77181679459189e-07, "epoch": 2.2693602693602695, "percentage": 75.65, "elapsed_time": "1:19:24", "remaining_time": "0:25:33"}
31
+ {"current_steps": 2698, "total_steps": 3564, "loss": 0.624381959438324, "lr": 4.759768080829399e-07, "epoch": 2.271043771043771, "percentage": 75.7, "elapsed_time": "1:19:28", "remaining_time": "0:25:30"}
32
+ {"current_steps": 2700, "total_steps": 3564, "loss": 0.7681624889373779, "lr": 4.747740867291497e-07, "epoch": 2.2727272727272725, "percentage": 75.76, "elapsed_time": "1:19:32", "remaining_time": "0:25:27"}
33
+ {"current_steps": 2702, "total_steps": 3564, "loss": 0.49092429876327515, "lr": 4.7357351954169973e-07, "epoch": 2.274410774410774, "percentage": 75.81, "elapsed_time": "1:19:35", "remaining_time": "0:25:23"}
34
+ {"current_steps": 2704, "total_steps": 3564, "loss": 0.8667645454406738, "lr": 4.7237511065704933e-07, "epoch": 2.276094276094276, "percentage": 75.87, "elapsed_time": "1:19:39", "remaining_time": "0:25:20"}
35
+ {"current_steps": 2706, "total_steps": 3564, "loss": 0.9094717502593994, "lr": 4.7117886420422094e-07, "epoch": 2.2777777777777777, "percentage": 75.93, "elapsed_time": "1:19:43", "remaining_time": "0:25:16"}
36
+ {"current_steps": 2708, "total_steps": 3564, "loss": 0.351574569940567, "lr": 4.6998478430478714e-07, "epoch": 2.2794612794612794, "percentage": 75.98, "elapsed_time": "1:19:46", "remaining_time": "0:25:12"}
37
+ {"current_steps": 2710, "total_steps": 3564, "loss": 0.5877597332000732, "lr": 4.6879287507285596e-07, "epoch": 2.281144781144781, "percentage": 76.04, "elapsed_time": "1:19:49", "remaining_time": "0:25:09"}
38
+ {"current_steps": 2712, "total_steps": 3564, "loss": 0.5526677370071411, "lr": 4.676031406150555e-07, "epoch": 2.282828282828283, "percentage": 76.09, "elapsed_time": "1:19:53", "remaining_time": "0:25:05"}
39
+ {"current_steps": 2714, "total_steps": 3564, "loss": 0.4332752227783203, "lr": 4.66415585030522e-07, "epoch": 2.2845117845117846, "percentage": 76.15, "elapsed_time": "1:19:57", "remaining_time": "0:25:02"}
40
+ {"current_steps": 2716, "total_steps": 3564, "loss": 0.7148293256759644, "lr": 4.6523021241088416e-07, "epoch": 2.2861952861952863, "percentage": 76.21, "elapsed_time": "1:20:00", "remaining_time": "0:24:58"}
41
+ {"current_steps": 2718, "total_steps": 3564, "loss": 0.5515605807304382, "lr": 4.6404702684024905e-07, "epoch": 2.287878787878788, "percentage": 76.26, "elapsed_time": "1:20:04", "remaining_time": "0:24:55"}
42
+ {"current_steps": 2720, "total_steps": 3564, "loss": 0.5390480160713196, "lr": 4.628660323951891e-07, "epoch": 2.28956228956229, "percentage": 76.32, "elapsed_time": "1:20:07", "remaining_time": "0:24:51"}
43
+ {"current_steps": 2722, "total_steps": 3564, "loss": 0.63498854637146, "lr": 4.616872331447272e-07, "epoch": 2.291245791245791, "percentage": 76.37, "elapsed_time": "1:20:11", "remaining_time": "0:24:48"}
44
+ {"current_steps": 2724, "total_steps": 3564, "loss": 0.6880998611450195, "lr": 4.605106331503223e-07, "epoch": 2.292929292929293, "percentage": 76.43, "elapsed_time": "1:20:15", "remaining_time": "0:24:44"}
45
+ {"current_steps": 2726, "total_steps": 3564, "loss": 0.6316101551055908, "lr": 4.5933623646585683e-07, "epoch": 2.2946127946127945, "percentage": 76.49, "elapsed_time": "1:20:19", "remaining_time": "0:24:41"}
46
+ {"current_steps": 2728, "total_steps": 3564, "loss": 0.5416774749755859, "lr": 4.581640471376215e-07, "epoch": 2.2962962962962963, "percentage": 76.54, "elapsed_time": "1:20:23", "remaining_time": "0:24:38"}
47
+ {"current_steps": 2730, "total_steps": 3564, "loss": 0.972043514251709, "lr": 4.5699406920430155e-07, "epoch": 2.297979797979798, "percentage": 76.6, "elapsed_time": "1:20:26", "remaining_time": "0:24:34"}
48
+ {"current_steps": 2732, "total_steps": 3564, "loss": 0.5268035531044006, "lr": 4.5582630669696324e-07, "epoch": 2.2996632996632997, "percentage": 76.66, "elapsed_time": "1:20:30", "remaining_time": "0:24:31"}
49
+ {"current_steps": 2734, "total_steps": 3564, "loss": 0.4689450263977051, "lr": 4.5466076363904e-07, "epoch": 2.3013468013468015, "percentage": 76.71, "elapsed_time": "1:20:34", "remaining_time": "0:24:27"}
50
+ {"current_steps": 2736, "total_steps": 3564, "loss": 0.43555888533592224, "lr": 4.5349744404631785e-07, "epoch": 2.303030303030303, "percentage": 76.77, "elapsed_time": "1:20:38", "remaining_time": "0:24:24"}
51
+ {"current_steps": 2738, "total_steps": 3564, "loss": 0.5540938377380371, "lr": 4.5233635192692206e-07, "epoch": 2.3047138047138045, "percentage": 76.82, "elapsed_time": "1:20:42", "remaining_time": "0:24:20"}
52
+ {"current_steps": 2740, "total_steps": 3564, "loss": 0.4014560580253601, "lr": 4.511774912813043e-07, "epoch": 2.3063973063973062, "percentage": 76.88, "elapsed_time": "1:20:45", "remaining_time": "0:24:17"}
53
+ {"current_steps": 2742, "total_steps": 3564, "loss": 0.7727656364440918, "lr": 4.5002086610222626e-07, "epoch": 2.308080808080808, "percentage": 76.94, "elapsed_time": "1:20:49", "remaining_time": "0:24:13"}
54
+ {"current_steps": 2744, "total_steps": 3564, "loss": 0.7189053297042847, "lr": 4.488664803747487e-07, "epoch": 2.3097643097643097, "percentage": 76.99, "elapsed_time": "1:20:53", "remaining_time": "0:24:10"}
55
+ {"current_steps": 2746, "total_steps": 3564, "loss": 0.7668474912643433, "lr": 4.4771433807621644e-07, "epoch": 2.3114478114478114, "percentage": 77.05, "elapsed_time": "1:20:57", "remaining_time": "0:24:06"}
56
+ {"current_steps": 2748, "total_steps": 3564, "loss": 0.6078014373779297, "lr": 4.4656444317624397e-07, "epoch": 2.313131313131313, "percentage": 77.1, "elapsed_time": "1:21:01", "remaining_time": "0:24:03"}
57
+ {"current_steps": 2750, "total_steps": 3564, "loss": 0.10793264210224152, "lr": 4.454167996367032e-07, "epoch": 2.314814814814815, "percentage": 77.16, "elapsed_time": "1:21:04", "remaining_time": "0:23:59"}
58
+ {"current_steps": 2752, "total_steps": 3564, "loss": 0.33263859152793884, "lr": 4.442714114117092e-07, "epoch": 2.3164983164983166, "percentage": 77.22, "elapsed_time": "1:21:08", "remaining_time": "0:23:56"}
59
+ {"current_steps": 2754, "total_steps": 3564, "loss": 0.39961159229278564, "lr": 4.4312828244760613e-07, "epoch": 2.3181818181818183, "percentage": 77.27, "elapsed_time": "1:21:11", "remaining_time": "0:23:52"}
60
+ {"current_steps": 2756, "total_steps": 3564, "loss": 0.8770014047622681, "lr": 4.4198741668295425e-07, "epoch": 2.31986531986532, "percentage": 77.33, "elapsed_time": "1:21:15", "remaining_time": "0:23:49"}
61
+ {"current_steps": 2758, "total_steps": 3564, "loss": 0.5539072751998901, "lr": 4.4084881804851644e-07, "epoch": 2.3215488215488214, "percentage": 77.38, "elapsed_time": "1:21:19", "remaining_time": "0:23:45"}
62
+ {"current_steps": 2760, "total_steps": 3564, "loss": 0.6975724697113037, "lr": 4.397124904672437e-07, "epoch": 2.323232323232323, "percentage": 77.44, "elapsed_time": "1:21:22", "remaining_time": "0:23:42"}
63
+ {"current_steps": 2762, "total_steps": 3564, "loss": 0.5050334334373474, "lr": 4.3857843785426263e-07, "epoch": 2.324915824915825, "percentage": 77.5, "elapsed_time": "1:21:26", "remaining_time": "0:23:38"}
64
+ {"current_steps": 2764, "total_steps": 3564, "loss": 0.8777497410774231, "lr": 4.374466641168622e-07, "epoch": 2.3265993265993266, "percentage": 77.55, "elapsed_time": "1:21:29", "remaining_time": "0:23:35"}
65
+ {"current_steps": 2766, "total_steps": 3564, "loss": 0.7257252931594849, "lr": 4.363171731544786e-07, "epoch": 2.3282828282828283, "percentage": 77.61, "elapsed_time": "1:21:32", "remaining_time": "0:23:31"}
66
+ {"current_steps": 2768, "total_steps": 3564, "loss": 0.5315639972686768, "lr": 4.351899688586834e-07, "epoch": 2.32996632996633, "percentage": 77.67, "elapsed_time": "1:21:36", "remaining_time": "0:23:28"}
67
+ {"current_steps": 2770, "total_steps": 3564, "loss": 0.6226543188095093, "lr": 4.3406505511317025e-07, "epoch": 2.3316498316498318, "percentage": 77.72, "elapsed_time": "1:21:40", "remaining_time": "0:23:24"}
68
+ {"current_steps": 2772, "total_steps": 3564, "loss": 0.5986767411231995, "lr": 4.329424357937397e-07, "epoch": 2.3333333333333335, "percentage": 77.78, "elapsed_time": "1:21:44", "remaining_time": "0:23:21"}
69
+ {"current_steps": 2774, "total_steps": 3564, "loss": 0.693830132484436, "lr": 4.318221147682879e-07, "epoch": 2.3350168350168348, "percentage": 77.83, "elapsed_time": "1:21:48", "remaining_time": "0:23:17"}
70
+ {"current_steps": 2776, "total_steps": 3564, "loss": 0.6411426663398743, "lr": 4.307040958967924e-07, "epoch": 2.3367003367003365, "percentage": 77.89, "elapsed_time": "1:21:51", "remaining_time": "0:23:14"}
71
+ {"current_steps": 2778, "total_steps": 3564, "loss": 0.45083481073379517, "lr": 4.2958838303129817e-07, "epoch": 2.3383838383838382, "percentage": 77.95, "elapsed_time": "1:21:55", "remaining_time": "0:23:10"}
72
+ {"current_steps": 2780, "total_steps": 3564, "loss": 0.6881177425384521, "lr": 4.2847498001590573e-07, "epoch": 2.34006734006734, "percentage": 78.0, "elapsed_time": "1:21:58", "remaining_time": "0:23:07"}
73
+ {"current_steps": 2782, "total_steps": 3564, "loss": 0.5657017230987549, "lr": 4.273638906867573e-07, "epoch": 2.3417508417508417, "percentage": 78.06, "elapsed_time": "1:22:02", "remaining_time": "0:23:03"}
74
+ {"current_steps": 2784, "total_steps": 3564, "loss": 0.7839221954345703, "lr": 4.2625511887202225e-07, "epoch": 2.3434343434343434, "percentage": 78.11, "elapsed_time": "1:22:06", "remaining_time": "0:23:00"}
75
+ {"current_steps": 2786, "total_steps": 3564, "loss": 0.5463940501213074, "lr": 4.2514866839188657e-07, "epoch": 2.345117845117845, "percentage": 78.17, "elapsed_time": "1:22:09", "remaining_time": "0:22:56"}
76
+ {"current_steps": 2788, "total_steps": 3564, "loss": 0.8763151168823242, "lr": 4.2404454305853796e-07, "epoch": 2.346801346801347, "percentage": 78.23, "elapsed_time": "1:22:13", "remaining_time": "0:22:53"}
77
+ {"current_steps": 2790, "total_steps": 3564, "loss": 0.7232416868209839, "lr": 4.229427466761522e-07, "epoch": 2.3484848484848486, "percentage": 78.28, "elapsed_time": "1:22:17", "remaining_time": "0:22:49"}
78
+ {"current_steps": 2792, "total_steps": 3564, "loss": 0.5656273365020752, "lr": 4.2184328304088164e-07, "epoch": 2.3501683501683504, "percentage": 78.34, "elapsed_time": "1:22:20", "remaining_time": "0:22:46"}
79
+ {"current_steps": 2794, "total_steps": 3564, "loss": 0.6187400817871094, "lr": 4.2074615594084146e-07, "epoch": 2.351851851851852, "percentage": 78.4, "elapsed_time": "1:22:24", "remaining_time": "0:22:42"}
80
+ {"current_steps": 2796, "total_steps": 3564, "loss": 0.9885926246643066, "lr": 4.1965136915609543e-07, "epoch": 2.3535353535353534, "percentage": 78.45, "elapsed_time": "1:22:28", "remaining_time": "0:22:39"}
81
+ {"current_steps": 2798, "total_steps": 3564, "loss": 0.45941799879074097, "lr": 4.1855892645864513e-07, "epoch": 2.355218855218855, "percentage": 78.51, "elapsed_time": "1:22:31", "remaining_time": "0:22:35"}
82
+ {"current_steps": 2800, "total_steps": 3564, "loss": 0.9851700067520142, "lr": 4.1746883161241555e-07, "epoch": 2.356902356902357, "percentage": 78.56, "elapsed_time": "1:22:35", "remaining_time": "0:22:32"}
83
+ {"current_steps": 2802, "total_steps": 3564, "loss": 0.9169178009033203, "lr": 4.1638108837324137e-07, "epoch": 2.3585858585858586, "percentage": 78.62, "elapsed_time": "1:22:39", "remaining_time": "0:22:28"}
84
+ {"current_steps": 2804, "total_steps": 3564, "loss": 0.7946122884750366, "lr": 4.152957004888563e-07, "epoch": 2.3602693602693603, "percentage": 78.68, "elapsed_time": "1:22:43", "remaining_time": "0:22:25"}
85
+ {"current_steps": 2806, "total_steps": 3564, "loss": 0.7735965847969055, "lr": 4.142126716988784e-07, "epoch": 2.361952861952862, "percentage": 78.73, "elapsed_time": "1:22:47", "remaining_time": "0:22:21"}
86
+ {"current_steps": 2808, "total_steps": 3564, "loss": 0.802727460861206, "lr": 4.131320057347969e-07, "epoch": 2.3636363636363638, "percentage": 78.79, "elapsed_time": "1:22:50", "remaining_time": "0:22:18"}
87
+ {"current_steps": 2810, "total_steps": 3564, "loss": 1.0042896270751953, "lr": 4.120537063199612e-07, "epoch": 2.3653198653198655, "percentage": 78.84, "elapsed_time": "1:22:54", "remaining_time": "0:22:14"}
88
+ {"current_steps": 2812, "total_steps": 3564, "loss": 0.7024844288825989, "lr": 4.109777771695663e-07, "epoch": 2.3670033670033668, "percentage": 78.9, "elapsed_time": "1:22:57", "remaining_time": "0:22:11"}
89
+ {"current_steps": 2814, "total_steps": 3564, "loss": 0.6036837100982666, "lr": 4.0990422199064103e-07, "epoch": 2.3686868686868685, "percentage": 78.96, "elapsed_time": "1:23:01", "remaining_time": "0:22:07"}
90
+ {"current_steps": 2816, "total_steps": 3564, "loss": 0.484286904335022, "lr": 4.0883304448203477e-07, "epoch": 2.3703703703703702, "percentage": 79.01, "elapsed_time": "1:23:04", "remaining_time": "0:22:04"}
91
+ {"current_steps": 2818, "total_steps": 3564, "loss": 0.5557587146759033, "lr": 4.077642483344044e-07, "epoch": 2.372053872053872, "percentage": 79.07, "elapsed_time": "1:23:08", "remaining_time": "0:22:00"}
92
+ {"current_steps": 2820, "total_steps": 3564, "loss": 0.6941782236099243, "lr": 4.066978372302025e-07, "epoch": 2.3737373737373737, "percentage": 79.12, "elapsed_time": "1:23:11", "remaining_time": "0:21:56"}
93
+ {"current_steps": 2822, "total_steps": 3564, "loss": 0.4251060485839844, "lr": 4.056338148436643e-07, "epoch": 2.3754208754208754, "percentage": 79.18, "elapsed_time": "1:23:14", "remaining_time": "0:21:53"}
94
+ {"current_steps": 2824, "total_steps": 3564, "loss": 0.9760651588439941, "lr": 4.0457218484079414e-07, "epoch": 2.377104377104377, "percentage": 79.24, "elapsed_time": "1:23:18", "remaining_time": "0:21:49"}
95
+ {"current_steps": 2826, "total_steps": 3564, "loss": 0.8394796848297119, "lr": 4.035129508793542e-07, "epoch": 2.378787878787879, "percentage": 79.29, "elapsed_time": "1:23:21", "remaining_time": "0:21:46"}
96
+ {"current_steps": 2828, "total_steps": 3564, "loss": 0.4385402798652649, "lr": 4.024561166088516e-07, "epoch": 2.3804713804713806, "percentage": 79.35, "elapsed_time": "1:23:24", "remaining_time": "0:21:42"}
97
+ {"current_steps": 2830, "total_steps": 3564, "loss": 0.932929277420044, "lr": 4.0140168567052447e-07, "epoch": 2.3821548821548824, "percentage": 79.41, "elapsed_time": "1:23:28", "remaining_time": "0:21:38"}
98
+ {"current_steps": 2832, "total_steps": 3564, "loss": 0.6770232915878296, "lr": 4.003496616973312e-07, "epoch": 2.3838383838383836, "percentage": 79.46, "elapsed_time": "1:23:31", "remaining_time": "0:21:35"}
99
+ {"current_steps": 2834, "total_steps": 3564, "loss": 0.5193581581115723, "lr": 3.9930004831393757e-07, "epoch": 2.3855218855218854, "percentage": 79.52, "elapsed_time": "1:23:35", "remaining_time": "0:21:31"}
100
+ {"current_steps": 2836, "total_steps": 3564, "loss": 0.5733506679534912, "lr": 3.982528491367025e-07, "epoch": 2.387205387205387, "percentage": 79.57, "elapsed_time": "1:23:38", "remaining_time": "0:21:28"}
101
+ {"current_steps": 2838, "total_steps": 3564, "loss": 0.47218313813209534, "lr": 3.9720806777366817e-07, "epoch": 2.388888888888889, "percentage": 79.63, "elapsed_time": "1:23:42", "remaining_time": "0:21:24"}
102
+ {"current_steps": 2840, "total_steps": 3564, "loss": 0.8041648864746094, "lr": 3.961657078245462e-07, "epoch": 2.3905723905723906, "percentage": 79.69, "elapsed_time": "1:23:45", "remaining_time": "0:21:21"}
103
+ {"current_steps": 2842, "total_steps": 3564, "loss": 0.3452025055885315, "lr": 3.9512577288070487e-07, "epoch": 2.3922558922558923, "percentage": 79.74, "elapsed_time": "1:23:48", "remaining_time": "0:21:17"}
104
+ {"current_steps": 2844, "total_steps": 3564, "loss": 0.9638313055038452, "lr": 3.940882665251576e-07, "epoch": 2.393939393939394, "percentage": 79.8, "elapsed_time": "1:23:52", "remaining_time": "0:21:14"}
105
+ {"current_steps": 2846, "total_steps": 3564, "loss": 0.7442007064819336, "lr": 3.930531923325506e-07, "epoch": 2.3956228956228958, "percentage": 79.85, "elapsed_time": "1:23:55", "remaining_time": "0:21:10"}
106
+ {"current_steps": 2848, "total_steps": 3564, "loss": 0.953087329864502, "lr": 3.920205538691497e-07, "epoch": 2.3973063973063975, "percentage": 79.91, "elapsed_time": "1:23:58", "remaining_time": "0:21:06"}
107
+ {"current_steps": 2850, "total_steps": 3564, "loss": 0.7336077094078064, "lr": 3.9099035469282906e-07, "epoch": 2.398989898989899, "percentage": 79.97, "elapsed_time": "1:24:02", "remaining_time": "0:21:03"}
108
+ {"current_steps": 2852, "total_steps": 3564, "loss": 0.390910804271698, "lr": 3.8996259835305835e-07, "epoch": 2.4006734006734005, "percentage": 80.02, "elapsed_time": "1:24:06", "remaining_time": "0:20:59"}
109
+ {"current_steps": 2854, "total_steps": 3564, "loss": 0.609326958656311, "lr": 3.8893728839089035e-07, "epoch": 2.4023569023569022, "percentage": 80.08, "elapsed_time": "1:24:10", "remaining_time": "0:20:56"}
110
+ {"current_steps": 2856, "total_steps": 3564, "loss": 0.5054650902748108, "lr": 3.879144283389495e-07, "epoch": 2.404040404040404, "percentage": 80.13, "elapsed_time": "1:24:13", "remaining_time": "0:20:52"}
111
+ {"current_steps": 2858, "total_steps": 3564, "loss": 0.6514500975608826, "lr": 3.8689402172141915e-07, "epoch": 2.4057239057239057, "percentage": 80.19, "elapsed_time": "1:24:17", "remaining_time": "0:20:49"}
112
+ {"current_steps": 2860, "total_steps": 3564, "loss": 0.41622331738471985, "lr": 3.8587607205402916e-07, "epoch": 2.4074074074074074, "percentage": 80.25, "elapsed_time": "1:24:21", "remaining_time": "0:20:45"}
113
+ {"current_steps": 2862, "total_steps": 3564, "loss": 0.7136590480804443, "lr": 3.848605828440444e-07, "epoch": 2.409090909090909, "percentage": 80.3, "elapsed_time": "1:24:25", "remaining_time": "0:20:42"}
114
+ {"current_steps": 2864, "total_steps": 3564, "loss": 0.4541894793510437, "lr": 3.8384755759025313e-07, "epoch": 2.410774410774411, "percentage": 80.36, "elapsed_time": "1:24:28", "remaining_time": "0:20:38"}
115
+ {"current_steps": 2866, "total_steps": 3564, "loss": 0.6994350552558899, "lr": 3.828369997829528e-07, "epoch": 2.4124579124579126, "percentage": 80.42, "elapsed_time": "1:24:31", "remaining_time": "0:20:35"}
116
+ {"current_steps": 2868, "total_steps": 3564, "loss": 0.8106458187103271, "lr": 3.818289129039405e-07, "epoch": 2.4141414141414144, "percentage": 80.47, "elapsed_time": "1:24:35", "remaining_time": "0:20:31"}
117
+ {"current_steps": 2870, "total_steps": 3564, "loss": 0.5665256977081299, "lr": 3.808233004264997e-07, "epoch": 2.4158249158249157, "percentage": 80.53, "elapsed_time": "1:24:38", "remaining_time": "0:20:28"}
118
+ {"current_steps": 2872, "total_steps": 3564, "loss": 0.44936102628707886, "lr": 3.79820165815389e-07, "epoch": 2.4175084175084174, "percentage": 80.58, "elapsed_time": "1:24:41", "remaining_time": "0:20:24"}
119
+ {"current_steps": 2874, "total_steps": 3564, "loss": 0.8391485214233398, "lr": 3.788195125268284e-07, "epoch": 2.419191919191919, "percentage": 80.64, "elapsed_time": "1:24:45", "remaining_time": "0:20:20"}
120
+ {"current_steps": 2876, "total_steps": 3564, "loss": 0.7489950656890869, "lr": 3.7782134400848995e-07, "epoch": 2.420875420875421, "percentage": 80.7, "elapsed_time": "1:24:48", "remaining_time": "0:20:17"}
121
+ {"current_steps": 2878, "total_steps": 3564, "loss": 0.4590849280357361, "lr": 3.768256636994843e-07, "epoch": 2.4225589225589226, "percentage": 80.75, "elapsed_time": "1:24:52", "remaining_time": "0:20:13"}
122
+ {"current_steps": 2880, "total_steps": 3564, "loss": 0.7745201587677002, "lr": 3.7583247503034864e-07, "epoch": 2.4242424242424243, "percentage": 80.81, "elapsed_time": "1:24:56", "remaining_time": "0:20:10"}
123
+ {"current_steps": 2882, "total_steps": 3564, "loss": 0.5334046483039856, "lr": 3.7484178142303625e-07, "epoch": 2.425925925925926, "percentage": 80.86, "elapsed_time": "1:25:00", "remaining_time": "0:20:06"}
124
+ {"current_steps": 2884, "total_steps": 3564, "loss": 0.5028021335601807, "lr": 3.738535862909031e-07, "epoch": 2.4276094276094278, "percentage": 80.92, "elapsed_time": "1:25:03", "remaining_time": "0:20:03"}
125
+ {"current_steps": 2886, "total_steps": 3564, "loss": 0.5118685960769653, "lr": 3.7286789303869735e-07, "epoch": 2.429292929292929, "percentage": 80.98, "elapsed_time": "1:25:06", "remaining_time": "0:19:59"}
126
+ {"current_steps": 2888, "total_steps": 3564, "loss": 0.5720535516738892, "lr": 3.7188470506254744e-07, "epoch": 2.430976430976431, "percentage": 81.03, "elapsed_time": "1:25:10", "remaining_time": "0:19:56"}
127
+ {"current_steps": 2890, "total_steps": 3564, "loss": 0.5391176342964172, "lr": 3.7090402574994885e-07, "epoch": 2.4326599326599325, "percentage": 81.09, "elapsed_time": "1:25:14", "remaining_time": "0:19:52"}
128
+ {"current_steps": 2892, "total_steps": 3564, "loss": 0.6294881105422974, "lr": 3.699258584797548e-07, "epoch": 2.4343434343434343, "percentage": 81.14, "elapsed_time": "1:25:18", "remaining_time": "0:19:49"}
129
+ {"current_steps": 2894, "total_steps": 3564, "loss": 0.9022385478019714, "lr": 3.6895020662216326e-07, "epoch": 2.436026936026936, "percentage": 81.2, "elapsed_time": "1:25:22", "remaining_time": "0:19:45"}
130
+ {"current_steps": 2896, "total_steps": 3564, "loss": 0.720146656036377, "lr": 3.679770735387052e-07, "epoch": 2.4377104377104377, "percentage": 81.26, "elapsed_time": "1:25:26", "remaining_time": "0:19:42"}
131
+ {"current_steps": 2898, "total_steps": 3564, "loss": 0.6195645332336426, "lr": 3.6700646258223343e-07, "epoch": 2.4393939393939394, "percentage": 81.31, "elapsed_time": "1:25:29", "remaining_time": "0:19:38"}
132
+ {"current_steps": 2900, "total_steps": 3564, "loss": 0.43182432651519775, "lr": 3.6603837709691153e-07, "epoch": 2.441077441077441, "percentage": 81.37, "elapsed_time": "1:25:33", "remaining_time": "0:19:35"}
133
+ {"current_steps": 2902, "total_steps": 3564, "loss": 0.7789742350578308, "lr": 3.6507282041820085e-07, "epoch": 2.442760942760943, "percentage": 81.43, "elapsed_time": "1:25:36", "remaining_time": "0:19:31"}
134
+ {"current_steps": 2904, "total_steps": 3564, "loss": 0.48242291808128357, "lr": 3.641097958728506e-07, "epoch": 2.4444444444444446, "percentage": 81.48, "elapsed_time": "1:25:40", "remaining_time": "0:19:28"}
135
+ {"current_steps": 2906, "total_steps": 3564, "loss": 0.3829724192619324, "lr": 3.631493067788858e-07, "epoch": 2.4461279461279464, "percentage": 81.54, "elapsed_time": "1:25:43", "remaining_time": "0:19:24"}
136
+ {"current_steps": 2908, "total_steps": 3564, "loss": 0.5261117815971375, "lr": 3.6219135644559506e-07, "epoch": 2.4478114478114477, "percentage": 81.59, "elapsed_time": "1:25:47", "remaining_time": "0:19:21"}
137
+ {"current_steps": 2910, "total_steps": 3564, "loss": 0.6702965497970581, "lr": 3.6123594817352046e-07, "epoch": 2.4494949494949494, "percentage": 81.65, "elapsed_time": "1:25:51", "remaining_time": "0:19:17"}
138
+ {"current_steps": 2912, "total_steps": 3564, "loss": 0.4730827212333679, "lr": 3.602830852544458e-07, "epoch": 2.451178451178451, "percentage": 81.71, "elapsed_time": "1:25:54", "remaining_time": "0:19:14"}
139
+ {"current_steps": 2914, "total_steps": 3564, "loss": 0.7823283076286316, "lr": 3.593327709713844e-07, "epoch": 2.452861952861953, "percentage": 81.76, "elapsed_time": "1:25:58", "remaining_time": "0:19:10"}
140
+ {"current_steps": 2916, "total_steps": 3564, "loss": 0.6686667203903198, "lr": 3.5838500859856893e-07, "epoch": 2.4545454545454546, "percentage": 81.82, "elapsed_time": "1:26:01", "remaining_time": "0:19:07"}
141
+ {"current_steps": 2918, "total_steps": 3564, "loss": 0.3113139867782593, "lr": 3.5743980140143975e-07, "epoch": 2.4562289562289563, "percentage": 81.87, "elapsed_time": "1:26:05", "remaining_time": "0:19:03"}
142
+ {"current_steps": 2920, "total_steps": 3564, "loss": 0.7965060472488403, "lr": 3.5649715263663297e-07, "epoch": 2.457912457912458, "percentage": 81.93, "elapsed_time": "1:26:08", "remaining_time": "0:18:59"}
143
+ {"current_steps": 2922, "total_steps": 3564, "loss": 0.43743637204170227, "lr": 3.5555706555197043e-07, "epoch": 2.45959595959596, "percentage": 81.99, "elapsed_time": "1:26:12", "remaining_time": "0:18:56"}
144
+ {"current_steps": 2924, "total_steps": 3564, "loss": 0.30664563179016113, "lr": 3.5461954338644795e-07, "epoch": 2.461279461279461, "percentage": 82.04, "elapsed_time": "1:26:15", "remaining_time": "0:18:52"}
145
+ {"current_steps": 2926, "total_steps": 3564, "loss": 0.5530849695205688, "lr": 3.536845893702234e-07, "epoch": 2.462962962962963, "percentage": 82.1, "elapsed_time": "1:26:19", "remaining_time": "0:18:49"}
146
+ {"current_steps": 2928, "total_steps": 3564, "loss": 0.5903668403625488, "lr": 3.527522067246068e-07, "epoch": 2.4646464646464645, "percentage": 82.15, "elapsed_time": "1:26:22", "remaining_time": "0:18:45"}
147
+ {"current_steps": 2930, "total_steps": 3564, "loss": 0.24971121549606323, "lr": 3.518223986620491e-07, "epoch": 2.4663299663299663, "percentage": 82.21, "elapsed_time": "1:26:25", "remaining_time": "0:18:42"}
148
+ {"current_steps": 2932, "total_steps": 3564, "loss": 0.654639482498169, "lr": 3.5089516838612986e-07, "epoch": 2.468013468013468, "percentage": 82.27, "elapsed_time": "1:26:29", "remaining_time": "0:18:38"}
149
+ {"current_steps": 2934, "total_steps": 3564, "loss": 0.6544331312179565, "lr": 3.499705190915476e-07, "epoch": 2.4696969696969697, "percentage": 82.32, "elapsed_time": "1:26:32", "remaining_time": "0:18:35"}
150
+ {"current_steps": 2936, "total_steps": 3564, "loss": 0.4527553915977478, "lr": 3.4904845396410854e-07, "epoch": 2.4713804713804715, "percentage": 82.38, "elapsed_time": "1:26:36", "remaining_time": "0:18:31"}
151
+ {"current_steps": 2938, "total_steps": 3564, "loss": 0.5435815453529358, "lr": 3.4812897618071445e-07, "epoch": 2.473063973063973, "percentage": 82.44, "elapsed_time": "1:26:39", "remaining_time": "0:18:27"}
152
+ {"current_steps": 2940, "total_steps": 3564, "loss": 0.4773102402687073, "lr": 3.472120889093536e-07, "epoch": 2.474747474747475, "percentage": 82.49, "elapsed_time": "1:26:43", "remaining_time": "0:18:24"}
153
+ {"current_steps": 2942, "total_steps": 3564, "loss": 0.40418028831481934, "lr": 3.462977953090884e-07, "epoch": 2.4764309764309766, "percentage": 82.55, "elapsed_time": "1:26:46", "remaining_time": "0:18:20"}
154
+ {"current_steps": 2944, "total_steps": 3564, "loss": 0.43912988901138306, "lr": 3.453860985300446e-07, "epoch": 2.478114478114478, "percentage": 82.6, "elapsed_time": "1:26:50", "remaining_time": "0:18:17"}
155
+ {"current_steps": 2946, "total_steps": 3564, "loss": 0.9208707213401794, "lr": 3.4447700171340164e-07, "epoch": 2.4797979797979797, "percentage": 82.66, "elapsed_time": "1:26:54", "remaining_time": "0:18:13"}
156
+ {"current_steps": 2948, "total_steps": 3564, "loss": 0.9445154666900635, "lr": 3.4357050799138053e-07, "epoch": 2.4814814814814814, "percentage": 82.72, "elapsed_time": "1:26:57", "remaining_time": "0:18:10"}
157
+ {"current_steps": 2950, "total_steps": 3564, "loss": 0.9850308895111084, "lr": 3.4266662048723337e-07, "epoch": 2.483164983164983, "percentage": 82.77, "elapsed_time": "1:27:01", "remaining_time": "0:18:06"}
158
+ {"current_steps": 2952, "total_steps": 3564, "loss": 0.8890873193740845, "lr": 3.417653423152329e-07, "epoch": 2.484848484848485, "percentage": 82.83, "elapsed_time": "1:27:05", "remaining_time": "0:18:03"}
159
+ {"current_steps": 2954, "total_steps": 3564, "loss": 0.5936705470085144, "lr": 3.4086667658066186e-07, "epoch": 2.4865319865319866, "percentage": 82.88, "elapsed_time": "1:27:08", "remaining_time": "0:17:59"}
160
+ {"current_steps": 2956, "total_steps": 3564, "loss": 0.8404591083526611, "lr": 3.3997062637980167e-07, "epoch": 2.4882154882154883, "percentage": 82.94, "elapsed_time": "1:27:12", "remaining_time": "0:17:56"}
161
+ {"current_steps": 2958, "total_steps": 3564, "loss": 0.5225011110305786, "lr": 3.390771947999224e-07, "epoch": 2.48989898989899, "percentage": 83.0, "elapsed_time": "1:27:15", "remaining_time": "0:17:52"}
162
+ {"current_steps": 2960, "total_steps": 3564, "loss": 0.8342874050140381, "lr": 3.381863849192718e-07, "epoch": 2.4915824915824913, "percentage": 83.05, "elapsed_time": "1:27:18", "remaining_time": "0:17:49"}
163
+ {"current_steps": 2962, "total_steps": 3564, "loss": 0.5838370323181152, "lr": 3.3729819980706444e-07, "epoch": 2.493265993265993, "percentage": 83.11, "elapsed_time": "1:27:22", "remaining_time": "0:17:45"}
164
+ {"current_steps": 2964, "total_steps": 3564, "loss": 0.7112206220626831, "lr": 3.364126425234719e-07, "epoch": 2.494949494949495, "percentage": 83.16, "elapsed_time": "1:27:25", "remaining_time": "0:17:41"}
165
+ {"current_steps": 2966, "total_steps": 3564, "loss": 0.5937138199806213, "lr": 3.3552971611961187e-07, "epoch": 2.4966329966329965, "percentage": 83.22, "elapsed_time": "1:27:28", "remaining_time": "0:17:38"}
166
+ {"current_steps": 2968, "total_steps": 3564, "loss": 0.81259685754776, "lr": 3.34649423637537e-07, "epoch": 2.4983164983164983, "percentage": 83.28, "elapsed_time": "1:27:32", "remaining_time": "0:17:34"}
167
+ {"current_steps": 2970, "total_steps": 3564, "loss": 0.8419524431228638, "lr": 3.337717681102253e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "1:27:35", "remaining_time": "0:17:31"}
168
+ {"current_steps": 2972, "total_steps": 3564, "loss": 0.36146029829978943, "lr": 3.328967525615697e-07, "epoch": 2.5016835016835017, "percentage": 83.39, "elapsed_time": "1:27:39", "remaining_time": "0:17:27"}
169
+ {"current_steps": 2974, "total_steps": 3564, "loss": 0.5271892547607422, "lr": 3.3202438000636634e-07, "epoch": 2.5033670033670035, "percentage": 83.45, "elapsed_time": "1:27:43", "remaining_time": "0:17:24"}
170
+ {"current_steps": 2976, "total_steps": 3564, "loss": 0.6813575029373169, "lr": 3.311546534503061e-07, "epoch": 2.505050505050505, "percentage": 83.5, "elapsed_time": "1:27:47", "remaining_time": "0:17:20"}
171
+ {"current_steps": 2978, "total_steps": 3564, "loss": 0.3660055994987488, "lr": 3.3028757588996303e-07, "epoch": 2.506734006734007, "percentage": 83.56, "elapsed_time": "1:27:50", "remaining_time": "0:17:17"}
172
+ {"current_steps": 2980, "total_steps": 3564, "loss": 0.7575110197067261, "lr": 3.294231503127839e-07, "epoch": 2.5084175084175087, "percentage": 83.61, "elapsed_time": "1:27:54", "remaining_time": "0:17:13"}
173
+ {"current_steps": 2982, "total_steps": 3564, "loss": 0.788750171661377, "lr": 3.2856137969707847e-07, "epoch": 2.51010101010101, "percentage": 83.67, "elapsed_time": "1:27:58", "remaining_time": "0:17:10"}
174
+ {"current_steps": 2984, "total_steps": 3564, "loss": 0.4518158435821533, "lr": 3.277022670120095e-07, "epoch": 2.5117845117845117, "percentage": 83.73, "elapsed_time": "1:28:01", "remaining_time": "0:17:06"}
175
+ {"current_steps": 2986, "total_steps": 3564, "loss": 0.7932558059692383, "lr": 3.268458152175813e-07, "epoch": 2.5134680134680134, "percentage": 83.78, "elapsed_time": "1:28:05", "remaining_time": "0:17:03"}
176
+ {"current_steps": 2988, "total_steps": 3564, "loss": 0.61873459815979, "lr": 3.2599202726463084e-07, "epoch": 2.515151515151515, "percentage": 83.84, "elapsed_time": "1:28:09", "remaining_time": "0:16:59"}
177
+ {"current_steps": 2990, "total_steps": 3564, "loss": 0.10597741603851318, "lr": 3.2514090609481683e-07, "epoch": 2.516835016835017, "percentage": 83.89, "elapsed_time": "1:28:12", "remaining_time": "0:16:56"}
178
+ {"current_steps": 2992, "total_steps": 3564, "loss": 0.8708055019378662, "lr": 3.2429245464060965e-07, "epoch": 2.5185185185185186, "percentage": 83.95, "elapsed_time": "1:28:16", "remaining_time": "0:16:52"}
179
+ {"current_steps": 2994, "total_steps": 3564, "loss": 0.5630843043327332, "lr": 3.234466758252818e-07, "epoch": 2.5202020202020203, "percentage": 84.01, "elapsed_time": "1:28:20", "remaining_time": "0:16:49"}
180
+ {"current_steps": 2996, "total_steps": 3564, "loss": 0.6830452084541321, "lr": 3.2260357256289715e-07, "epoch": 2.5218855218855216, "percentage": 84.06, "elapsed_time": "1:28:24", "remaining_time": "0:16:45"}
181
+ {"current_steps": 2998, "total_steps": 3564, "loss": 0.5143815875053406, "lr": 3.217631477583009e-07, "epoch": 2.5235690235690234, "percentage": 84.12, "elapsed_time": "1:28:27", "remaining_time": "0:16:42"}
182
+ {"current_steps": 3000, "total_steps": 3564, "loss": 0.5180540084838867, "lr": 3.2092540430711044e-07, "epoch": 2.525252525252525, "percentage": 84.18, "elapsed_time": "1:28:31", "remaining_time": "0:16:38"}
183
+ {"current_steps": 3002, "total_steps": 3564, "loss": 0.49375149607658386, "lr": 3.200903450957044e-07, "epoch": 2.526936026936027, "percentage": 84.23, "elapsed_time": "1:28:35", "remaining_time": "0:16:35"}
184
+ {"current_steps": 3004, "total_steps": 3564, "loss": 0.9845426082611084, "lr": 3.192579730012129e-07, "epoch": 2.5286195286195285, "percentage": 84.29, "elapsed_time": "1:28:39", "remaining_time": "0:16:31"}
185
+ {"current_steps": 3006, "total_steps": 3564, "loss": 0.7751657962799072, "lr": 3.184282908915081e-07, "epoch": 2.5303030303030303, "percentage": 84.34, "elapsed_time": "1:28:42", "remaining_time": "0:16:28"}
186
+ {"current_steps": 3008, "total_steps": 3564, "loss": 0.6437252759933472, "lr": 3.1760130162519427e-07, "epoch": 2.531986531986532, "percentage": 84.4, "elapsed_time": "1:28:45", "remaining_time": "0:16:24"}
187
+ {"current_steps": 3010, "total_steps": 3564, "loss": 0.33099907636642456, "lr": 3.16777008051597e-07, "epoch": 2.5336700336700337, "percentage": 84.46, "elapsed_time": "1:28:49", "remaining_time": "0:16:20"}
188
+ {"current_steps": 3012, "total_steps": 3564, "loss": 0.7693390846252441, "lr": 3.159554130107546e-07, "epoch": 2.5353535353535355, "percentage": 84.51, "elapsed_time": "1:28:52", "remaining_time": "0:16:17"}
189
+ {"current_steps": 3014, "total_steps": 3564, "loss": 0.6058576107025146, "lr": 3.1513651933340797e-07, "epoch": 2.537037037037037, "percentage": 84.57, "elapsed_time": "1:28:56", "remaining_time": "0:16:13"}
190
+ {"current_steps": 3016, "total_steps": 3564, "loss": 0.5138027667999268, "lr": 3.143203298409899e-07, "epoch": 2.538720538720539, "percentage": 84.62, "elapsed_time": "1:28:59", "remaining_time": "0:16:10"}
191
+ {"current_steps": 3018, "total_steps": 3564, "loss": 0.8655276298522949, "lr": 3.1350684734561676e-07, "epoch": 2.5404040404040407, "percentage": 84.68, "elapsed_time": "1:29:03", "remaining_time": "0:16:06"}
192
+ {"current_steps": 3020, "total_steps": 3564, "loss": 0.7289071083068848, "lr": 3.126960746500784e-07, "epoch": 2.542087542087542, "percentage": 84.74, "elapsed_time": "1:29:07", "remaining_time": "0:16:03"}
193
+ {"current_steps": 3022, "total_steps": 3564, "loss": 0.8041051030158997, "lr": 3.118880145478274e-07, "epoch": 2.5437710437710437, "percentage": 84.79, "elapsed_time": "1:29:10", "remaining_time": "0:15:59"}
194
+ {"current_steps": 3024, "total_steps": 3564, "loss": 0.978661835193634, "lr": 3.110826698229711e-07, "epoch": 2.5454545454545454, "percentage": 84.85, "elapsed_time": "1:29:14", "remaining_time": "0:15:56"}
195
+ {"current_steps": 3026, "total_steps": 3564, "loss": 0.2467118501663208, "lr": 3.102800432502607e-07, "epoch": 2.547138047138047, "percentage": 84.9, "elapsed_time": "1:29:17", "remaining_time": "0:15:52"}
196
+ {"current_steps": 3028, "total_steps": 3564, "loss": 0.522205114364624, "lr": 3.0948013759508274e-07, "epoch": 2.548821548821549, "percentage": 84.96, "elapsed_time": "1:29:21", "remaining_time": "0:15:49"}
197
+ {"current_steps": 3030, "total_steps": 3564, "loss": 0.4860239624977112, "lr": 3.0868295561344874e-07, "epoch": 2.5505050505050506, "percentage": 85.02, "elapsed_time": "1:29:25", "remaining_time": "0:15:45"}
198
+ {"current_steps": 3032, "total_steps": 3564, "loss": 0.4318680763244629, "lr": 3.078885000519858e-07, "epoch": 2.5521885521885523, "percentage": 85.07, "elapsed_time": "1:29:28", "remaining_time": "0:15:42"}
199
+ {"current_steps": 3034, "total_steps": 3564, "loss": 0.8540394306182861, "lr": 3.0709677364792767e-07, "epoch": 2.5538720538720536, "percentage": 85.13, "elapsed_time": "1:29:32", "remaining_time": "0:15:38"}
200
+ {"current_steps": 3036, "total_steps": 3564, "loss": 0.9184716939926147, "lr": 3.0630777912910533e-07, "epoch": 2.5555555555555554, "percentage": 85.19, "elapsed_time": "1:29:36", "remaining_time": "0:15:35"}
201
+ {"current_steps": 3038, "total_steps": 3564, "loss": 0.6098148822784424, "lr": 3.0552151921393633e-07, "epoch": 2.557239057239057, "percentage": 85.24, "elapsed_time": "1:29:40", "remaining_time": "0:15:31"}
202
+ {"current_steps": 3040, "total_steps": 3564, "loss": 0.9494307041168213, "lr": 3.0473799661141707e-07, "epoch": 2.558922558922559, "percentage": 85.3, "elapsed_time": "1:29:43", "remaining_time": "0:15:27"}
203
+ {"current_steps": 3042, "total_steps": 3564, "loss": 0.6524157524108887, "lr": 3.0395721402111286e-07, "epoch": 2.5606060606060606, "percentage": 85.35, "elapsed_time": "1:29:46", "remaining_time": "0:15:24"}
204
+ {"current_steps": 3044, "total_steps": 3564, "loss": 0.8453473448753357, "lr": 3.031791741331478e-07, "epoch": 2.5622895622895623, "percentage": 85.41, "elapsed_time": "1:29:50", "remaining_time": "0:15:20"}
205
+ {"current_steps": 3046, "total_steps": 3564, "loss": 0.6964143514633179, "lr": 3.0240387962819695e-07, "epoch": 2.563973063973064, "percentage": 85.47, "elapsed_time": "1:29:53", "remaining_time": "0:15:17"}
206
+ {"current_steps": 3048, "total_steps": 3564, "loss": 0.8597656488418579, "lr": 3.016313331774762e-07, "epoch": 2.5656565656565657, "percentage": 85.52, "elapsed_time": "1:29:57", "remaining_time": "0:15:13"}
207
+ {"current_steps": 3050, "total_steps": 3564, "loss": 0.3663683533668518, "lr": 3.008615374427329e-07, "epoch": 2.5673400673400675, "percentage": 85.58, "elapsed_time": "1:30:01", "remaining_time": "0:15:10"}
208
+ {"current_steps": 3052, "total_steps": 3564, "loss": 0.9516968131065369, "lr": 3.000944950762373e-07, "epoch": 2.569023569023569, "percentage": 85.63, "elapsed_time": "1:30:04", "remaining_time": "0:15:06"}
209
+ {"current_steps": 3054, "total_steps": 3564, "loss": 0.07853099703788757, "lr": 2.993302087207732e-07, "epoch": 2.570707070707071, "percentage": 85.69, "elapsed_time": "1:30:08", "remaining_time": "0:15:03"}
210
+ {"current_steps": 3056, "total_steps": 3564, "loss": 0.5600473284721375, "lr": 2.985686810096285e-07, "epoch": 2.5723905723905722, "percentage": 85.75, "elapsed_time": "1:30:11", "remaining_time": "0:14:59"}
211
+ {"current_steps": 3058, "total_steps": 3564, "loss": 0.3351885974407196, "lr": 2.978099145665867e-07, "epoch": 2.574074074074074, "percentage": 85.8, "elapsed_time": "1:30:15", "remaining_time": "0:14:56"}
212
+ {"current_steps": 3060, "total_steps": 3564, "loss": 0.6371778249740601, "lr": 2.970539120059174e-07, "epoch": 2.5757575757575757, "percentage": 85.86, "elapsed_time": "1:30:19", "remaining_time": "0:14:52"}
213
+ {"current_steps": 3062, "total_steps": 3564, "loss": 0.5941987037658691, "lr": 2.963006759323676e-07, "epoch": 2.5774410774410774, "percentage": 85.91, "elapsed_time": "1:30:22", "remaining_time": "0:14:49"}
214
+ {"current_steps": 3064, "total_steps": 3564, "loss": 0.424297571182251, "lr": 2.955502089411523e-07, "epoch": 2.579124579124579, "percentage": 85.97, "elapsed_time": "1:30:26", "remaining_time": "0:14:45"}
215
+ {"current_steps": 3066, "total_steps": 3564, "loss": 0.5996015667915344, "lr": 2.9480251361794656e-07, "epoch": 2.580808080808081, "percentage": 86.03, "elapsed_time": "1:30:29", "remaining_time": "0:14:41"}
216
+ {"current_steps": 3068, "total_steps": 3564, "loss": 0.3746086657047272, "lr": 2.940575925388746e-07, "epoch": 2.5824915824915826, "percentage": 86.08, "elapsed_time": "1:30:32", "remaining_time": "0:14:38"}
217
+ {"current_steps": 3070, "total_steps": 3564, "loss": 0.17353637516498566, "lr": 2.933154482705035e-07, "epoch": 2.584175084175084, "percentage": 86.14, "elapsed_time": "1:30:36", "remaining_time": "0:14:34"}
218
+ {"current_steps": 3072, "total_steps": 3564, "loss": 0.43435174226760864, "lr": 2.925760833698327e-07, "epoch": 2.5858585858585856, "percentage": 86.2, "elapsed_time": "1:30:39", "remaining_time": "0:14:31"}
219
+ {"current_steps": 3074, "total_steps": 3564, "loss": 0.8951042890548706, "lr": 2.9183950038428475e-07, "epoch": 2.5875420875420874, "percentage": 86.25, "elapsed_time": "1:30:42", "remaining_time": "0:14:27"}
220
+ {"current_steps": 3076, "total_steps": 3564, "loss": 0.35531511902809143, "lr": 2.9110570185169834e-07, "epoch": 2.589225589225589, "percentage": 86.31, "elapsed_time": "1:30:46", "remaining_time": "0:14:24"}
221
+ {"current_steps": 3078, "total_steps": 3564, "loss": 0.8299113512039185, "lr": 2.903746903003184e-07, "epoch": 2.590909090909091, "percentage": 86.36, "elapsed_time": "1:30:50", "remaining_time": "0:14:20"}
222
+ {"current_steps": 3080, "total_steps": 3564, "loss": 0.6478674411773682, "lr": 2.896464682487866e-07, "epoch": 2.5925925925925926, "percentage": 86.42, "elapsed_time": "1:30:53", "remaining_time": "0:14:17"}
223
+ {"current_steps": 3082, "total_steps": 3564, "loss": 0.9649114012718201, "lr": 2.8892103820613487e-07, "epoch": 2.5942760942760943, "percentage": 86.48, "elapsed_time": "1:30:57", "remaining_time": "0:14:13"}
224
+ {"current_steps": 3084, "total_steps": 3564, "loss": 0.5619069337844849, "lr": 2.88198402671775e-07, "epoch": 2.595959595959596, "percentage": 86.53, "elapsed_time": "1:31:01", "remaining_time": "0:14:10"}
225
+ {"current_steps": 3086, "total_steps": 3564, "loss": 0.5941061973571777, "lr": 2.874785641354901e-07, "epoch": 2.5976430976430978, "percentage": 86.59, "elapsed_time": "1:31:05", "remaining_time": "0:14:06"}
226
+ {"current_steps": 3088, "total_steps": 3564, "loss": 0.7975903153419495, "lr": 2.867615250774269e-07, "epoch": 2.5993265993265995, "percentage": 86.64, "elapsed_time": "1:31:08", "remaining_time": "0:14:02"}
227
+ {"current_steps": 3090, "total_steps": 3564, "loss": 0.8723431825637817, "lr": 2.860472879680869e-07, "epoch": 2.601010101010101, "percentage": 86.7, "elapsed_time": "1:31:12", "remaining_time": "0:13:59"}
228
+ {"current_steps": 3092, "total_steps": 3564, "loss": 0.6906735897064209, "lr": 2.8533585526831726e-07, "epoch": 2.602693602693603, "percentage": 86.76, "elapsed_time": "1:31:16", "remaining_time": "0:13:55"}
229
+ {"current_steps": 3094, "total_steps": 3564, "loss": 0.5048916339874268, "lr": 2.8462722942930286e-07, "epoch": 2.6043771043771042, "percentage": 86.81, "elapsed_time": "1:31:19", "remaining_time": "0:13:52"}
230
+ {"current_steps": 3096, "total_steps": 3564, "loss": 0.660202145576477, "lr": 2.8392141289255806e-07, "epoch": 2.606060606060606, "percentage": 86.87, "elapsed_time": "1:31:23", "remaining_time": "0:13:48"}
231
+ {"current_steps": 3098, "total_steps": 3564, "loss": 0.5634772777557373, "lr": 2.8321840808991775e-07, "epoch": 2.6077441077441077, "percentage": 86.92, "elapsed_time": "1:31:27", "remaining_time": "0:13:45"}
232
+ {"current_steps": 3100, "total_steps": 3564, "loss": 0.5956814289093018, "lr": 2.8251821744352933e-07, "epoch": 2.6094276094276094, "percentage": 86.98, "elapsed_time": "1:31:30", "remaining_time": "0:13:41"}
233
+ {"current_steps": 3102, "total_steps": 3564, "loss": 0.5830974578857422, "lr": 2.8182084336584423e-07, "epoch": 2.611111111111111, "percentage": 87.04, "elapsed_time": "1:31:34", "remaining_time": "0:13:38"}
234
+ {"current_steps": 3104, "total_steps": 3564, "loss": 0.8090439438819885, "lr": 2.8112628825960926e-07, "epoch": 2.612794612794613, "percentage": 87.09, "elapsed_time": "1:31:38", "remaining_time": "0:13:34"}
235
+ {"current_steps": 3106, "total_steps": 3564, "loss": 0.7719713449478149, "lr": 2.804345545178594e-07, "epoch": 2.6144781144781146, "percentage": 87.15, "elapsed_time": "1:31:41", "remaining_time": "0:13:31"}
236
+ {"current_steps": 3108, "total_steps": 3564, "loss": 0.18324008584022522, "lr": 2.7974564452390833e-07, "epoch": 2.616161616161616, "percentage": 87.21, "elapsed_time": "1:31:45", "remaining_time": "0:13:27"}
237
+ {"current_steps": 3110, "total_steps": 3564, "loss": 0.7723451852798462, "lr": 2.790595606513406e-07, "epoch": 2.6178451178451176, "percentage": 87.26, "elapsed_time": "1:31:48", "remaining_time": "0:13:24"}
238
+ {"current_steps": 3112, "total_steps": 3564, "loss": 0.39754652976989746, "lr": 2.78376305264004e-07, "epoch": 2.6195286195286194, "percentage": 87.32, "elapsed_time": "1:31:51", "remaining_time": "0:13:20"}
239
+ {"current_steps": 3114, "total_steps": 3564, "loss": 0.4727073609828949, "lr": 2.776958807160011e-07, "epoch": 2.621212121212121, "percentage": 87.37, "elapsed_time": "1:31:55", "remaining_time": "0:13:17"}
240
+ {"current_steps": 3116, "total_steps": 3564, "loss": 0.8447589874267578, "lr": 2.7701828935168026e-07, "epoch": 2.622895622895623, "percentage": 87.43, "elapsed_time": "1:31:59", "remaining_time": "0:13:13"}
241
+ {"current_steps": 3118, "total_steps": 3564, "loss": 1.0325953960418701, "lr": 2.763435335056291e-07, "epoch": 2.6245791245791246, "percentage": 87.49, "elapsed_time": "1:32:03", "remaining_time": "0:13:10"}
242
+ {"current_steps": 3120, "total_steps": 3564, "loss": 0.5554063320159912, "lr": 2.756716155026656e-07, "epoch": 2.6262626262626263, "percentage": 87.54, "elapsed_time": "1:32:06", "remaining_time": "0:13:06"}
243
+ {"current_steps": 3122, "total_steps": 3564, "loss": 0.9207072854042053, "lr": 2.750025376578295e-07, "epoch": 2.627946127946128, "percentage": 87.6, "elapsed_time": "1:32:10", "remaining_time": "0:13:02"}
244
+ {"current_steps": 3124, "total_steps": 3564, "loss": 0.8367090225219727, "lr": 2.743363022763758e-07, "epoch": 2.6296296296296298, "percentage": 87.65, "elapsed_time": "1:32:14", "remaining_time": "0:12:59"}
245
+ {"current_steps": 3126, "total_steps": 3564, "loss": 0.6048181056976318, "lr": 2.7367291165376593e-07, "epoch": 2.6313131313131315, "percentage": 87.71, "elapsed_time": "1:32:18", "remaining_time": "0:12:55"}
246
+ {"current_steps": 3128, "total_steps": 3564, "loss": 0.808570384979248, "lr": 2.7301236807565925e-07, "epoch": 2.6329966329966332, "percentage": 87.77, "elapsed_time": "1:32:21", "remaining_time": "0:12:52"}
247
+ {"current_steps": 3130, "total_steps": 3564, "loss": 0.49354591965675354, "lr": 2.7235467381790654e-07, "epoch": 2.634680134680135, "percentage": 87.82, "elapsed_time": "1:32:25", "remaining_time": "0:12:48"}
248
+ {"current_steps": 3132, "total_steps": 3564, "loss": 0.2983268201351166, "lr": 2.716998311465415e-07, "epoch": 2.6363636363636362, "percentage": 87.88, "elapsed_time": "1:32:29", "remaining_time": "0:12:45"}
249
+ {"current_steps": 3134, "total_steps": 3564, "loss": 0.8370668888092041, "lr": 2.710478423177722e-07, "epoch": 2.638047138047138, "percentage": 87.93, "elapsed_time": "1:32:33", "remaining_time": "0:12:41"}
250
+ {"current_steps": 3136, "total_steps": 3564, "loss": 0.7652538418769836, "lr": 2.7039870957797464e-07, "epoch": 2.6397306397306397, "percentage": 87.99, "elapsed_time": "1:32:37", "remaining_time": "0:12:38"}
251
+ {"current_steps": 3138, "total_steps": 3564, "loss": 0.4114927649497986, "lr": 2.697524351636844e-07, "epoch": 2.6414141414141414, "percentage": 88.05, "elapsed_time": "1:32:41", "remaining_time": "0:12:34"}
252
+ {"current_steps": 3140, "total_steps": 3564, "loss": 0.8686310052871704, "lr": 2.691090213015886e-07, "epoch": 2.643097643097643, "percentage": 88.1, "elapsed_time": "1:32:44", "remaining_time": "0:12:31"}
253
+ {"current_steps": 3142, "total_steps": 3564, "loss": 0.5540004372596741, "lr": 2.6846847020851884e-07, "epoch": 2.644781144781145, "percentage": 88.16, "elapsed_time": "1:32:47", "remaining_time": "0:12:27"}
254
+ {"current_steps": 3144, "total_steps": 3564, "loss": 0.8333272933959961, "lr": 2.678307840914431e-07, "epoch": 2.6464646464646466, "percentage": 88.22, "elapsed_time": "1:32:51", "remaining_time": "0:12:24"}
255
+ {"current_steps": 3146, "total_steps": 3564, "loss": 0.9629621505737305, "lr": 2.6719596514745826e-07, "epoch": 2.648148148148148, "percentage": 88.27, "elapsed_time": "1:32:55", "remaining_time": "0:12:20"}
256
+ {"current_steps": 3148, "total_steps": 3564, "loss": 0.5129526853561401, "lr": 2.665640155637828e-07, "epoch": 2.6498316498316496, "percentage": 88.33, "elapsed_time": "1:32:58", "remaining_time": "0:12:17"}
257
+ {"current_steps": 3150, "total_steps": 3564, "loss": 0.8636926412582397, "lr": 2.659349375177489e-07, "epoch": 2.6515151515151514, "percentage": 88.38, "elapsed_time": "1:33:01", "remaining_time": "0:12:13"}
258
+ {"current_steps": 3152, "total_steps": 3564, "loss": 0.20498168468475342, "lr": 2.6530873317679515e-07, "epoch": 2.653198653198653, "percentage": 88.44, "elapsed_time": "1:33:05", "remaining_time": "0:12:10"}
259
+ {"current_steps": 3154, "total_steps": 3564, "loss": 0.9441362619400024, "lr": 2.6468540469845895e-07, "epoch": 2.654882154882155, "percentage": 88.5, "elapsed_time": "1:33:08", "remaining_time": "0:12:06"}
260
+ {"current_steps": 3156, "total_steps": 3564, "loss": 0.5518494844436646, "lr": 2.640649542303693e-07, "epoch": 2.6565656565656566, "percentage": 88.55, "elapsed_time": "1:33:12", "remaining_time": "0:12:03"}
261
+ {"current_steps": 3158, "total_steps": 3564, "loss": 0.35931962728500366, "lr": 2.634473839102389e-07, "epoch": 2.6582491582491583, "percentage": 88.61, "elapsed_time": "1:33:15", "remaining_time": "0:11:59"}
262
+ {"current_steps": 3160, "total_steps": 3564, "loss": 0.44168537855148315, "lr": 2.6283269586585737e-07, "epoch": 2.65993265993266, "percentage": 88.66, "elapsed_time": "1:33:19", "remaining_time": "0:11:55"}
263
+ {"current_steps": 3162, "total_steps": 3564, "loss": 0.6104831695556641, "lr": 2.6222089221508404e-07, "epoch": 2.6616161616161618, "percentage": 88.72, "elapsed_time": "1:33:23", "remaining_time": "0:11:52"}
264
+ {"current_steps": 3164, "total_steps": 3564, "loss": 0.8378016352653503, "lr": 2.6161197506583944e-07, "epoch": 2.6632996632996635, "percentage": 88.78, "elapsed_time": "1:33:26", "remaining_time": "0:11:48"}
265
+ {"current_steps": 3166, "total_steps": 3564, "loss": 0.6439419984817505, "lr": 2.610059465160995e-07, "epoch": 2.6649831649831652, "percentage": 88.83, "elapsed_time": "1:33:30", "remaining_time": "0:11:45"}
266
+ {"current_steps": 3168, "total_steps": 3564, "loss": 0.7727220058441162, "lr": 2.6040280865388773e-07, "epoch": 2.6666666666666665, "percentage": 88.89, "elapsed_time": "1:33:34", "remaining_time": "0:11:41"}
267
+ {"current_steps": 3170, "total_steps": 3564, "loss": 0.6320611834526062, "lr": 2.5980256355726744e-07, "epoch": 2.6683501683501682, "percentage": 88.95, "elapsed_time": "1:33:37", "remaining_time": "0:11:38"}
268
+ {"current_steps": 3172, "total_steps": 3564, "loss": 1.043792963027954, "lr": 2.5920521329433606e-07, "epoch": 2.67003367003367, "percentage": 89.0, "elapsed_time": "1:33:40", "remaining_time": "0:11:34"}
269
+ {"current_steps": 3174, "total_steps": 3564, "loss": 0.9384379386901855, "lr": 2.586107599232164e-07, "epoch": 2.6717171717171717, "percentage": 89.06, "elapsed_time": "1:33:44", "remaining_time": "0:11:31"}
270
+ {"current_steps": 3176, "total_steps": 3564, "loss": 0.4818713068962097, "lr": 2.5801920549205023e-07, "epoch": 2.6734006734006734, "percentage": 89.11, "elapsed_time": "1:33:48", "remaining_time": "0:11:27"}
271
+ {"current_steps": 3178, "total_steps": 3564, "loss": 0.9861509799957275, "lr": 2.5743055203899167e-07, "epoch": 2.675084175084175, "percentage": 89.17, "elapsed_time": "1:33:52", "remaining_time": "0:11:24"}
272
+ {"current_steps": 3180, "total_steps": 3564, "loss": 0.6932214498519897, "lr": 2.568448015921996e-07, "epoch": 2.676767676767677, "percentage": 89.23, "elapsed_time": "1:33:55", "remaining_time": "0:11:20"}
273
+ {"current_steps": 3182, "total_steps": 3564, "loss": 0.7709292769432068, "lr": 2.562619561698306e-07, "epoch": 2.678451178451178, "percentage": 89.28, "elapsed_time": "1:33:59", "remaining_time": "0:11:16"}
274
+ {"current_steps": 3184, "total_steps": 3564, "loss": 0.8786018490791321, "lr": 2.556820177800324e-07, "epoch": 2.68013468013468, "percentage": 89.34, "elapsed_time": "1:34:02", "remaining_time": "0:11:13"}
275
+ {"current_steps": 3186, "total_steps": 3564, "loss": 0.7843552827835083, "lr": 2.551049884209371e-07, "epoch": 2.6818181818181817, "percentage": 89.39, "elapsed_time": "1:34:05", "remaining_time": "0:11:09"}
276
+ {"current_steps": 3188, "total_steps": 3564, "loss": 0.7388215661048889, "lr": 2.5453087008065307e-07, "epoch": 2.6835016835016834, "percentage": 89.45, "elapsed_time": "1:34:09", "remaining_time": "0:11:06"}
277
+ {"current_steps": 3190, "total_steps": 3564, "loss": 0.552982747554779, "lr": 2.5395966473725994e-07, "epoch": 2.685185185185185, "percentage": 89.51, "elapsed_time": "1:34:12", "remaining_time": "0:11:02"}
278
+ {"current_steps": 3192, "total_steps": 3564, "loss": 0.617717981338501, "lr": 2.5339137435880043e-07, "epoch": 2.686868686868687, "percentage": 89.56, "elapsed_time": "1:34:16", "remaining_time": "0:10:59"}
279
+ {"current_steps": 3194, "total_steps": 3564, "loss": 0.7265998125076294, "lr": 2.5282600090327383e-07, "epoch": 2.6885521885521886, "percentage": 89.62, "elapsed_time": "1:34:20", "remaining_time": "0:10:55"}
280
+ {"current_steps": 3196, "total_steps": 3564, "loss": 0.6202006340026855, "lr": 2.5226354631862966e-07, "epoch": 2.6902356902356903, "percentage": 89.67, "elapsed_time": "1:34:23", "remaining_time": "0:10:52"}
281
+ {"current_steps": 3198, "total_steps": 3564, "loss": 0.741972804069519, "lr": 2.517040125427608e-07, "epoch": 2.691919191919192, "percentage": 89.73, "elapsed_time": "1:34:27", "remaining_time": "0:10:48"}
282
+ {"current_steps": 3200, "total_steps": 3564, "loss": 0.8759193420410156, "lr": 2.511474015034964e-07, "epoch": 2.6936026936026938, "percentage": 89.79, "elapsed_time": "1:34:31", "remaining_time": "0:10:45"}
283
+ {"current_steps": 3202, "total_steps": 3564, "loss": 0.6976549625396729, "lr": 2.5059371511859557e-07, "epoch": 2.6952861952861955, "percentage": 89.84, "elapsed_time": "1:34:35", "remaining_time": "0:10:41"}
284
+ {"current_steps": 3204, "total_steps": 3564, "loss": 0.6694223880767822, "lr": 2.50042955295741e-07, "epoch": 2.6969696969696972, "percentage": 89.9, "elapsed_time": "1:34:38", "remaining_time": "0:10:38"}
285
+ {"current_steps": 3206, "total_steps": 3564, "loss": 0.7830284833908081, "lr": 2.494951239325321e-07, "epoch": 2.6986531986531985, "percentage": 89.96, "elapsed_time": "1:34:42", "remaining_time": "0:10:34"}
286
+ {"current_steps": 3208, "total_steps": 3564, "loss": 0.5429476499557495, "lr": 2.489502229164781e-07, "epoch": 2.7003367003367003, "percentage": 90.01, "elapsed_time": "1:34:45", "remaining_time": "0:10:30"}
287
+ {"current_steps": 3210, "total_steps": 3564, "loss": 0.8423386812210083, "lr": 2.4840825412499274e-07, "epoch": 2.702020202020202, "percentage": 90.07, "elapsed_time": "1:34:49", "remaining_time": "0:10:27"}
288
+ {"current_steps": 3212, "total_steps": 3564, "loss": 0.4965520203113556, "lr": 2.478692194253861e-07, "epoch": 2.7037037037037037, "percentage": 90.12, "elapsed_time": "1:34:52", "remaining_time": "0:10:23"}
289
+ {"current_steps": 3214, "total_steps": 3564, "loss": 0.6127833127975464, "lr": 2.473331206748597e-07, "epoch": 2.7053872053872055, "percentage": 90.18, "elapsed_time": "1:34:56", "remaining_time": "0:10:20"}
290
+ {"current_steps": 3216, "total_steps": 3564, "loss": 0.2938854694366455, "lr": 2.467999597204996e-07, "epoch": 2.707070707070707, "percentage": 90.24, "elapsed_time": "1:34:59", "remaining_time": "0:10:16"}
291
+ {"current_steps": 3218, "total_steps": 3564, "loss": 0.7545672655105591, "lr": 2.462697383992691e-07, "epoch": 2.708754208754209, "percentage": 90.29, "elapsed_time": "1:35:03", "remaining_time": "0:10:13"}
292
+ {"current_steps": 3220, "total_steps": 3564, "loss": 0.3368055820465088, "lr": 2.457424585380041e-07, "epoch": 2.71043771043771, "percentage": 90.35, "elapsed_time": "1:35:06", "remaining_time": "0:10:09"}
293
+ {"current_steps": 3222, "total_steps": 3564, "loss": 0.7228003144264221, "lr": 2.4521812195340544e-07, "epoch": 2.712121212121212, "percentage": 90.4, "elapsed_time": "1:35:10", "remaining_time": "0:10:06"}
294
+ {"current_steps": 3224, "total_steps": 3564, "loss": 0.39306753873825073, "lr": 2.4469673045203333e-07, "epoch": 2.7138047138047137, "percentage": 90.46, "elapsed_time": "1:35:13", "remaining_time": "0:10:02"}
295
+ {"current_steps": 3226, "total_steps": 3564, "loss": 0.388794481754303, "lr": 2.441782858303007e-07, "epoch": 2.7154882154882154, "percentage": 90.52, "elapsed_time": "1:35:16", "remaining_time": "0:09:58"}
296
+ {"current_steps": 3228, "total_steps": 3564, "loss": 0.7990210056304932, "lr": 2.436627898744678e-07, "epoch": 2.717171717171717, "percentage": 90.57, "elapsed_time": "1:35:19", "remaining_time": "0:09:55"}
297
+ {"current_steps": 3230, "total_steps": 3564, "loss": 0.3864361643791199, "lr": 2.4315024436063464e-07, "epoch": 2.718855218855219, "percentage": 90.63, "elapsed_time": "1:35:23", "remaining_time": "0:09:51"}
298
+ {"current_steps": 3232, "total_steps": 3564, "loss": 0.8147022724151611, "lr": 2.4264065105473637e-07, "epoch": 2.7205387205387206, "percentage": 90.68, "elapsed_time": "1:35:26", "remaining_time": "0:09:48"}
299
+ {"current_steps": 3234, "total_steps": 3564, "loss": 0.6463346481323242, "lr": 2.4213401171253656e-07, "epoch": 2.7222222222222223, "percentage": 90.74, "elapsed_time": "1:35:30", "remaining_time": "0:09:44"}
300
+ {"current_steps": 3236, "total_steps": 3564, "loss": 0.7769128084182739, "lr": 2.416303280796206e-07, "epoch": 2.723905723905724, "percentage": 90.8, "elapsed_time": "1:35:33", "remaining_time": "0:09:41"}
301
+ {"current_steps": 3238, "total_steps": 3564, "loss": 0.7157000303268433, "lr": 2.411296018913907e-07, "epoch": 2.725589225589226, "percentage": 90.85, "elapsed_time": "1:35:37", "remaining_time": "0:09:37"}
302
+ {"current_steps": 3240, "total_steps": 3564, "loss": 0.7306414842605591, "lr": 2.406318348730592e-07, "epoch": 2.7272727272727275, "percentage": 90.91, "elapsed_time": "1:35:41", "remaining_time": "0:09:34"}
303
+ {"current_steps": 3242, "total_steps": 3564, "loss": 0.8304033279418945, "lr": 2.401370287396428e-07, "epoch": 2.728956228956229, "percentage": 90.97, "elapsed_time": "1:35:45", "remaining_time": "0:09:30"}
304
+ {"current_steps": 3244, "total_steps": 3564, "loss": 0.5530973672866821, "lr": 2.396451851959571e-07, "epoch": 2.7306397306397305, "percentage": 91.02, "elapsed_time": "1:35:48", "remaining_time": "0:09:27"}
305
+ {"current_steps": 3246, "total_steps": 3564, "loss": 0.7806906700134277, "lr": 2.391563059366099e-07, "epoch": 2.7323232323232323, "percentage": 91.08, "elapsed_time": "1:35:52", "remaining_time": "0:09:23"}
306
+ {"current_steps": 3248, "total_steps": 3564, "loss": 0.805009126663208, "lr": 2.3867039264599587e-07, "epoch": 2.734006734006734, "percentage": 91.13, "elapsed_time": "1:35:56", "remaining_time": "0:09:20"}
307
+ {"current_steps": 3250, "total_steps": 3564, "loss": 0.6719311475753784, "lr": 2.3818744699829105e-07, "epoch": 2.7356902356902357, "percentage": 91.19, "elapsed_time": "1:35:59", "remaining_time": "0:09:16"}
308
+ {"current_steps": 3252, "total_steps": 3564, "loss": 0.37460649013519287, "lr": 2.3770747065744594e-07, "epoch": 2.7373737373737375, "percentage": 91.25, "elapsed_time": "1:36:02", "remaining_time": "0:09:12"}
309
+ {"current_steps": 3254, "total_steps": 3564, "loss": 0.528462290763855, "lr": 2.3723046527718137e-07, "epoch": 2.739057239057239, "percentage": 91.3, "elapsed_time": "1:36:06", "remaining_time": "0:09:09"}
310
+ {"current_steps": 3256, "total_steps": 3564, "loss": 0.4876176714897156, "lr": 2.367564325009815e-07, "epoch": 2.7407407407407405, "percentage": 91.36, "elapsed_time": "1:36:09", "remaining_time": "0:09:05"}
311
+ {"current_steps": 3258, "total_steps": 3564, "loss": 0.6226130723953247, "lr": 2.362853739620885e-07, "epoch": 2.742424242424242, "percentage": 91.41, "elapsed_time": "1:36:13", "remaining_time": "0:09:02"}
312
+ {"current_steps": 3260, "total_steps": 3564, "loss": 0.4137502908706665, "lr": 2.3581729128349745e-07, "epoch": 2.744107744107744, "percentage": 91.47, "elapsed_time": "1:36:16", "remaining_time": "0:08:58"}
313
+ {"current_steps": 3262, "total_steps": 3564, "loss": 0.6418605446815491, "lr": 2.3535218607795013e-07, "epoch": 2.7457912457912457, "percentage": 91.53, "elapsed_time": "1:36:20", "remaining_time": "0:08:55"}
314
+ {"current_steps": 3264, "total_steps": 3564, "loss": 0.857982337474823, "lr": 2.3489005994792948e-07, "epoch": 2.7474747474747474, "percentage": 91.58, "elapsed_time": "1:36:23", "remaining_time": "0:08:51"}
315
+ {"current_steps": 3266, "total_steps": 3564, "loss": 0.958759605884552, "lr": 2.3443091448565454e-07, "epoch": 2.749158249158249, "percentage": 91.64, "elapsed_time": "1:36:26", "remaining_time": "0:08:48"}
316
+ {"current_steps": 3268, "total_steps": 3564, "loss": 0.4375573396682739, "lr": 2.339747512730749e-07, "epoch": 2.750841750841751, "percentage": 91.69, "elapsed_time": "1:36:30", "remaining_time": "0:08:44"}
317
+ {"current_steps": 3270, "total_steps": 3564, "loss": 0.9555472135543823, "lr": 2.3352157188186424e-07, "epoch": 2.7525252525252526, "percentage": 91.75, "elapsed_time": "1:36:33", "remaining_time": "0:08:40"}
318
+ {"current_steps": 3272, "total_steps": 3564, "loss": 0.5092712044715881, "lr": 2.3307137787341667e-07, "epoch": 2.7542087542087543, "percentage": 91.81, "elapsed_time": "1:36:37", "remaining_time": "0:08:37"}
319
+ {"current_steps": 3274, "total_steps": 3564, "loss": 0.7026905417442322, "lr": 2.3262417079883986e-07, "epoch": 2.755892255892256, "percentage": 91.86, "elapsed_time": "1:36:41", "remaining_time": "0:08:33"}
320
+ {"current_steps": 3276, "total_steps": 3564, "loss": 0.3385421633720398, "lr": 2.3217995219895016e-07, "epoch": 2.757575757575758, "percentage": 91.92, "elapsed_time": "1:36:44", "remaining_time": "0:08:30"}
321
+ {"current_steps": 3278, "total_steps": 3564, "loss": 0.03149527311325073, "lr": 2.317387236042678e-07, "epoch": 2.7592592592592595, "percentage": 91.98, "elapsed_time": "1:36:48", "remaining_time": "0:08:26"}
322
+ {"current_steps": 3280, "total_steps": 3564, "loss": 1.0571789741516113, "lr": 2.313004865350109e-07, "epoch": 2.760942760942761, "percentage": 92.03, "elapsed_time": "1:36:52", "remaining_time": "0:08:23"}
323
+ {"current_steps": 3282, "total_steps": 3564, "loss": 1.0254530906677246, "lr": 2.3086524250109045e-07, "epoch": 2.7626262626262625, "percentage": 92.09, "elapsed_time": "1:36:56", "remaining_time": "0:08:19"}
324
+ {"current_steps": 3284, "total_steps": 3564, "loss": 0.2718232274055481, "lr": 2.3043299300210528e-07, "epoch": 2.7643097643097643, "percentage": 92.14, "elapsed_time": "1:36:59", "remaining_time": "0:08:16"}
325
+ {"current_steps": 3286, "total_steps": 3564, "loss": 0.7651864290237427, "lr": 2.30003739527337e-07, "epoch": 2.765993265993266, "percentage": 92.2, "elapsed_time": "1:37:03", "remaining_time": "0:08:12"}
326
+ {"current_steps": 3288, "total_steps": 3564, "loss": 0.7020351886749268, "lr": 2.2957748355574408e-07, "epoch": 2.7676767676767677, "percentage": 92.26, "elapsed_time": "1:37:06", "remaining_time": "0:08:09"}
327
+ {"current_steps": 3290, "total_steps": 3564, "loss": 0.20551855862140656, "lr": 2.2915422655595795e-07, "epoch": 2.7693602693602695, "percentage": 92.31, "elapsed_time": "1:37:09", "remaining_time": "0:08:05"}
328
+ {"current_steps": 3292, "total_steps": 3564, "loss": 0.9749652147293091, "lr": 2.287339699862771e-07, "epoch": 2.771043771043771, "percentage": 92.37, "elapsed_time": "1:37:12", "remaining_time": "0:08:01"}
329
+ {"current_steps": 3294, "total_steps": 3564, "loss": 0.7997506260871887, "lr": 2.2831671529466205e-07, "epoch": 2.7727272727272725, "percentage": 92.42, "elapsed_time": "1:37:16", "remaining_time": "0:07:58"}
330
+ {"current_steps": 3296, "total_steps": 3564, "loss": 0.8032985925674438, "lr": 2.2790246391873086e-07, "epoch": 2.774410774410774, "percentage": 92.48, "elapsed_time": "1:37:20", "remaining_time": "0:07:54"}
331
+ {"current_steps": 3298, "total_steps": 3564, "loss": 0.23050040006637573, "lr": 2.2749121728575393e-07, "epoch": 2.776094276094276, "percentage": 92.54, "elapsed_time": "1:37:23", "remaining_time": "0:07:51"}
332
+ {"current_steps": 3300, "total_steps": 3564, "loss": 0.45907649397850037, "lr": 2.2708297681264874e-07, "epoch": 2.7777777777777777, "percentage": 92.59, "elapsed_time": "1:37:27", "remaining_time": "0:07:47"}
333
+ {"current_steps": 3302, "total_steps": 3564, "loss": 0.4696184992790222, "lr": 2.2667774390597562e-07, "epoch": 2.7794612794612794, "percentage": 92.65, "elapsed_time": "1:37:31", "remaining_time": "0:07:44"}
334
+ {"current_steps": 3304, "total_steps": 3564, "loss": 0.47576916217803955, "lr": 2.2627551996193247e-07, "epoch": 2.781144781144781, "percentage": 92.7, "elapsed_time": "1:37:34", "remaining_time": "0:07:40"}
335
+ {"current_steps": 3306, "total_steps": 3564, "loss": 0.6657184362411499, "lr": 2.2587630636634985e-07, "epoch": 2.782828282828283, "percentage": 92.76, "elapsed_time": "1:37:38", "remaining_time": "0:07:37"}
336
+ {"current_steps": 3308, "total_steps": 3564, "loss": 0.48266786336898804, "lr": 2.2548010449468676e-07, "epoch": 2.7845117845117846, "percentage": 92.82, "elapsed_time": "1:37:41", "remaining_time": "0:07:33"}
337
+ {"current_steps": 3310, "total_steps": 3564, "loss": 0.6634323596954346, "lr": 2.2508691571202528e-07, "epoch": 2.7861952861952863, "percentage": 92.87, "elapsed_time": "1:37:45", "remaining_time": "0:07:30"}
338
+ {"current_steps": 3312, "total_steps": 3564, "loss": 0.4185872972011566, "lr": 2.2469674137306627e-07, "epoch": 2.787878787878788, "percentage": 92.93, "elapsed_time": "1:37:49", "remaining_time": "0:07:26"}
339
+ {"current_steps": 3314, "total_steps": 3564, "loss": 0.6932981014251709, "lr": 2.2430958282212414e-07, "epoch": 2.78956228956229, "percentage": 92.99, "elapsed_time": "1:37:52", "remaining_time": "0:07:23"}
340
+ {"current_steps": 3316, "total_steps": 3564, "loss": 0.9720036387443542, "lr": 2.239254413931236e-07, "epoch": 2.791245791245791, "percentage": 93.04, "elapsed_time": "1:37:56", "remaining_time": "0:07:19"}
341
+ {"current_steps": 3318, "total_steps": 3564, "loss": 0.7453635334968567, "lr": 2.2354431840959307e-07, "epoch": 2.792929292929293, "percentage": 93.1, "elapsed_time": "1:37:59", "remaining_time": "0:07:15"}
342
+ {"current_steps": 3320, "total_steps": 3564, "loss": 0.3255777359008789, "lr": 2.2316621518466167e-07, "epoch": 2.7946127946127945, "percentage": 93.15, "elapsed_time": "1:38:03", "remaining_time": "0:07:12"}
343
+ {"current_steps": 3322, "total_steps": 3564, "loss": 0.6090131998062134, "lr": 2.227911330210542e-07, "epoch": 2.7962962962962963, "percentage": 93.21, "elapsed_time": "1:38:07", "remaining_time": "0:07:08"}
344
+ {"current_steps": 3324, "total_steps": 3564, "loss": 0.6710550785064697, "lr": 2.2241907321108638e-07, "epoch": 2.797979797979798, "percentage": 93.27, "elapsed_time": "1:38:10", "remaining_time": "0:07:05"}
345
+ {"current_steps": 3326, "total_steps": 3564, "loss": 0.30255502462387085, "lr": 2.22050037036661e-07, "epoch": 2.7996632996632997, "percentage": 93.32, "elapsed_time": "1:38:13", "remaining_time": "0:07:01"}
346
+ {"current_steps": 3328, "total_steps": 3564, "loss": 0.723252534866333, "lr": 2.216840257692628e-07, "epoch": 2.8013468013468015, "percentage": 93.38, "elapsed_time": "1:38:17", "remaining_time": "0:06:58"}
347
+ {"current_steps": 3330, "total_steps": 3564, "loss": 0.78731769323349, "lr": 2.213210406699547e-07, "epoch": 2.8030303030303028, "percentage": 93.43, "elapsed_time": "1:38:21", "remaining_time": "0:06:54"}
348
+ {"current_steps": 3332, "total_steps": 3564, "loss": 0.5705679655075073, "lr": 2.209610829893729e-07, "epoch": 2.8047138047138045, "percentage": 93.49, "elapsed_time": "1:38:24", "remaining_time": "0:06:51"}
349
+ {"current_steps": 3334, "total_steps": 3564, "loss": 0.4503876864910126, "lr": 2.2060415396772337e-07, "epoch": 2.8063973063973062, "percentage": 93.55, "elapsed_time": "1:38:27", "remaining_time": "0:06:47"}
350
+ {"current_steps": 3336, "total_steps": 3564, "loss": 0.5614144802093506, "lr": 2.2025025483477654e-07, "epoch": 2.808080808080808, "percentage": 93.6, "elapsed_time": "1:38:30", "remaining_time": "0:06:43"}
351
+ {"current_steps": 3338, "total_steps": 3564, "loss": 0.27632904052734375, "lr": 2.1989938680986382e-07, "epoch": 2.8097643097643097, "percentage": 93.66, "elapsed_time": "1:38:33", "remaining_time": "0:06:40"}
352
+ {"current_steps": 3340, "total_steps": 3564, "loss": 0.6297179460525513, "lr": 2.1955155110187344e-07, "epoch": 2.8114478114478114, "percentage": 93.71, "elapsed_time": "1:38:36", "remaining_time": "0:06:36"}
353
+ {"current_steps": 3342, "total_steps": 3564, "loss": 0.7801995873451233, "lr": 2.1920674890924545e-07, "epoch": 2.813131313131313, "percentage": 93.77, "elapsed_time": "1:38:40", "remaining_time": "0:06:33"}
354
+ {"current_steps": 3344, "total_steps": 3564, "loss": 0.3154934346675873, "lr": 2.1886498141996858e-07, "epoch": 2.814814814814815, "percentage": 93.83, "elapsed_time": "1:38:43", "remaining_time": "0:06:29"}
355
+ {"current_steps": 3346, "total_steps": 3564, "loss": 0.7565585374832153, "lr": 2.185262498115759e-07, "epoch": 2.8164983164983166, "percentage": 93.88, "elapsed_time": "1:38:47", "remaining_time": "0:06:26"}
356
+ {"current_steps": 3348, "total_steps": 3564, "loss": 0.5513463020324707, "lr": 2.1819055525113995e-07, "epoch": 2.8181818181818183, "percentage": 93.94, "elapsed_time": "1:38:50", "remaining_time": "0:06:22"}
357
+ {"current_steps": 3350, "total_steps": 3564, "loss": 0.8172674179077148, "lr": 2.178578988952698e-07, "epoch": 2.81986531986532, "percentage": 94.0, "elapsed_time": "1:38:54", "remaining_time": "0:06:19"}
358
+ {"current_steps": 3352, "total_steps": 3564, "loss": 0.7926508188247681, "lr": 2.1752828189010677e-07, "epoch": 2.821548821548822, "percentage": 94.05, "elapsed_time": "1:38:58", "remaining_time": "0:06:15"}
359
+ {"current_steps": 3354, "total_steps": 3564, "loss": 0.7785905599594116, "lr": 2.1720170537132003e-07, "epoch": 2.823232323232323, "percentage": 94.11, "elapsed_time": "1:39:02", "remaining_time": "0:06:12"}
360
+ {"current_steps": 3356, "total_steps": 3564, "loss": 0.8117780685424805, "lr": 2.16878170464103e-07, "epoch": 2.824915824915825, "percentage": 94.16, "elapsed_time": "1:39:05", "remaining_time": "0:06:08"}
361
+ {"current_steps": 3358, "total_steps": 3564, "loss": 0.4899190068244934, "lr": 2.1655767828316967e-07, "epoch": 2.8265993265993266, "percentage": 94.22, "elapsed_time": "1:39:09", "remaining_time": "0:06:04"}
362
+ {"current_steps": 3360, "total_steps": 3564, "loss": 0.481950581073761, "lr": 2.1624022993275042e-07, "epoch": 2.8282828282828283, "percentage": 94.28, "elapsed_time": "1:39:13", "remaining_time": "0:06:01"}
363
+ {"current_steps": 3362, "total_steps": 3564, "loss": 0.6889939308166504, "lr": 2.1592582650658838e-07, "epoch": 2.82996632996633, "percentage": 94.33, "elapsed_time": "1:39:16", "remaining_time": "0:05:57"}
364
+ {"current_steps": 3364, "total_steps": 3564, "loss": 0.5986655950546265, "lr": 2.1561446908793575e-07, "epoch": 2.8316498316498318, "percentage": 94.39, "elapsed_time": "1:39:19", "remaining_time": "0:05:54"}
365
+ {"current_steps": 3366, "total_steps": 3564, "loss": 0.4613681137561798, "lr": 2.1530615874954978e-07, "epoch": 2.8333333333333335, "percentage": 94.44, "elapsed_time": "1:39:23", "remaining_time": "0:05:50"}
366
+ {"current_steps": 3368, "total_steps": 3564, "loss": 0.35357874631881714, "lr": 2.1500089655368913e-07, "epoch": 2.8350168350168348, "percentage": 94.5, "elapsed_time": "1:39:26", "remaining_time": "0:05:47"}
367
+ {"current_steps": 3370, "total_steps": 3564, "loss": 0.815057635307312, "lr": 2.146986835521108e-07, "epoch": 2.8367003367003365, "percentage": 94.56, "elapsed_time": "1:39:30", "remaining_time": "0:05:43"}
368
+ {"current_steps": 3372, "total_steps": 3564, "loss": 0.6456162929534912, "lr": 2.143995207860655e-07, "epoch": 2.8383838383838382, "percentage": 94.61, "elapsed_time": "1:39:33", "remaining_time": "0:05:40"}
369
+ {"current_steps": 3374, "total_steps": 3564, "loss": 0.29310160875320435, "lr": 2.1410340928629483e-07, "epoch": 2.84006734006734, "percentage": 94.67, "elapsed_time": "1:39:37", "remaining_time": "0:05:36"}
370
+ {"current_steps": 3376, "total_steps": 3564, "loss": 0.851909875869751, "lr": 2.138103500730278e-07, "epoch": 2.8417508417508417, "percentage": 94.73, "elapsed_time": "1:39:41", "remaining_time": "0:05:33"}
371
+ {"current_steps": 3378, "total_steps": 3564, "loss": 0.7448092699050903, "lr": 2.1352034415597635e-07, "epoch": 2.8434343434343434, "percentage": 94.78, "elapsed_time": "1:39:45", "remaining_time": "0:05:29"}
372
+ {"current_steps": 3380, "total_steps": 3564, "loss": 0.5352383255958557, "lr": 2.1323339253433309e-07, "epoch": 2.845117845117845, "percentage": 94.84, "elapsed_time": "1:39:48", "remaining_time": "0:05:26"}
373
+ {"current_steps": 3382, "total_steps": 3564, "loss": 0.522847056388855, "lr": 2.1294949619676717e-07, "epoch": 2.846801346801347, "percentage": 94.89, "elapsed_time": "1:39:52", "remaining_time": "0:05:22"}
374
+ {"current_steps": 3384, "total_steps": 3564, "loss": 0.5352615118026733, "lr": 2.1266865612142064e-07, "epoch": 2.8484848484848486, "percentage": 94.95, "elapsed_time": "1:39:56", "remaining_time": "0:05:18"}
375
+ {"current_steps": 3386, "total_steps": 3564, "loss": 0.7238250970840454, "lr": 2.1239087327590582e-07, "epoch": 2.8501683501683504, "percentage": 95.01, "elapsed_time": "1:40:00", "remaining_time": "0:05:15"}
376
+ {"current_steps": 3388, "total_steps": 3564, "loss": 0.6121417284011841, "lr": 2.121161486173017e-07, "epoch": 2.851851851851852, "percentage": 95.06, "elapsed_time": "1:40:03", "remaining_time": "0:05:11"}
377
+ {"current_steps": 3390, "total_steps": 3564, "loss": 0.4724659025669098, "lr": 2.1184448309215015e-07, "epoch": 2.8535353535353534, "percentage": 95.12, "elapsed_time": "1:40:06", "remaining_time": "0:05:08"}
378
+ {"current_steps": 3392, "total_steps": 3564, "loss": 0.5098093748092651, "lr": 2.1157587763645322e-07, "epoch": 2.855218855218855, "percentage": 95.17, "elapsed_time": "1:40:10", "remaining_time": "0:05:04"}
379
+ {"current_steps": 3394, "total_steps": 3564, "loss": 0.9295372366905212, "lr": 2.113103331756698e-07, "epoch": 2.856902356902357, "percentage": 95.23, "elapsed_time": "1:40:13", "remaining_time": "0:05:01"}
380
+ {"current_steps": 3396, "total_steps": 3564, "loss": 0.9365147948265076, "lr": 2.110478506247122e-07, "epoch": 2.8585858585858586, "percentage": 95.29, "elapsed_time": "1:40:17", "remaining_time": "0:04:57"}
381
+ {"current_steps": 3398, "total_steps": 3564, "loss": 0.4805770516395569, "lr": 2.1078843088794325e-07, "epoch": 2.8602693602693603, "percentage": 95.34, "elapsed_time": "1:40:20", "remaining_time": "0:04:54"}
382
+ {"current_steps": 3400, "total_steps": 3564, "loss": 0.38062724471092224, "lr": 2.105320748591732e-07, "epoch": 2.861952861952862, "percentage": 95.4, "elapsed_time": "1:40:24", "remaining_time": "0:04:50"}
383
+ {"current_steps": 3402, "total_steps": 3564, "loss": 0.4569489359855652, "lr": 2.1027878342165624e-07, "epoch": 2.8636363636363638, "percentage": 95.45, "elapsed_time": "1:40:28", "remaining_time": "0:04:47"}
384
+ {"current_steps": 3404, "total_steps": 3564, "loss": 0.34320202469825745, "lr": 2.1002855744808815e-07, "epoch": 2.865319865319865, "percentage": 95.51, "elapsed_time": "1:40:31", "remaining_time": "0:04:43"}
385
+ {"current_steps": 3406, "total_steps": 3564, "loss": 0.7092417478561401, "lr": 2.0978139780060257e-07, "epoch": 2.8670033670033668, "percentage": 95.57, "elapsed_time": "1:40:35", "remaining_time": "0:04:39"}
386
+ {"current_steps": 3408, "total_steps": 3564, "loss": 0.29190459847450256, "lr": 2.0953730533076862e-07, "epoch": 2.8686868686868685, "percentage": 95.62, "elapsed_time": "1:40:38", "remaining_time": "0:04:36"}
387
+ {"current_steps": 3410, "total_steps": 3564, "loss": 0.7917627692222595, "lr": 2.0929628087958734e-07, "epoch": 2.8703703703703702, "percentage": 95.68, "elapsed_time": "1:40:42", "remaining_time": "0:04:32"}
388
+ {"current_steps": 3412, "total_steps": 3564, "loss": 0.43554821610450745, "lr": 2.0905832527748953e-07, "epoch": 2.872053872053872, "percentage": 95.74, "elapsed_time": "1:40:45", "remaining_time": "0:04:29"}
389
+ {"current_steps": 3414, "total_steps": 3564, "loss": 0.5983174443244934, "lr": 2.0882343934433236e-07, "epoch": 2.8737373737373737, "percentage": 95.79, "elapsed_time": "1:40:48", "remaining_time": "0:04:25"}
390
+ {"current_steps": 3416, "total_steps": 3564, "loss": 0.17676572501659393, "lr": 2.085916238893966e-07, "epoch": 2.8754208754208754, "percentage": 95.85, "elapsed_time": "1:40:51", "remaining_time": "0:04:22"}
391
+ {"current_steps": 3418, "total_steps": 3564, "loss": 0.6077107191085815, "lr": 2.0836287971138418e-07, "epoch": 2.877104377104377, "percentage": 95.9, "elapsed_time": "1:40:55", "remaining_time": "0:04:18"}
392
+ {"current_steps": 3420, "total_steps": 3564, "loss": 0.4146248400211334, "lr": 2.0813720759841492e-07, "epoch": 2.878787878787879, "percentage": 95.96, "elapsed_time": "1:40:59", "remaining_time": "0:04:15"}
393
+ {"current_steps": 3422, "total_steps": 3564, "loss": 0.7497705221176147, "lr": 2.0791460832802423e-07, "epoch": 2.8804713804713806, "percentage": 96.02, "elapsed_time": "1:41:02", "remaining_time": "0:04:11"}
394
+ {"current_steps": 3424, "total_steps": 3564, "loss": 0.5505831241607666, "lr": 2.0769508266716027e-07, "epoch": 2.8821548821548824, "percentage": 96.07, "elapsed_time": "1:41:06", "remaining_time": "0:04:08"}
395
+ {"current_steps": 3426, "total_steps": 3564, "loss": 0.6165893077850342, "lr": 2.0747863137218126e-07, "epoch": 2.883838383838384, "percentage": 96.13, "elapsed_time": "1:41:09", "remaining_time": "0:04:04"}
396
+ {"current_steps": 3428, "total_steps": 3564, "loss": 0.5343178510665894, "lr": 2.0726525518885308e-07, "epoch": 2.8855218855218854, "percentage": 96.18, "elapsed_time": "1:41:13", "remaining_time": "0:04:00"}
397
+ {"current_steps": 3430, "total_steps": 3564, "loss": 0.3310260772705078, "lr": 2.0705495485234653e-07, "epoch": 2.887205387205387, "percentage": 96.24, "elapsed_time": "1:41:16", "remaining_time": "0:03:57"}
398
+ {"current_steps": 3432, "total_steps": 3564, "loss": 0.5320956707000732, "lr": 2.0684773108723455e-07, "epoch": 2.888888888888889, "percentage": 96.3, "elapsed_time": "1:41:20", "remaining_time": "0:03:53"}
399
+ {"current_steps": 3434, "total_steps": 3564, "loss": 0.29516857862472534, "lr": 2.0664358460749018e-07, "epoch": 2.8905723905723906, "percentage": 96.35, "elapsed_time": "1:41:23", "remaining_time": "0:03:50"}
400
+ {"current_steps": 3436, "total_steps": 3564, "loss": 0.9136509895324707, "lr": 2.064425161164842e-07, "epoch": 2.8922558922558923, "percentage": 96.41, "elapsed_time": "1:41:27", "remaining_time": "0:03:46"}
401
+ {"current_steps": 3438, "total_steps": 3564, "loss": 0.8272508382797241, "lr": 2.0624452630698195e-07, "epoch": 2.893939393939394, "percentage": 96.46, "elapsed_time": "1:41:30", "remaining_time": "0:03:43"}
402
+ {"current_steps": 3440, "total_steps": 3564, "loss": 0.744123101234436, "lr": 2.0604961586114163e-07, "epoch": 2.8956228956228958, "percentage": 96.52, "elapsed_time": "1:41:34", "remaining_time": "0:03:39"}
403
+ {"current_steps": 3442, "total_steps": 3564, "loss": 0.8894016742706299, "lr": 2.0585778545051195e-07, "epoch": 2.897306397306397, "percentage": 96.58, "elapsed_time": "1:41:38", "remaining_time": "0:03:36"}
404
+ {"current_steps": 3444, "total_steps": 3564, "loss": 0.2585524320602417, "lr": 2.0566903573602913e-07, "epoch": 2.898989898989899, "percentage": 96.63, "elapsed_time": "1:41:41", "remaining_time": "0:03:32"}
405
+ {"current_steps": 3446, "total_steps": 3564, "loss": 0.5225664377212524, "lr": 2.0548336736801548e-07, "epoch": 2.9006734006734005, "percentage": 96.69, "elapsed_time": "1:41:45", "remaining_time": "0:03:29"}
406
+ {"current_steps": 3448, "total_steps": 3564, "loss": 1.000659704208374, "lr": 2.0530078098617668e-07, "epoch": 2.9023569023569022, "percentage": 96.75, "elapsed_time": "1:41:49", "remaining_time": "0:03:25"}
407
+ {"current_steps": 3450, "total_steps": 3564, "loss": 0.2958747446537018, "lr": 2.0512127721959954e-07, "epoch": 2.904040404040404, "percentage": 96.8, "elapsed_time": "1:41:52", "remaining_time": "0:03:21"}
408
+ {"current_steps": 3452, "total_steps": 3564, "loss": 0.5946668386459351, "lr": 2.0494485668675003e-07, "epoch": 2.9057239057239057, "percentage": 96.86, "elapsed_time": "1:41:56", "remaining_time": "0:03:18"}
409
+ {"current_steps": 3454, "total_steps": 3564, "loss": 0.6222255229949951, "lr": 2.0477151999547137e-07, "epoch": 2.9074074074074074, "percentage": 96.91, "elapsed_time": "1:42:00", "remaining_time": "0:03:14"}
410
+ {"current_steps": 3456, "total_steps": 3564, "loss": 0.9090818166732788, "lr": 2.0460126774298115e-07, "epoch": 2.909090909090909, "percentage": 96.97, "elapsed_time": "1:42:03", "remaining_time": "0:03:11"}
411
+ {"current_steps": 3458, "total_steps": 3564, "loss": 0.6454827785491943, "lr": 2.044341005158701e-07, "epoch": 2.910774410774411, "percentage": 97.03, "elapsed_time": "1:42:07", "remaining_time": "0:03:07"}
412
+ {"current_steps": 3460, "total_steps": 3564, "loss": 0.8902723789215088, "lr": 2.042700188900996e-07, "epoch": 2.9124579124579126, "percentage": 97.08, "elapsed_time": "1:42:11", "remaining_time": "0:03:04"}
413
+ {"current_steps": 3462, "total_steps": 3564, "loss": 0.9835023283958435, "lr": 2.0410902343099998e-07, "epoch": 2.9141414141414144, "percentage": 97.14, "elapsed_time": "1:42:14", "remaining_time": "0:03:00"}
414
+ {"current_steps": 3464, "total_steps": 3564, "loss": 0.725146472454071, "lr": 2.039511146932683e-07, "epoch": 2.915824915824916, "percentage": 97.19, "elapsed_time": "1:42:17", "remaining_time": "0:02:57"}
415
+ {"current_steps": 3466, "total_steps": 3564, "loss": 0.8742655515670776, "lr": 2.0379629322096658e-07, "epoch": 2.9175084175084174, "percentage": 97.25, "elapsed_time": "1:42:21", "remaining_time": "0:02:53"}
416
+ {"current_steps": 3468, "total_steps": 3564, "loss": 0.5896962881088257, "lr": 2.036445595475199e-07, "epoch": 2.919191919191919, "percentage": 97.31, "elapsed_time": "1:42:24", "remaining_time": "0:02:50"}
417
+ {"current_steps": 3470, "total_steps": 3564, "loss": 0.08913551270961761, "lr": 2.0349591419571473e-07, "epoch": 2.920875420875421, "percentage": 97.36, "elapsed_time": "1:42:27", "remaining_time": "0:02:46"}
418
+ {"current_steps": 3472, "total_steps": 3564, "loss": 0.5938529968261719, "lr": 2.0335035767769674e-07, "epoch": 2.9225589225589226, "percentage": 97.42, "elapsed_time": "1:42:30", "remaining_time": "0:02:42"}
419
+ {"current_steps": 3474, "total_steps": 3564, "loss": 0.607816755771637, "lr": 2.032078904949694e-07, "epoch": 2.9242424242424243, "percentage": 97.47, "elapsed_time": "1:42:34", "remaining_time": "0:02:39"}
420
+ {"current_steps": 3476, "total_steps": 3564, "loss": 0.26831308007240295, "lr": 2.0306851313839217e-07, "epoch": 2.925925925925926, "percentage": 97.53, "elapsed_time": "1:42:37", "remaining_time": "0:02:35"}
421
+ {"current_steps": 3478, "total_steps": 3564, "loss": 0.776150107383728, "lr": 2.0293222608817862e-07, "epoch": 2.9276094276094278, "percentage": 97.59, "elapsed_time": "1:42:41", "remaining_time": "0:02:32"}
422
+ {"current_steps": 3480, "total_steps": 3564, "loss": 0.44397690892219543, "lr": 2.0279902981389491e-07, "epoch": 2.929292929292929, "percentage": 97.64, "elapsed_time": "1:42:44", "remaining_time": "0:02:28"}
423
+ {"current_steps": 3482, "total_steps": 3564, "loss": 0.7775415182113647, "lr": 2.026689247744584e-07, "epoch": 2.930976430976431, "percentage": 97.7, "elapsed_time": "1:42:48", "remaining_time": "0:02:25"}
424
+ {"current_steps": 3484, "total_steps": 3564, "loss": 0.5349434614181519, "lr": 2.0254191141813563e-07, "epoch": 2.9326599326599325, "percentage": 97.76, "elapsed_time": "1:42:52", "remaining_time": "0:02:21"}
425
+ {"current_steps": 3486, "total_steps": 3564, "loss": 0.6211014986038208, "lr": 2.0241799018254102e-07, "epoch": 2.9343434343434343, "percentage": 97.81, "elapsed_time": "1:42:56", "remaining_time": "0:02:18"}
426
+ {"current_steps": 3488, "total_steps": 3564, "loss": 0.6584489345550537, "lr": 2.0229716149463543e-07, "epoch": 2.936026936026936, "percentage": 97.87, "elapsed_time": "1:42:58", "remaining_time": "0:02:14"}
427
+ {"current_steps": 3490, "total_steps": 3564, "loss": 0.5959441661834717, "lr": 2.0217942577072447e-07, "epoch": 2.9377104377104377, "percentage": 97.92, "elapsed_time": "1:43:02", "remaining_time": "0:02:11"}
428
+ {"current_steps": 3492, "total_steps": 3564, "loss": 0.8532196283340454, "lr": 2.0206478341645734e-07, "epoch": 2.9393939393939394, "percentage": 97.98, "elapsed_time": "1:43:05", "remaining_time": "0:02:07"}
429
+ {"current_steps": 3494, "total_steps": 3564, "loss": 0.3821958899497986, "lr": 2.0195323482682508e-07, "epoch": 2.941077441077441, "percentage": 98.04, "elapsed_time": "1:43:09", "remaining_time": "0:02:03"}
430
+ {"current_steps": 3496, "total_steps": 3564, "loss": 0.7394722700119019, "lr": 2.0184478038615948e-07, "epoch": 2.942760942760943, "percentage": 98.09, "elapsed_time": "1:43:12", "remaining_time": "0:02:00"}
431
+ {"current_steps": 3498, "total_steps": 3564, "loss": 0.2922773063182831, "lr": 2.0173942046813191e-07, "epoch": 2.9444444444444446, "percentage": 98.15, "elapsed_time": "1:43:16", "remaining_time": "0:01:56"}
432
+ {"current_steps": 3500, "total_steps": 3564, "loss": 0.608026385307312, "lr": 2.016371554357515e-07, "epoch": 2.9461279461279464, "percentage": 98.2, "elapsed_time": "1:43:19", "remaining_time": "0:01:53"}
433
+ {"current_steps": 3502, "total_steps": 3564, "loss": 0.684483528137207, "lr": 2.015379856413643e-07, "epoch": 2.9478114478114477, "percentage": 98.26, "elapsed_time": "1:43:23", "remaining_time": "0:01:49"}
434
+ {"current_steps": 3504, "total_steps": 3564, "loss": 0.36152565479278564, "lr": 2.01441911426652e-07, "epoch": 2.9494949494949494, "percentage": 98.32, "elapsed_time": "1:43:26", "remaining_time": "0:01:46"}
435
+ {"current_steps": 3506, "total_steps": 3564, "loss": 0.6608873009681702, "lr": 2.013489331226307e-07, "epoch": 2.951178451178451, "percentage": 98.37, "elapsed_time": "1:43:30", "remaining_time": "0:01:42"}
436
+ {"current_steps": 3508, "total_steps": 3564, "loss": 0.8101043701171875, "lr": 2.0125905104964978e-07, "epoch": 2.952861952861953, "percentage": 98.43, "elapsed_time": "1:43:33", "remaining_time": "0:01:39"}
437
+ {"current_steps": 3510, "total_steps": 3564, "loss": 0.7046741247177124, "lr": 2.0117226551739068e-07, "epoch": 2.9545454545454546, "percentage": 98.48, "elapsed_time": "1:43:37", "remaining_time": "0:01:35"}
438
+ {"current_steps": 3512, "total_steps": 3564, "loss": 0.7705718874931335, "lr": 2.0108857682486629e-07, "epoch": 2.9562289562289563, "percentage": 98.54, "elapsed_time": "1:43:40", "remaining_time": "0:01:32"}
439
+ {"current_steps": 3514, "total_steps": 3564, "loss": 0.31763288378715515, "lr": 2.0100798526041927e-07, "epoch": 2.957912457912458, "percentage": 98.6, "elapsed_time": "1:43:44", "remaining_time": "0:01:28"}
440
+ {"current_steps": 3516, "total_steps": 3564, "loss": 0.8195918202400208, "lr": 2.009304911017215e-07, "epoch": 2.9595959595959593, "percentage": 98.65, "elapsed_time": "1:43:47", "remaining_time": "0:01:25"}
441
+ {"current_steps": 3518, "total_steps": 3564, "loss": 0.871679425239563, "lr": 2.0085609461577295e-07, "epoch": 2.961279461279461, "percentage": 98.71, "elapsed_time": "1:43:51", "remaining_time": "0:01:21"}
442
+ {"current_steps": 3520, "total_steps": 3564, "loss": 0.3950427770614624, "lr": 2.0078479605890064e-07, "epoch": 2.962962962962963, "percentage": 98.77, "elapsed_time": "1:43:54", "remaining_time": "0:01:17"}
443
+ {"current_steps": 3522, "total_steps": 3564, "loss": 0.65765380859375, "lr": 2.007165956767584e-07, "epoch": 2.9646464646464645, "percentage": 98.82, "elapsed_time": "1:43:58", "remaining_time": "0:01:14"}
444
+ {"current_steps": 3524, "total_steps": 3564, "loss": 0.2477177381515503, "lr": 2.00651493704325e-07, "epoch": 2.9663299663299663, "percentage": 98.88, "elapsed_time": "1:44:01", "remaining_time": "0:01:10"}
445
+ {"current_steps": 3526, "total_steps": 3564, "loss": 0.8671658039093018, "lr": 2.0058949036590426e-07, "epoch": 2.968013468013468, "percentage": 98.93, "elapsed_time": "1:44:05", "remaining_time": "0:01:07"}
446
+ {"current_steps": 3528, "total_steps": 3564, "loss": 0.7299938201904297, "lr": 2.0053058587512378e-07, "epoch": 2.9696969696969697, "percentage": 98.99, "elapsed_time": "1:44:08", "remaining_time": "0:01:03"}
447
+ {"current_steps": 3530, "total_steps": 3564, "loss": 0.7638918161392212, "lr": 2.0047478043493418e-07, "epoch": 2.9713804713804715, "percentage": 99.05, "elapsed_time": "1:44:12", "remaining_time": "0:01:00"}
448
+ {"current_steps": 3532, "total_steps": 3564, "loss": 0.6010457873344421, "lr": 2.004220742376088e-07, "epoch": 2.973063973063973, "percentage": 99.1, "elapsed_time": "1:44:15", "remaining_time": "0:00:56"}
449
+ {"current_steps": 3534, "total_steps": 3564, "loss": 0.21666747331619263, "lr": 2.0037246746474277e-07, "epoch": 2.974747474747475, "percentage": 99.16, "elapsed_time": "1:44:19", "remaining_time": "0:00:53"}
450
+ {"current_steps": 3536, "total_steps": 3564, "loss": 0.828637421131134, "lr": 2.0032596028725204e-07, "epoch": 2.9764309764309766, "percentage": 99.21, "elapsed_time": "1:44:22", "remaining_time": "0:00:49"}
451
+ {"current_steps": 3538, "total_steps": 3564, "loss": 0.4242842197418213, "lr": 2.0028255286537355e-07, "epoch": 2.9781144781144784, "percentage": 99.27, "elapsed_time": "1:44:26", "remaining_time": "0:00:46"}
452
+ {"current_steps": 3540, "total_steps": 3564, "loss": 0.9581695795059204, "lr": 2.0024224534866408e-07, "epoch": 2.9797979797979797, "percentage": 99.33, "elapsed_time": "1:44:29", "remaining_time": "0:00:42"}
453
+ {"current_steps": 3542, "total_steps": 3564, "loss": 0.8976711630821228, "lr": 2.0020503787599998e-07, "epoch": 2.9814814814814814, "percentage": 99.38, "elapsed_time": "1:44:33", "remaining_time": "0:00:38"}
454
+ {"current_steps": 3544, "total_steps": 3564, "loss": 0.47080734372138977, "lr": 2.001709305755767e-07, "epoch": 2.983164983164983, "percentage": 99.44, "elapsed_time": "1:44:36", "remaining_time": "0:00:35"}
455
+ {"current_steps": 3546, "total_steps": 3564, "loss": 0.799166202545166, "lr": 2.0013992356490827e-07, "epoch": 2.984848484848485, "percentage": 99.49, "elapsed_time": "1:44:40", "remaining_time": "0:00:31"}
456
+ {"current_steps": 3548, "total_steps": 3564, "loss": 0.30166110396385193, "lr": 2.0011201695082687e-07, "epoch": 2.9865319865319866, "percentage": 99.55, "elapsed_time": "1:44:43", "remaining_time": "0:00:28"}
457
+ {"current_steps": 3550, "total_steps": 3564, "loss": 0.3377661108970642, "lr": 2.0008721082948243e-07, "epoch": 2.9882154882154883, "percentage": 99.61, "elapsed_time": "1:44:47", "remaining_time": "0:00:24"}
458
+ {"current_steps": 3552, "total_steps": 3564, "loss": 0.4944566488265991, "lr": 2.0006550528634258e-07, "epoch": 2.98989898989899, "percentage": 99.66, "elapsed_time": "1:44:51", "remaining_time": "0:00:21"}
459
+ {"current_steps": 3554, "total_steps": 3564, "loss": 0.9397309422492981, "lr": 2.00046900396192e-07, "epoch": 2.9915824915824913, "percentage": 99.72, "elapsed_time": "1:44:54", "remaining_time": "0:00:17"}
460
+ {"current_steps": 3556, "total_steps": 3564, "loss": 0.7814288139343262, "lr": 2.0003139622313241e-07, "epoch": 2.993265993265993, "percentage": 99.78, "elapsed_time": "1:44:58", "remaining_time": "0:00:14"}
461
+ {"current_steps": 3558, "total_steps": 3564, "loss": 0.6661207675933838, "lr": 2.0001899282058216e-07, "epoch": 2.994949494949495, "percentage": 99.83, "elapsed_time": "1:45:01", "remaining_time": "0:00:10"}
462
+ {"current_steps": 3560, "total_steps": 3564, "loss": 0.40893661975860596, "lr": 2.000096902312762e-07, "epoch": 2.9966329966329965, "percentage": 99.89, "elapsed_time": "1:45:05", "remaining_time": "0:00:07"}
463
+ {"current_steps": 3562, "total_steps": 3564, "loss": 0.5416642427444458, "lr": 2.0000348848726586e-07, "epoch": 2.9983164983164983, "percentage": 99.94, "elapsed_time": "1:45:08", "remaining_time": "0:00:03"}
464
+ {"current_steps": 3564, "total_steps": 3564, "loss": 0.361904501914978, "lr": 2.0000038760991877e-07, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:45:11", "remaining_time": "0:00:00"}
465
+ {"current_steps": 3564, "total_steps": 3564, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:45:11", "remaining_time": "0:00:00"}
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c48ca37f87877b77202dc9e74ac2f9263989b469fe6cce23cd6c393190d111d
3
+ size 5649
training_loss.png ADDED