Training in progress, step 150
Browse files- .gitattributes +1 -0
- config.json +28 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +20 -0
- tokenizer.json +3 -0
- tokenizer_config.json +77 -0
- trainer_log.jsonl +152 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 151643,
|
| 7 |
+
"eos_token_id": 151643,
|
| 8 |
+
"hidden_act": "silu",
|
| 9 |
+
"hidden_size": 896,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"intermediate_size": 4864,
|
| 12 |
+
"max_position_embeddings": 131072,
|
| 13 |
+
"max_window_layers": 24,
|
| 14 |
+
"model_type": "qwen2",
|
| 15 |
+
"num_attention_heads": 14,
|
| 16 |
+
"num_hidden_layers": 24,
|
| 17 |
+
"num_key_value_heads": 2,
|
| 18 |
+
"rms_norm_eps": 1e-06,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 1000000.0,
|
| 21 |
+
"sliding_window": null,
|
| 22 |
+
"tie_word_embeddings": true,
|
| 23 |
+
"torch_dtype": "bfloat16",
|
| 24 |
+
"transformers_version": "4.51.2",
|
| 25 |
+
"use_cache": false,
|
| 26 |
+
"use_sliding_window": false,
|
| 27 |
+
"vocab_size": 194498
|
| 28 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48ba39a7956bd21790caf9e9b1ec7713721d6d9ec09f6197568e1cc7870db0ab
|
| 3 |
+
size 1064369000
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>"
|
| 5 |
+
],
|
| 6 |
+
"eos_token": {
|
| 7 |
+
"content": "<|im_end|>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false
|
| 12 |
+
},
|
| 13 |
+
"pad_token": {
|
| 14 |
+
"content": "<|endoftext|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false
|
| 19 |
+
}
|
| 20 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8ec123df4ff5fd027294ce9931a2709a061268c399250056437627ea20e304f
|
| 3 |
+
size 27868597
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"11370": {
|
| 5 |
+
"content": "/************************************************************************",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"16395": {
|
| 13 |
+
"content": "%%%%%%%%%%%%%%%%",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"33009": {
|
| 21 |
+
"content": "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"65080": {
|
| 29 |
+
"content": "//************************************************************************",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": false,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"151643": {
|
| 37 |
+
"content": "<|endoftext|>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
},
|
| 44 |
+
"151644": {
|
| 45 |
+
"content": "<|im_start|>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false,
|
| 50 |
+
"special": true
|
| 51 |
+
},
|
| 52 |
+
"151645": {
|
| 53 |
+
"content": "<|im_end|>",
|
| 54 |
+
"lstrip": false,
|
| 55 |
+
"normalized": false,
|
| 56 |
+
"rstrip": false,
|
| 57 |
+
"single_word": false,
|
| 58 |
+
"special": true
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"additional_special_tokens": [
|
| 62 |
+
"<|im_start|>",
|
| 63 |
+
"<|im_end|>"
|
| 64 |
+
],
|
| 65 |
+
"bos_token": null,
|
| 66 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are BanglaQwen, developed by AI4BD. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within /************************************************************************//************************************************************************ XML tags:\\n/************************************************************************\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n//************************************************************************<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are BanglaQwen, developed by AI4BD. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n/************************************************************************\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n//************************************************************************' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n%%%%%%%%%%%%%%%%\\n' }}\n {{- message.content }}\n {{- '\\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
| 67 |
+
"clean_up_tokenization_spaces": false,
|
| 68 |
+
"eos_token": "<|im_end|>",
|
| 69 |
+
"errors": "replace",
|
| 70 |
+
"extra_special_tokens": {},
|
| 71 |
+
"model_max_length": 4096,
|
| 72 |
+
"pad_token": "<|endoftext|>",
|
| 73 |
+
"padding_side": "right",
|
| 74 |
+
"split_special_tokens": false,
|
| 75 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 76 |
+
"unk_token": null
|
| 77 |
+
}
|
trainer_log.jsonl
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 1, "total_steps": 2920, "loss": 1.4935, "lr": 5e-05, "epoch": 0.003424657534246575, "percentage": 0.03, "elapsed_time": "0:00:29", "remaining_time": "23:54:03"}
|
| 2 |
+
{"current_steps": 2, "total_steps": 2920, "loss": 1.3436, "lr": 5e-05, "epoch": 0.00684931506849315, "percentage": 0.07, "elapsed_time": "0:00:47", "remaining_time": "19:21:11"}
|
| 3 |
+
{"current_steps": 3, "total_steps": 2920, "loss": 1.3261, "lr": 5e-05, "epoch": 0.010273972602739725, "percentage": 0.1, "elapsed_time": "0:01:06", "remaining_time": "17:51:23"}
|
| 4 |
+
{"current_steps": 4, "total_steps": 2920, "loss": 1.2239, "lr": 5e-05, "epoch": 0.0136986301369863, "percentage": 0.14, "elapsed_time": "0:01:24", "remaining_time": "17:06:50"}
|
| 5 |
+
{"current_steps": 5, "total_steps": 2920, "loss": 1.4355, "lr": 5e-05, "epoch": 0.017123287671232876, "percentage": 0.17, "elapsed_time": "0:01:42", "remaining_time": "16:40:05"}
|
| 6 |
+
{"current_steps": 6, "total_steps": 2920, "loss": 1.2159, "lr": 5e-05, "epoch": 0.02054794520547945, "percentage": 0.21, "elapsed_time": "0:02:01", "remaining_time": "16:22:18"}
|
| 7 |
+
{"current_steps": 7, "total_steps": 2920, "loss": 1.1885, "lr": 5e-05, "epoch": 0.023972602739726026, "percentage": 0.24, "elapsed_time": "0:02:19", "remaining_time": "16:09:43"}
|
| 8 |
+
{"current_steps": 8, "total_steps": 2920, "loss": 1.1773, "lr": 5e-05, "epoch": 0.0273972602739726, "percentage": 0.27, "elapsed_time": "0:02:38", "remaining_time": "15:59:57"}
|
| 9 |
+
{"current_steps": 9, "total_steps": 2920, "loss": 1.1569, "lr": 5e-05, "epoch": 0.030821917808219176, "percentage": 0.31, "elapsed_time": "0:02:56", "remaining_time": "15:52:16"}
|
| 10 |
+
{"current_steps": 10, "total_steps": 2920, "loss": 1.1633, "lr": 5e-05, "epoch": 0.03424657534246575, "percentage": 0.34, "elapsed_time": "0:03:15", "remaining_time": "15:46:04"}
|
| 11 |
+
{"current_steps": 11, "total_steps": 2920, "loss": 1.2799, "lr": 5e-05, "epoch": 0.03767123287671233, "percentage": 0.38, "elapsed_time": "0:03:33", "remaining_time": "15:41:05"}
|
| 12 |
+
{"current_steps": 12, "total_steps": 2920, "loss": 1.168, "lr": 5e-05, "epoch": 0.0410958904109589, "percentage": 0.41, "elapsed_time": "0:03:51", "remaining_time": "15:36:50"}
|
| 13 |
+
{"current_steps": 13, "total_steps": 2920, "loss": 1.2093, "lr": 5e-05, "epoch": 0.04452054794520548, "percentage": 0.45, "elapsed_time": "0:04:10", "remaining_time": "15:33:14"}
|
| 14 |
+
{"current_steps": 14, "total_steps": 2920, "loss": 1.1542, "lr": 5e-05, "epoch": 0.04794520547945205, "percentage": 0.48, "elapsed_time": "0:04:28", "remaining_time": "15:30:10"}
|
| 15 |
+
{"current_steps": 15, "total_steps": 2920, "loss": 1.1545, "lr": 5e-05, "epoch": 0.05136986301369863, "percentage": 0.51, "elapsed_time": "0:04:47", "remaining_time": "15:27:30"}
|
| 16 |
+
{"current_steps": 16, "total_steps": 2920, "loss": 1.1554, "lr": 5e-05, "epoch": 0.0547945205479452, "percentage": 0.55, "elapsed_time": "0:05:05", "remaining_time": "15:25:08"}
|
| 17 |
+
{"current_steps": 17, "total_steps": 2920, "loss": 1.143, "lr": 5e-05, "epoch": 0.05821917808219178, "percentage": 0.58, "elapsed_time": "0:05:24", "remaining_time": "15:22:53"}
|
| 18 |
+
{"current_steps": 18, "total_steps": 2920, "loss": 1.1375, "lr": 5e-05, "epoch": 0.06164383561643835, "percentage": 0.62, "elapsed_time": "0:05:42", "remaining_time": "15:20:53"}
|
| 19 |
+
{"current_steps": 19, "total_steps": 2920, "loss": 1.1397, "lr": 5e-05, "epoch": 0.06506849315068493, "percentage": 0.65, "elapsed_time": "0:06:01", "remaining_time": "15:18:59"}
|
| 20 |
+
{"current_steps": 20, "total_steps": 2920, "loss": 1.145, "lr": 5e-05, "epoch": 0.0684931506849315, "percentage": 0.68, "elapsed_time": "0:06:19", "remaining_time": "15:17:18"}
|
| 21 |
+
{"current_steps": 21, "total_steps": 2920, "loss": 1.2124, "lr": 5e-05, "epoch": 0.07191780821917808, "percentage": 0.72, "elapsed_time": "0:06:37", "remaining_time": "15:15:42"}
|
| 22 |
+
{"current_steps": 22, "total_steps": 2920, "loss": 1.1434, "lr": 5e-05, "epoch": 0.07534246575342465, "percentage": 0.75, "elapsed_time": "0:06:56", "remaining_time": "15:14:17"}
|
| 23 |
+
{"current_steps": 23, "total_steps": 2920, "loss": 1.1397, "lr": 5e-05, "epoch": 0.07876712328767123, "percentage": 0.79, "elapsed_time": "0:07:14", "remaining_time": "15:12:58"}
|
| 24 |
+
{"current_steps": 24, "total_steps": 2920, "loss": 1.122, "lr": 5e-05, "epoch": 0.0821917808219178, "percentage": 0.82, "elapsed_time": "0:07:33", "remaining_time": "15:11:42"}
|
| 25 |
+
{"current_steps": 25, "total_steps": 2920, "loss": 1.1376, "lr": 5e-05, "epoch": 0.08561643835616438, "percentage": 0.86, "elapsed_time": "0:07:51", "remaining_time": "15:10:31"}
|
| 26 |
+
{"current_steps": 26, "total_steps": 2920, "loss": 1.1414, "lr": 5e-05, "epoch": 0.08904109589041095, "percentage": 0.89, "elapsed_time": "0:08:10", "remaining_time": "15:09:26"}
|
| 27 |
+
{"current_steps": 27, "total_steps": 2920, "loss": 1.1469, "lr": 5e-05, "epoch": 0.09246575342465753, "percentage": 0.92, "elapsed_time": "0:08:28", "remaining_time": "15:08:28"}
|
| 28 |
+
{"current_steps": 28, "total_steps": 2920, "loss": 1.126, "lr": 5e-05, "epoch": 0.0958904109589041, "percentage": 0.96, "elapsed_time": "0:08:47", "remaining_time": "15:07:29"}
|
| 29 |
+
{"current_steps": 29, "total_steps": 2920, "loss": 1.145, "lr": 5e-05, "epoch": 0.09931506849315068, "percentage": 0.99, "elapsed_time": "0:09:05", "remaining_time": "15:06:34"}
|
| 30 |
+
{"current_steps": 30, "total_steps": 2920, "loss": 1.1656, "lr": 5e-05, "epoch": 0.10273972602739725, "percentage": 1.03, "elapsed_time": "0:09:24", "remaining_time": "15:05:44"}
|
| 31 |
+
{"current_steps": 31, "total_steps": 2920, "loss": 1.1404, "lr": 5e-05, "epoch": 0.10616438356164383, "percentage": 1.06, "elapsed_time": "0:09:42", "remaining_time": "15:04:56"}
|
| 32 |
+
{"current_steps": 32, "total_steps": 2920, "loss": 1.1347, "lr": 5e-05, "epoch": 0.1095890410958904, "percentage": 1.1, "elapsed_time": "0:10:01", "remaining_time": "15:04:11"}
|
| 33 |
+
{"current_steps": 33, "total_steps": 2920, "loss": 1.1156, "lr": 5e-05, "epoch": 0.11301369863013698, "percentage": 1.13, "elapsed_time": "0:10:19", "remaining_time": "15:03:24"}
|
| 34 |
+
{"current_steps": 34, "total_steps": 2920, "loss": 1.1382, "lr": 5e-05, "epoch": 0.11643835616438356, "percentage": 1.16, "elapsed_time": "0:10:38", "remaining_time": "15:02:41"}
|
| 35 |
+
{"current_steps": 35, "total_steps": 2920, "loss": 1.1639, "lr": 5e-05, "epoch": 0.11986301369863013, "percentage": 1.2, "elapsed_time": "0:10:56", "remaining_time": "15:01:55"}
|
| 36 |
+
{"current_steps": 36, "total_steps": 2920, "loss": 1.1248, "lr": 5e-05, "epoch": 0.1232876712328767, "percentage": 1.23, "elapsed_time": "0:11:14", "remaining_time": "15:01:14"}
|
| 37 |
+
{"current_steps": 37, "total_steps": 2920, "loss": 1.1312, "lr": 5e-05, "epoch": 0.1267123287671233, "percentage": 1.27, "elapsed_time": "0:11:33", "remaining_time": "15:00:34"}
|
| 38 |
+
{"current_steps": 38, "total_steps": 2920, "loss": 1.1198, "lr": 5e-05, "epoch": 0.13013698630136986, "percentage": 1.3, "elapsed_time": "0:11:51", "remaining_time": "14:59:57"}
|
| 39 |
+
{"current_steps": 39, "total_steps": 2920, "loss": 1.1168, "lr": 5e-05, "epoch": 0.13356164383561644, "percentage": 1.34, "elapsed_time": "0:12:10", "remaining_time": "14:59:21"}
|
| 40 |
+
{"current_steps": 40, "total_steps": 2920, "loss": 1.132, "lr": 5e-05, "epoch": 0.136986301369863, "percentage": 1.37, "elapsed_time": "0:12:28", "remaining_time": "14:58:43"}
|
| 41 |
+
{"current_steps": 41, "total_steps": 2920, "loss": 1.1054, "lr": 5e-05, "epoch": 0.1404109589041096, "percentage": 1.4, "elapsed_time": "0:12:47", "remaining_time": "14:58:07"}
|
| 42 |
+
{"current_steps": 42, "total_steps": 2920, "loss": 1.114, "lr": 5e-05, "epoch": 0.14383561643835616, "percentage": 1.44, "elapsed_time": "0:13:05", "remaining_time": "14:57:32"}
|
| 43 |
+
{"current_steps": 43, "total_steps": 2920, "loss": 1.1548, "lr": 5e-05, "epoch": 0.14726027397260275, "percentage": 1.47, "elapsed_time": "0:13:24", "remaining_time": "14:56:59"}
|
| 44 |
+
{"current_steps": 44, "total_steps": 2920, "loss": 1.1145, "lr": 5e-05, "epoch": 0.1506849315068493, "percentage": 1.51, "elapsed_time": "0:13:42", "remaining_time": "14:56:27"}
|
| 45 |
+
{"current_steps": 45, "total_steps": 2920, "loss": 1.1042, "lr": 5e-05, "epoch": 0.1541095890410959, "percentage": 1.54, "elapsed_time": "0:14:01", "remaining_time": "14:55:56"}
|
| 46 |
+
{"current_steps": 46, "total_steps": 2920, "loss": 1.1457, "lr": 5e-05, "epoch": 0.15753424657534246, "percentage": 1.58, "elapsed_time": "0:14:19", "remaining_time": "14:55:21"}
|
| 47 |
+
{"current_steps": 47, "total_steps": 2920, "loss": 1.1522, "lr": 5e-05, "epoch": 0.16095890410958905, "percentage": 1.61, "elapsed_time": "0:14:38", "remaining_time": "14:54:49"}
|
| 48 |
+
{"current_steps": 48, "total_steps": 2920, "loss": 1.0885, "lr": 5e-05, "epoch": 0.1643835616438356, "percentage": 1.64, "elapsed_time": "0:14:56", "remaining_time": "14:54:17"}
|
| 49 |
+
{"current_steps": 49, "total_steps": 2920, "loss": 1.1329, "lr": 5e-05, "epoch": 0.1678082191780822, "percentage": 1.68, "elapsed_time": "0:15:15", "remaining_time": "14:53:46"}
|
| 50 |
+
{"current_steps": 50, "total_steps": 2920, "loss": 1.1298, "lr": 5e-05, "epoch": 0.17123287671232876, "percentage": 1.71, "elapsed_time": "0:15:33", "remaining_time": "14:53:16"}
|
| 51 |
+
{"current_steps": 51, "total_steps": 2920, "loss": 1.1355, "lr": 5e-05, "epoch": 0.17465753424657535, "percentage": 1.75, "elapsed_time": "0:15:52", "remaining_time": "14:52:47"}
|
| 52 |
+
{"current_steps": 52, "total_steps": 2920, "loss": 1.1248, "lr": 5e-05, "epoch": 0.1780821917808219, "percentage": 1.78, "elapsed_time": "0:16:10", "remaining_time": "14:52:18"}
|
| 53 |
+
{"current_steps": 53, "total_steps": 2920, "loss": 1.1203, "lr": 5e-05, "epoch": 0.1815068493150685, "percentage": 1.82, "elapsed_time": "0:16:29", "remaining_time": "14:51:50"}
|
| 54 |
+
{"current_steps": 54, "total_steps": 2920, "loss": 1.1076, "lr": 5e-05, "epoch": 0.18493150684931506, "percentage": 1.85, "elapsed_time": "0:16:47", "remaining_time": "14:51:21"}
|
| 55 |
+
{"current_steps": 55, "total_steps": 2920, "loss": 1.1492, "lr": 5e-05, "epoch": 0.18835616438356165, "percentage": 1.88, "elapsed_time": "0:17:06", "remaining_time": "14:50:56"}
|
| 56 |
+
{"current_steps": 56, "total_steps": 2920, "loss": 1.2298, "lr": 5e-05, "epoch": 0.1917808219178082, "percentage": 1.92, "elapsed_time": "0:17:24", "remaining_time": "14:50:29"}
|
| 57 |
+
{"current_steps": 57, "total_steps": 2920, "loss": 1.1544, "lr": 5e-05, "epoch": 0.1952054794520548, "percentage": 1.95, "elapsed_time": "0:17:43", "remaining_time": "14:50:02"}
|
| 58 |
+
{"current_steps": 58, "total_steps": 2920, "loss": 1.1092, "lr": 5e-05, "epoch": 0.19863013698630136, "percentage": 1.99, "elapsed_time": "0:18:01", "remaining_time": "14:49:36"}
|
| 59 |
+
{"current_steps": 59, "total_steps": 2920, "loss": 1.1411, "lr": 5e-05, "epoch": 0.20205479452054795, "percentage": 2.02, "elapsed_time": "0:18:20", "remaining_time": "14:49:09"}
|
| 60 |
+
{"current_steps": 60, "total_steps": 2920, "loss": 1.1404, "lr": 5e-05, "epoch": 0.2054794520547945, "percentage": 2.05, "elapsed_time": "0:18:38", "remaining_time": "14:48:43"}
|
| 61 |
+
{"current_steps": 61, "total_steps": 2920, "loss": 1.1204, "lr": 5e-05, "epoch": 0.2089041095890411, "percentage": 2.09, "elapsed_time": "0:18:57", "remaining_time": "14:48:17"}
|
| 62 |
+
{"current_steps": 62, "total_steps": 2920, "loss": 1.1135, "lr": 5e-05, "epoch": 0.21232876712328766, "percentage": 2.12, "elapsed_time": "0:19:15", "remaining_time": "14:47:52"}
|
| 63 |
+
{"current_steps": 63, "total_steps": 2920, "loss": 1.132, "lr": 5e-05, "epoch": 0.21575342465753425, "percentage": 2.16, "elapsed_time": "0:19:34", "remaining_time": "14:47:28"}
|
| 64 |
+
{"current_steps": 64, "total_steps": 2920, "loss": 1.1232, "lr": 5e-05, "epoch": 0.2191780821917808, "percentage": 2.19, "elapsed_time": "0:19:52", "remaining_time": "14:47:01"}
|
| 65 |
+
{"current_steps": 65, "total_steps": 2920, "loss": 1.0916, "lr": 5e-05, "epoch": 0.2226027397260274, "percentage": 2.23, "elapsed_time": "0:20:11", "remaining_time": "14:46:37"}
|
| 66 |
+
{"current_steps": 66, "total_steps": 2920, "loss": 1.0963, "lr": 5e-05, "epoch": 0.22602739726027396, "percentage": 2.26, "elapsed_time": "0:20:29", "remaining_time": "14:46:13"}
|
| 67 |
+
{"current_steps": 67, "total_steps": 2920, "loss": 1.1201, "lr": 5e-05, "epoch": 0.22945205479452055, "percentage": 2.29, "elapsed_time": "0:20:48", "remaining_time": "14:45:50"}
|
| 68 |
+
{"current_steps": 68, "total_steps": 2920, "loss": 1.1016, "lr": 5e-05, "epoch": 0.2328767123287671, "percentage": 2.33, "elapsed_time": "0:21:06", "remaining_time": "14:45:28"}
|
| 69 |
+
{"current_steps": 69, "total_steps": 2920, "loss": 1.1275, "lr": 5e-05, "epoch": 0.2363013698630137, "percentage": 2.36, "elapsed_time": "0:21:25", "remaining_time": "14:45:04"}
|
| 70 |
+
{"current_steps": 70, "total_steps": 2920, "loss": 1.2119, "lr": 5e-05, "epoch": 0.23972602739726026, "percentage": 2.4, "elapsed_time": "0:21:43", "remaining_time": "14:44:40"}
|
| 71 |
+
{"current_steps": 71, "total_steps": 2920, "loss": 1.1137, "lr": 5e-05, "epoch": 0.24315068493150685, "percentage": 2.43, "elapsed_time": "0:22:02", "remaining_time": "14:44:18"}
|
| 72 |
+
{"current_steps": 72, "total_steps": 2920, "loss": 1.1247, "lr": 5e-05, "epoch": 0.2465753424657534, "percentage": 2.47, "elapsed_time": "0:22:20", "remaining_time": "14:43:55"}
|
| 73 |
+
{"current_steps": 73, "total_steps": 2920, "loss": 1.1107, "lr": 5e-05, "epoch": 0.25, "percentage": 2.5, "elapsed_time": "0:22:39", "remaining_time": "14:43:32"}
|
| 74 |
+
{"current_steps": 74, "total_steps": 2920, "loss": 1.1278, "lr": 5e-05, "epoch": 0.2534246575342466, "percentage": 2.53, "elapsed_time": "0:22:57", "remaining_time": "14:43:10"}
|
| 75 |
+
{"current_steps": 75, "total_steps": 2920, "loss": 1.095, "lr": 5e-05, "epoch": 0.2568493150684932, "percentage": 2.57, "elapsed_time": "0:23:16", "remaining_time": "14:42:49"}
|
| 76 |
+
{"current_steps": 76, "total_steps": 2920, "loss": 1.0953, "lr": 5e-05, "epoch": 0.2602739726027397, "percentage": 2.6, "elapsed_time": "0:23:34", "remaining_time": "14:42:26"}
|
| 77 |
+
{"current_steps": 77, "total_steps": 2920, "loss": 1.2623, "lr": 5e-05, "epoch": 0.2636986301369863, "percentage": 2.64, "elapsed_time": "0:23:53", "remaining_time": "14:42:03"}
|
| 78 |
+
{"current_steps": 78, "total_steps": 2920, "loss": 1.1161, "lr": 5e-05, "epoch": 0.2671232876712329, "percentage": 2.67, "elapsed_time": "0:24:11", "remaining_time": "14:41:40"}
|
| 79 |
+
{"current_steps": 79, "total_steps": 2920, "loss": 1.1358, "lr": 5e-05, "epoch": 0.2705479452054795, "percentage": 2.71, "elapsed_time": "0:24:30", "remaining_time": "14:41:17"}
|
| 80 |
+
{"current_steps": 80, "total_steps": 2920, "loss": 1.1127, "lr": 5e-05, "epoch": 0.273972602739726, "percentage": 2.74, "elapsed_time": "0:24:48", "remaining_time": "14:40:55"}
|
| 81 |
+
{"current_steps": 81, "total_steps": 2920, "loss": 1.1163, "lr": 5e-05, "epoch": 0.2773972602739726, "percentage": 2.77, "elapsed_time": "0:25:07", "remaining_time": "14:40:34"}
|
| 82 |
+
{"current_steps": 82, "total_steps": 2920, "loss": 1.112, "lr": 5e-05, "epoch": 0.2808219178082192, "percentage": 2.81, "elapsed_time": "0:25:25", "remaining_time": "14:40:13"}
|
| 83 |
+
{"current_steps": 83, "total_steps": 2920, "loss": 1.115, "lr": 5e-05, "epoch": 0.2842465753424658, "percentage": 2.84, "elapsed_time": "0:25:44", "remaining_time": "14:39:51"}
|
| 84 |
+
{"current_steps": 84, "total_steps": 2920, "loss": 1.1481, "lr": 5e-05, "epoch": 0.2876712328767123, "percentage": 2.88, "elapsed_time": "0:26:02", "remaining_time": "14:39:29"}
|
| 85 |
+
{"current_steps": 85, "total_steps": 2920, "loss": 1.1136, "lr": 5e-05, "epoch": 0.2910958904109589, "percentage": 2.91, "elapsed_time": "0:26:21", "remaining_time": "14:39:05"}
|
| 86 |
+
{"current_steps": 86, "total_steps": 2920, "loss": 1.1163, "lr": 5e-05, "epoch": 0.2945205479452055, "percentage": 2.95, "elapsed_time": "0:26:39", "remaining_time": "14:38:43"}
|
| 87 |
+
{"current_steps": 87, "total_steps": 2920, "loss": 1.1247, "lr": 5e-05, "epoch": 0.2979452054794521, "percentage": 2.98, "elapsed_time": "0:26:58", "remaining_time": "14:38:21"}
|
| 88 |
+
{"current_steps": 88, "total_steps": 2920, "loss": 1.0972, "lr": 5e-05, "epoch": 0.3013698630136986, "percentage": 3.01, "elapsed_time": "0:27:16", "remaining_time": "14:37:59"}
|
| 89 |
+
{"current_steps": 89, "total_steps": 2920, "loss": 1.1206, "lr": 5e-05, "epoch": 0.3047945205479452, "percentage": 3.05, "elapsed_time": "0:27:35", "remaining_time": "14:37:37"}
|
| 90 |
+
{"current_steps": 90, "total_steps": 2920, "loss": 1.1232, "lr": 5e-05, "epoch": 0.3082191780821918, "percentage": 3.08, "elapsed_time": "0:27:53", "remaining_time": "14:37:15"}
|
| 91 |
+
{"current_steps": 91, "total_steps": 2920, "loss": 1.1255, "lr": 5e-05, "epoch": 0.3116438356164384, "percentage": 3.12, "elapsed_time": "0:28:12", "remaining_time": "14:36:54"}
|
| 92 |
+
{"current_steps": 92, "total_steps": 2920, "loss": 1.1293, "lr": 5e-05, "epoch": 0.3150684931506849, "percentage": 3.15, "elapsed_time": "0:28:30", "remaining_time": "14:36:31"}
|
| 93 |
+
{"current_steps": 93, "total_steps": 2920, "loss": 1.1486, "lr": 5e-05, "epoch": 0.3184931506849315, "percentage": 3.18, "elapsed_time": "0:28:49", "remaining_time": "14:36:11"}
|
| 94 |
+
{"current_steps": 94, "total_steps": 2920, "loss": 1.121, "lr": 5e-05, "epoch": 0.3219178082191781, "percentage": 3.22, "elapsed_time": "0:29:07", "remaining_time": "14:35:49"}
|
| 95 |
+
{"current_steps": 95, "total_steps": 2920, "loss": 1.1082, "lr": 5e-05, "epoch": 0.3253424657534247, "percentage": 3.25, "elapsed_time": "0:29:26", "remaining_time": "14:35:28"}
|
| 96 |
+
{"current_steps": 96, "total_steps": 2920, "loss": 1.1624, "lr": 5e-05, "epoch": 0.3287671232876712, "percentage": 3.29, "elapsed_time": "0:29:44", "remaining_time": "14:35:05"}
|
| 97 |
+
{"current_steps": 97, "total_steps": 2920, "loss": 1.1889, "lr": 5e-05, "epoch": 0.3321917808219178, "percentage": 3.32, "elapsed_time": "0:30:03", "remaining_time": "14:34:44"}
|
| 98 |
+
{"current_steps": 98, "total_steps": 2920, "loss": 1.1172, "lr": 5e-05, "epoch": 0.3356164383561644, "percentage": 3.36, "elapsed_time": "0:30:21", "remaining_time": "14:34:22"}
|
| 99 |
+
{"current_steps": 99, "total_steps": 2920, "loss": 1.0992, "lr": 5e-05, "epoch": 0.339041095890411, "percentage": 3.39, "elapsed_time": "0:30:40", "remaining_time": "14:34:01"}
|
| 100 |
+
{"current_steps": 100, "total_steps": 2920, "loss": 1.118, "lr": 5e-05, "epoch": 0.3424657534246575, "percentage": 3.42, "elapsed_time": "0:30:58", "remaining_time": "14:33:39"}
|
| 101 |
+
{"current_steps": 101, "total_steps": 2920, "loss": 1.1317, "lr": 5e-05, "epoch": 0.3458904109589041, "percentage": 3.46, "elapsed_time": "0:31:17", "remaining_time": "14:33:18"}
|
| 102 |
+
{"current_steps": 102, "total_steps": 2920, "loss": 1.1086, "lr": 5e-05, "epoch": 0.3493150684931507, "percentage": 3.49, "elapsed_time": "0:31:35", "remaining_time": "14:32:56"}
|
| 103 |
+
{"current_steps": 103, "total_steps": 2920, "loss": 1.0983, "lr": 5e-05, "epoch": 0.3527397260273973, "percentage": 3.53, "elapsed_time": "0:31:54", "remaining_time": "14:32:34"}
|
| 104 |
+
{"current_steps": 104, "total_steps": 2920, "loss": 1.1044, "lr": 5e-05, "epoch": 0.3561643835616438, "percentage": 3.56, "elapsed_time": "0:32:12", "remaining_time": "14:32:12"}
|
| 105 |
+
{"current_steps": 105, "total_steps": 2920, "loss": 1.2, "lr": 5e-05, "epoch": 0.3595890410958904, "percentage": 3.6, "elapsed_time": "0:32:31", "remaining_time": "14:31:51"}
|
| 106 |
+
{"current_steps": 106, "total_steps": 2920, "loss": 1.131, "lr": 5e-05, "epoch": 0.363013698630137, "percentage": 3.63, "elapsed_time": "0:32:49", "remaining_time": "14:31:30"}
|
| 107 |
+
{"current_steps": 107, "total_steps": 2920, "loss": 1.1263, "lr": 5e-05, "epoch": 0.3664383561643836, "percentage": 3.66, "elapsed_time": "0:33:08", "remaining_time": "14:31:08"}
|
| 108 |
+
{"current_steps": 108, "total_steps": 2920, "loss": 1.1249, "lr": 5e-05, "epoch": 0.3698630136986301, "percentage": 3.7, "elapsed_time": "0:33:26", "remaining_time": "14:30:48"}
|
| 109 |
+
{"current_steps": 109, "total_steps": 2920, "loss": 1.1184, "lr": 5e-05, "epoch": 0.3732876712328767, "percentage": 3.73, "elapsed_time": "0:33:45", "remaining_time": "14:30:26"}
|
| 110 |
+
{"current_steps": 110, "total_steps": 2920, "loss": 1.1222, "lr": 5e-05, "epoch": 0.3767123287671233, "percentage": 3.77, "elapsed_time": "0:34:03", "remaining_time": "14:30:05"}
|
| 111 |
+
{"current_steps": 111, "total_steps": 2920, "loss": 1.0973, "lr": 5e-05, "epoch": 0.3801369863013699, "percentage": 3.8, "elapsed_time": "0:34:22", "remaining_time": "14:29:43"}
|
| 112 |
+
{"current_steps": 112, "total_steps": 2920, "loss": 1.1438, "lr": 5e-05, "epoch": 0.3835616438356164, "percentage": 3.84, "elapsed_time": "0:34:40", "remaining_time": "14:29:21"}
|
| 113 |
+
{"current_steps": 113, "total_steps": 2920, "loss": 1.1043, "lr": 5e-05, "epoch": 0.386986301369863, "percentage": 3.87, "elapsed_time": "0:34:59", "remaining_time": "14:29:01"}
|
| 114 |
+
{"current_steps": 114, "total_steps": 2920, "loss": 1.108, "lr": 5e-05, "epoch": 0.3904109589041096, "percentage": 3.9, "elapsed_time": "0:35:17", "remaining_time": "14:28:41"}
|
| 115 |
+
{"current_steps": 115, "total_steps": 2920, "loss": 1.0916, "lr": 5e-05, "epoch": 0.3938356164383562, "percentage": 3.94, "elapsed_time": "0:35:35", "remaining_time": "14:28:19"}
|
| 116 |
+
{"current_steps": 116, "total_steps": 2920, "loss": 1.1323, "lr": 5e-05, "epoch": 0.3972602739726027, "percentage": 3.97, "elapsed_time": "0:35:54", "remaining_time": "14:27:59"}
|
| 117 |
+
{"current_steps": 117, "total_steps": 2920, "loss": 1.1272, "lr": 5e-05, "epoch": 0.4006849315068493, "percentage": 4.01, "elapsed_time": "0:36:12", "remaining_time": "14:27:38"}
|
| 118 |
+
{"current_steps": 118, "total_steps": 2920, "loss": 1.1339, "lr": 5e-05, "epoch": 0.4041095890410959, "percentage": 4.04, "elapsed_time": "0:36:31", "remaining_time": "14:27:18"}
|
| 119 |
+
{"current_steps": 119, "total_steps": 2920, "loss": 1.1089, "lr": 5e-05, "epoch": 0.4075342465753425, "percentage": 4.08, "elapsed_time": "0:36:49", "remaining_time": "14:26:57"}
|
| 120 |
+
{"current_steps": 120, "total_steps": 2920, "loss": 1.127, "lr": 5e-05, "epoch": 0.410958904109589, "percentage": 4.11, "elapsed_time": "0:37:08", "remaining_time": "14:26:37"}
|
| 121 |
+
{"current_steps": 121, "total_steps": 2920, "loss": 1.1252, "lr": 5e-05, "epoch": 0.4143835616438356, "percentage": 4.14, "elapsed_time": "0:37:26", "remaining_time": "14:26:16"}
|
| 122 |
+
{"current_steps": 122, "total_steps": 2920, "loss": 1.112, "lr": 5e-05, "epoch": 0.4178082191780822, "percentage": 4.18, "elapsed_time": "0:37:45", "remaining_time": "14:25:56"}
|
| 123 |
+
{"current_steps": 123, "total_steps": 2920, "loss": 1.1238, "lr": 5e-05, "epoch": 0.4212328767123288, "percentage": 4.21, "elapsed_time": "0:38:03", "remaining_time": "14:25:35"}
|
| 124 |
+
{"current_steps": 124, "total_steps": 2920, "loss": 1.1324, "lr": 5e-05, "epoch": 0.4246575342465753, "percentage": 4.25, "elapsed_time": "0:38:22", "remaining_time": "14:25:13"}
|
| 125 |
+
{"current_steps": 125, "total_steps": 2920, "loss": 1.1103, "lr": 5e-05, "epoch": 0.4280821917808219, "percentage": 4.28, "elapsed_time": "0:38:40", "remaining_time": "14:24:53"}
|
| 126 |
+
{"current_steps": 126, "total_steps": 2920, "loss": 1.1232, "lr": 5e-05, "epoch": 0.4315068493150685, "percentage": 4.32, "elapsed_time": "0:38:59", "remaining_time": "14:24:33"}
|
| 127 |
+
{"current_steps": 127, "total_steps": 2920, "loss": 1.1178, "lr": 5e-05, "epoch": 0.4349315068493151, "percentage": 4.35, "elapsed_time": "0:39:17", "remaining_time": "14:24:13"}
|
| 128 |
+
{"current_steps": 128, "total_steps": 2920, "loss": 1.1253, "lr": 5e-05, "epoch": 0.4383561643835616, "percentage": 4.38, "elapsed_time": "0:39:36", "remaining_time": "14:23:52"}
|
| 129 |
+
{"current_steps": 129, "total_steps": 2920, "loss": 1.1222, "lr": 5e-05, "epoch": 0.4417808219178082, "percentage": 4.42, "elapsed_time": "0:39:54", "remaining_time": "14:23:31"}
|
| 130 |
+
{"current_steps": 130, "total_steps": 2920, "loss": 1.1265, "lr": 5e-05, "epoch": 0.4452054794520548, "percentage": 4.45, "elapsed_time": "0:40:13", "remaining_time": "14:23:11"}
|
| 131 |
+
{"current_steps": 131, "total_steps": 2920, "loss": 1.1103, "lr": 5e-05, "epoch": 0.4486301369863014, "percentage": 4.49, "elapsed_time": "0:40:31", "remaining_time": "14:22:51"}
|
| 132 |
+
{"current_steps": 132, "total_steps": 2920, "loss": 1.1264, "lr": 5e-05, "epoch": 0.4520547945205479, "percentage": 4.52, "elapsed_time": "0:40:50", "remaining_time": "14:22:30"}
|
| 133 |
+
{"current_steps": 133, "total_steps": 2920, "loss": 1.1154, "lr": 5e-05, "epoch": 0.4554794520547945, "percentage": 4.55, "elapsed_time": "0:41:08", "remaining_time": "14:22:10"}
|
| 134 |
+
{"current_steps": 134, "total_steps": 2920, "loss": 1.0984, "lr": 5e-05, "epoch": 0.4589041095890411, "percentage": 4.59, "elapsed_time": "0:41:27", "remaining_time": "14:21:49"}
|
| 135 |
+
{"current_steps": 135, "total_steps": 2920, "loss": 1.1134, "lr": 5e-05, "epoch": 0.4623287671232877, "percentage": 4.62, "elapsed_time": "0:41:45", "remaining_time": "14:21:29"}
|
| 136 |
+
{"current_steps": 136, "total_steps": 2920, "loss": 1.1195, "lr": 5e-05, "epoch": 0.4657534246575342, "percentage": 4.66, "elapsed_time": "0:42:04", "remaining_time": "14:21:09"}
|
| 137 |
+
{"current_steps": 137, "total_steps": 2920, "loss": 1.1035, "lr": 5e-05, "epoch": 0.4691780821917808, "percentage": 4.69, "elapsed_time": "0:42:22", "remaining_time": "14:20:50"}
|
| 138 |
+
{"current_steps": 138, "total_steps": 2920, "loss": 1.1132, "lr": 5e-05, "epoch": 0.4726027397260274, "percentage": 4.73, "elapsed_time": "0:42:41", "remaining_time": "14:20:31"}
|
| 139 |
+
{"current_steps": 139, "total_steps": 2920, "loss": 1.1077, "lr": 5e-05, "epoch": 0.476027397260274, "percentage": 4.76, "elapsed_time": "0:42:59", "remaining_time": "14:20:11"}
|
| 140 |
+
{"current_steps": 140, "total_steps": 2920, "loss": 1.1238, "lr": 5e-05, "epoch": 0.4794520547945205, "percentage": 4.79, "elapsed_time": "0:43:18", "remaining_time": "14:19:51"}
|
| 141 |
+
{"current_steps": 141, "total_steps": 2920, "loss": 1.1422, "lr": 5e-05, "epoch": 0.4828767123287671, "percentage": 4.83, "elapsed_time": "0:43:36", "remaining_time": "14:19:32"}
|
| 142 |
+
{"current_steps": 142, "total_steps": 2920, "loss": 1.1256, "lr": 5e-05, "epoch": 0.4863013698630137, "percentage": 4.86, "elapsed_time": "0:43:55", "remaining_time": "14:19:13"}
|
| 143 |
+
{"current_steps": 143, "total_steps": 2920, "loss": 1.1296, "lr": 5e-05, "epoch": 0.4897260273972603, "percentage": 4.9, "elapsed_time": "0:44:13", "remaining_time": "14:18:54"}
|
| 144 |
+
{"current_steps": 144, "total_steps": 2920, "loss": 1.128, "lr": 5e-05, "epoch": 0.4931506849315068, "percentage": 4.93, "elapsed_time": "0:44:32", "remaining_time": "14:18:34"}
|
| 145 |
+
{"current_steps": 145, "total_steps": 2920, "loss": 1.0962, "lr": 5e-05, "epoch": 0.4965753424657534, "percentage": 4.97, "elapsed_time": "0:44:50", "remaining_time": "14:18:15"}
|
| 146 |
+
{"current_steps": 146, "total_steps": 2920, "loss": 1.1024, "lr": 5e-05, "epoch": 0.5, "percentage": 5.0, "elapsed_time": "0:45:09", "remaining_time": "14:17:55"}
|
| 147 |
+
{"current_steps": 147, "total_steps": 2920, "loss": 1.1218, "lr": 5e-05, "epoch": 0.5034246575342466, "percentage": 5.03, "elapsed_time": "0:45:27", "remaining_time": "14:17:36"}
|
| 148 |
+
{"current_steps": 148, "total_steps": 2920, "loss": 1.0957, "lr": 5e-05, "epoch": 0.5068493150684932, "percentage": 5.07, "elapsed_time": "0:45:46", "remaining_time": "14:17:15"}
|
| 149 |
+
{"current_steps": 149, "total_steps": 2920, "loss": 1.0843, "lr": 5e-05, "epoch": 0.5102739726027398, "percentage": 5.1, "elapsed_time": "0:46:04", "remaining_time": "14:16:56"}
|
| 150 |
+
{"current_steps": 150, "total_steps": 2920, "loss": 1.1122, "lr": 5e-05, "epoch": 0.5136986301369864, "percentage": 5.14, "elapsed_time": "0:46:23", "remaining_time": "14:16:36"}
|
| 151 |
+
{"current_steps": 151, "total_steps": 2920, "loss": 1.0908, "lr": 5e-05, "epoch": 0.5171232876712328, "percentage": 5.17, "elapsed_time": "0:46:48", "remaining_time": "14:18:19"}
|
| 152 |
+
{"current_steps": 152, "total_steps": 2920, "loss": 1.1186, "lr": 5e-05, "epoch": 0.5205479452054794, "percentage": 5.21, "elapsed_time": "0:47:06", "remaining_time": "14:17:58"}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68a67ec30ff5cb47be6a276e82e1bf59098cbbf4681a58e2342e0425186d8490
|
| 3 |
+
size 7608
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|