Training in progress, step 50
Browse files- .gitattributes +1 -0
- config.json +28 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +20 -0
- tokenizer.json +3 -0
- tokenizer_config.json +77 -0
- trainer_log.jsonl +52 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"bos_token_id": 151643,
|
| 7 |
+
"eos_token_id": 151643,
|
| 8 |
+
"hidden_act": "silu",
|
| 9 |
+
"hidden_size": 896,
|
| 10 |
+
"initializer_range": 0.02,
|
| 11 |
+
"intermediate_size": 4864,
|
| 12 |
+
"max_position_embeddings": 131072,
|
| 13 |
+
"max_window_layers": 24,
|
| 14 |
+
"model_type": "qwen2",
|
| 15 |
+
"num_attention_heads": 14,
|
| 16 |
+
"num_hidden_layers": 24,
|
| 17 |
+
"num_key_value_heads": 2,
|
| 18 |
+
"rms_norm_eps": 1e-06,
|
| 19 |
+
"rope_scaling": null,
|
| 20 |
+
"rope_theta": 1000000.0,
|
| 21 |
+
"sliding_window": null,
|
| 22 |
+
"tie_word_embeddings": true,
|
| 23 |
+
"torch_dtype": "bfloat16",
|
| 24 |
+
"transformers_version": "4.51.2",
|
| 25 |
+
"use_cache": false,
|
| 26 |
+
"use_sliding_window": false,
|
| 27 |
+
"vocab_size": 194498
|
| 28 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3742d6597fbd10d5734b76ff214315b8c5a5ed0e9898f3efe7fa41be5f1c5ad9
|
| 3 |
+
size 1064369000
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>"
|
| 5 |
+
],
|
| 6 |
+
"eos_token": {
|
| 7 |
+
"content": "<|im_end|>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false
|
| 12 |
+
},
|
| 13 |
+
"pad_token": {
|
| 14 |
+
"content": "<|endoftext|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false
|
| 19 |
+
}
|
| 20 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8ec123df4ff5fd027294ce9931a2709a061268c399250056437627ea20e304f
|
| 3 |
+
size 27868597
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"11370": {
|
| 5 |
+
"content": "/************************************************************************",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"16395": {
|
| 13 |
+
"content": "%%%%%%%%%%%%%%%%",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"33009": {
|
| 21 |
+
"content": "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"65080": {
|
| 29 |
+
"content": "//************************************************************************",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": false,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"151643": {
|
| 37 |
+
"content": "<|endoftext|>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
},
|
| 44 |
+
"151644": {
|
| 45 |
+
"content": "<|im_start|>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false,
|
| 50 |
+
"special": true
|
| 51 |
+
},
|
| 52 |
+
"151645": {
|
| 53 |
+
"content": "<|im_end|>",
|
| 54 |
+
"lstrip": false,
|
| 55 |
+
"normalized": false,
|
| 56 |
+
"rstrip": false,
|
| 57 |
+
"single_word": false,
|
| 58 |
+
"special": true
|
| 59 |
+
}
|
| 60 |
+
},
|
| 61 |
+
"additional_special_tokens": [
|
| 62 |
+
"<|im_start|>",
|
| 63 |
+
"<|im_end|>"
|
| 64 |
+
],
|
| 65 |
+
"bos_token": null,
|
| 66 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are BanglaQwen, developed by AI4BD. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within /************************************************************************//************************************************************************ XML tags:\\n/************************************************************************\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n//************************************************************************<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are BanglaQwen, developed by AI4BD. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n/************************************************************************\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n//************************************************************************' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n%%%%%%%%%%%%%%%%\\n' }}\n {{- message.content }}\n {{- '\\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
| 67 |
+
"clean_up_tokenization_spaces": false,
|
| 68 |
+
"eos_token": "<|im_end|>",
|
| 69 |
+
"errors": "replace",
|
| 70 |
+
"extra_special_tokens": {},
|
| 71 |
+
"model_max_length": 4096,
|
| 72 |
+
"pad_token": "<|endoftext|>",
|
| 73 |
+
"padding_side": "right",
|
| 74 |
+
"split_special_tokens": false,
|
| 75 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 76 |
+
"unk_token": null
|
| 77 |
+
}
|
trainer_log.jsonl
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 1, "total_steps": 166, "loss": 2.3086, "lr": 1e-06, "epoch": 0.011976047904191617, "percentage": 0.6, "elapsed_time": "0:00:28", "remaining_time": "1:18:05"}
|
| 2 |
+
{"current_steps": 2, "total_steps": 166, "loss": 2.3322, "lr": 1e-06, "epoch": 0.023952095808383235, "percentage": 1.2, "elapsed_time": "0:00:45", "remaining_time": "1:01:45"}
|
| 3 |
+
{"current_steps": 3, "total_steps": 166, "loss": 2.3189, "lr": 1e-06, "epoch": 0.03592814371257485, "percentage": 1.81, "elapsed_time": "0:01:02", "remaining_time": "0:56:13"}
|
| 4 |
+
{"current_steps": 4, "total_steps": 166, "loss": 2.3221, "lr": 1e-06, "epoch": 0.04790419161676647, "percentage": 2.41, "elapsed_time": "0:01:19", "remaining_time": "0:53:25"}
|
| 5 |
+
{"current_steps": 5, "total_steps": 166, "loss": 2.3203, "lr": 1e-06, "epoch": 0.059880239520958084, "percentage": 3.01, "elapsed_time": "0:01:36", "remaining_time": "0:51:45"}
|
| 6 |
+
{"current_steps": 6, "total_steps": 166, "loss": 2.3189, "lr": 1e-06, "epoch": 0.0718562874251497, "percentage": 3.61, "elapsed_time": "0:01:53", "remaining_time": "0:50:34"}
|
| 7 |
+
{"current_steps": 7, "total_steps": 166, "loss": 2.314, "lr": 1e-06, "epoch": 0.08383233532934131, "percentage": 4.22, "elapsed_time": "0:02:11", "remaining_time": "0:49:39"}
|
| 8 |
+
{"current_steps": 8, "total_steps": 166, "loss": 2.3104, "lr": 1e-06, "epoch": 0.09580838323353294, "percentage": 4.82, "elapsed_time": "0:02:28", "remaining_time": "0:48:55"}
|
| 9 |
+
{"current_steps": 9, "total_steps": 166, "loss": 2.3067, "lr": 1e-06, "epoch": 0.10778443113772455, "percentage": 5.42, "elapsed_time": "0:02:46", "remaining_time": "0:48:17"}
|
| 10 |
+
{"current_steps": 10, "total_steps": 166, "loss": 2.3168, "lr": 1e-06, "epoch": 0.11976047904191617, "percentage": 6.02, "elapsed_time": "0:03:03", "remaining_time": "0:47:42"}
|
| 11 |
+
{"current_steps": 11, "total_steps": 166, "loss": 2.322, "lr": 1e-06, "epoch": 0.1317365269461078, "percentage": 6.63, "elapsed_time": "0:03:20", "remaining_time": "0:47:11"}
|
| 12 |
+
{"current_steps": 12, "total_steps": 166, "loss": 2.321, "lr": 1e-06, "epoch": 0.1437125748502994, "percentage": 7.23, "elapsed_time": "0:03:38", "remaining_time": "0:46:43"}
|
| 13 |
+
{"current_steps": 13, "total_steps": 166, "loss": 2.312, "lr": 1e-06, "epoch": 0.15568862275449102, "percentage": 7.83, "elapsed_time": "0:03:55", "remaining_time": "0:46:16"}
|
| 14 |
+
{"current_steps": 14, "total_steps": 166, "loss": 2.303, "lr": 1e-06, "epoch": 0.16766467065868262, "percentage": 8.43, "elapsed_time": "0:04:13", "remaining_time": "0:45:51"}
|
| 15 |
+
{"current_steps": 15, "total_steps": 166, "loss": 2.3105, "lr": 1e-06, "epoch": 0.17964071856287425, "percentage": 9.04, "elapsed_time": "0:04:30", "remaining_time": "0:45:27"}
|
| 16 |
+
{"current_steps": 16, "total_steps": 166, "loss": 2.2994, "lr": 1e-06, "epoch": 0.19161676646706588, "percentage": 9.64, "elapsed_time": "0:04:48", "remaining_time": "0:45:03"}
|
| 17 |
+
{"current_steps": 17, "total_steps": 166, "loss": 2.3085, "lr": 1e-06, "epoch": 0.20359281437125748, "percentage": 10.24, "elapsed_time": "0:05:05", "remaining_time": "0:44:41"}
|
| 18 |
+
{"current_steps": 18, "total_steps": 166, "loss": 2.3006, "lr": 1e-06, "epoch": 0.2155688622754491, "percentage": 10.84, "elapsed_time": "0:05:23", "remaining_time": "0:44:18"}
|
| 19 |
+
{"current_steps": 19, "total_steps": 166, "loss": 2.2872, "lr": 1e-06, "epoch": 0.2275449101796407, "percentage": 11.45, "elapsed_time": "0:05:40", "remaining_time": "0:43:57"}
|
| 20 |
+
{"current_steps": 20, "total_steps": 166, "loss": 2.3008, "lr": 1e-06, "epoch": 0.23952095808383234, "percentage": 12.05, "elapsed_time": "0:05:58", "remaining_time": "0:43:35"}
|
| 21 |
+
{"current_steps": 21, "total_steps": 166, "loss": 2.2948, "lr": 1e-06, "epoch": 0.25149700598802394, "percentage": 12.65, "elapsed_time": "0:06:15", "remaining_time": "0:43:15"}
|
| 22 |
+
{"current_steps": 22, "total_steps": 166, "loss": 2.2899, "lr": 1e-06, "epoch": 0.2634730538922156, "percentage": 13.25, "elapsed_time": "0:06:33", "remaining_time": "0:42:54"}
|
| 23 |
+
{"current_steps": 23, "total_steps": 166, "loss": 2.2967, "lr": 1e-06, "epoch": 0.2754491017964072, "percentage": 13.86, "elapsed_time": "0:06:50", "remaining_time": "0:42:34"}
|
| 24 |
+
{"current_steps": 24, "total_steps": 166, "loss": 2.2969, "lr": 1e-06, "epoch": 0.2874251497005988, "percentage": 14.46, "elapsed_time": "0:07:08", "remaining_time": "0:42:13"}
|
| 25 |
+
{"current_steps": 25, "total_steps": 166, "loss": 2.3023, "lr": 1e-06, "epoch": 0.2994011976047904, "percentage": 15.06, "elapsed_time": "0:07:25", "remaining_time": "0:41:54"}
|
| 26 |
+
{"current_steps": 26, "total_steps": 166, "loss": 2.2952, "lr": 1e-06, "epoch": 0.31137724550898205, "percentage": 15.66, "elapsed_time": "0:07:43", "remaining_time": "0:41:34"}
|
| 27 |
+
{"current_steps": 27, "total_steps": 166, "loss": 2.286, "lr": 1e-06, "epoch": 0.32335329341317365, "percentage": 16.27, "elapsed_time": "0:08:00", "remaining_time": "0:41:15"}
|
| 28 |
+
{"current_steps": 28, "total_steps": 166, "loss": 2.2964, "lr": 1e-06, "epoch": 0.33532934131736525, "percentage": 16.87, "elapsed_time": "0:08:18", "remaining_time": "0:40:56"}
|
| 29 |
+
{"current_steps": 29, "total_steps": 166, "loss": 2.2871, "lr": 1e-06, "epoch": 0.3473053892215569, "percentage": 17.47, "elapsed_time": "0:08:35", "remaining_time": "0:40:37"}
|
| 30 |
+
{"current_steps": 30, "total_steps": 166, "loss": 2.3018, "lr": 1e-06, "epoch": 0.3592814371257485, "percentage": 18.07, "elapsed_time": "0:08:53", "remaining_time": "0:40:17"}
|
| 31 |
+
{"current_steps": 31, "total_steps": 166, "loss": 2.2784, "lr": 1e-06, "epoch": 0.3712574850299401, "percentage": 18.67, "elapsed_time": "0:09:10", "remaining_time": "0:39:59"}
|
| 32 |
+
{"current_steps": 32, "total_steps": 166, "loss": 2.2764, "lr": 1e-06, "epoch": 0.38323353293413176, "percentage": 19.28, "elapsed_time": "0:09:28", "remaining_time": "0:39:40"}
|
| 33 |
+
{"current_steps": 33, "total_steps": 166, "loss": 2.2706, "lr": 1e-06, "epoch": 0.39520958083832336, "percentage": 19.88, "elapsed_time": "0:09:45", "remaining_time": "0:39:21"}
|
| 34 |
+
{"current_steps": 34, "total_steps": 166, "loss": 2.2593, "lr": 1e-06, "epoch": 0.40718562874251496, "percentage": 20.48, "elapsed_time": "0:10:03", "remaining_time": "0:39:02"}
|
| 35 |
+
{"current_steps": 35, "total_steps": 166, "loss": 2.2705, "lr": 1e-06, "epoch": 0.41916167664670656, "percentage": 21.08, "elapsed_time": "0:10:20", "remaining_time": "0:38:44"}
|
| 36 |
+
{"current_steps": 36, "total_steps": 166, "loss": 2.2708, "lr": 1e-06, "epoch": 0.4311377245508982, "percentage": 21.69, "elapsed_time": "0:10:38", "remaining_time": "0:38:25"}
|
| 37 |
+
{"current_steps": 37, "total_steps": 166, "loss": 2.2636, "lr": 1e-06, "epoch": 0.4431137724550898, "percentage": 22.29, "elapsed_time": "0:10:55", "remaining_time": "0:38:06"}
|
| 38 |
+
{"current_steps": 38, "total_steps": 166, "loss": 2.2593, "lr": 1e-06, "epoch": 0.4550898203592814, "percentage": 22.89, "elapsed_time": "0:11:13", "remaining_time": "0:37:48"}
|
| 39 |
+
{"current_steps": 39, "total_steps": 166, "loss": 2.2531, "lr": 1e-06, "epoch": 0.46706586826347307, "percentage": 23.49, "elapsed_time": "0:11:30", "remaining_time": "0:37:29"}
|
| 40 |
+
{"current_steps": 40, "total_steps": 166, "loss": 2.2651, "lr": 1e-06, "epoch": 0.47904191616766467, "percentage": 24.1, "elapsed_time": "0:11:48", "remaining_time": "0:37:11"}
|
| 41 |
+
{"current_steps": 41, "total_steps": 166, "loss": 2.2721, "lr": 1e-06, "epoch": 0.49101796407185627, "percentage": 24.7, "elapsed_time": "0:12:05", "remaining_time": "0:36:53"}
|
| 42 |
+
{"current_steps": 42, "total_steps": 166, "loss": 2.2568, "lr": 1e-06, "epoch": 0.5029940119760479, "percentage": 25.3, "elapsed_time": "0:12:23", "remaining_time": "0:36:34"}
|
| 43 |
+
{"current_steps": 43, "total_steps": 166, "loss": 2.2504, "lr": 1e-06, "epoch": 0.5149700598802395, "percentage": 25.9, "elapsed_time": "0:12:40", "remaining_time": "0:36:16"}
|
| 44 |
+
{"current_steps": 44, "total_steps": 166, "loss": 2.2611, "lr": 1e-06, "epoch": 0.5269461077844312, "percentage": 26.51, "elapsed_time": "0:12:58", "remaining_time": "0:35:58"}
|
| 45 |
+
{"current_steps": 45, "total_steps": 166, "loss": 2.2637, "lr": 1e-06, "epoch": 0.5389221556886228, "percentage": 27.11, "elapsed_time": "0:13:15", "remaining_time": "0:35:40"}
|
| 46 |
+
{"current_steps": 46, "total_steps": 166, "loss": 2.2536, "lr": 1e-06, "epoch": 0.5508982035928144, "percentage": 27.71, "elapsed_time": "0:13:33", "remaining_time": "0:35:21"}
|
| 47 |
+
{"current_steps": 47, "total_steps": 166, "loss": 2.2543, "lr": 1e-06, "epoch": 0.562874251497006, "percentage": 28.31, "elapsed_time": "0:13:50", "remaining_time": "0:35:03"}
|
| 48 |
+
{"current_steps": 48, "total_steps": 166, "loss": 2.252, "lr": 1e-06, "epoch": 0.5748502994011976, "percentage": 28.92, "elapsed_time": "0:14:08", "remaining_time": "0:34:45"}
|
| 49 |
+
{"current_steps": 49, "total_steps": 166, "loss": 2.2604, "lr": 1e-06, "epoch": 0.5868263473053892, "percentage": 29.52, "elapsed_time": "0:14:25", "remaining_time": "0:34:27"}
|
| 50 |
+
{"current_steps": 50, "total_steps": 166, "loss": 2.2571, "lr": 1e-06, "epoch": 0.5988023952095808, "percentage": 30.12, "elapsed_time": "0:14:43", "remaining_time": "0:34:09"}
|
| 51 |
+
{"current_steps": 51, "total_steps": 166, "loss": 2.247, "lr": 1e-06, "epoch": 0.6107784431137725, "percentage": 30.72, "elapsed_time": "0:15:07", "remaining_time": "0:34:06"}
|
| 52 |
+
{"current_steps": 52, "total_steps": 166, "loss": 2.2607, "lr": 1e-06, "epoch": 0.6227544910179641, "percentage": 31.33, "elapsed_time": "0:15:25", "remaining_time": "0:33:48"}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a948de6f187071e8f2ca072dd444d9f4f3bca449376a9daf6e78ed345298a3b
|
| 3 |
+
size 7608
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|