SKNahin committed on
Commit
e0cfe7d
·
verified ·
1 Parent(s): 1af9225

Training in progress, step 150

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151643,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 896,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4864,
12
+ "max_position_embeddings": 131072,
13
+ "max_window_layers": 24,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 14,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": true,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.2",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 194498
28
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48ba39a7956bd21790caf9e9b1ec7713721d6d9ec09f6197568e1cc7870db0ab
3
+ size 1064369000
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ec123df4ff5fd027294ce9931a2709a061268c399250056437627ea20e304f
3
+ size 27868597
tokenizer_config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "11370": {
5
+ "content": "/************************************************************************",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "16395": {
13
+ "content": "%%%%%%%%%%%%%%%%",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "33009": {
21
+ "content": "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "65080": {
29
+ "content": "//************************************************************************",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151643": {
37
+ "content": "<|endoftext|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151644": {
45
+ "content": "<|im_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151645": {
53
+ "content": "<|im_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "additional_special_tokens": [
62
+ "<|im_start|>",
63
+ "<|im_end|>"
64
+ ],
65
+ "bos_token": null,
66
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are BanglaQwen, developed by AI4BD. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within /************************************************************************//************************************************************************ XML tags:\\n/************************************************************************\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n//************************************************************************<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are BanglaQwen, developed by AI4BD. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n/************************************************************************\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n//************************************************************************' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n%%%%%%%%%%%%%%%%\\n' }}\n {{- message.content }}\n {{- '\\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
67
+ "clean_up_tokenization_spaces": false,
68
+ "eos_token": "<|im_end|>",
69
+ "errors": "replace",
70
+ "extra_special_tokens": {},
71
+ "model_max_length": 4096,
72
+ "pad_token": "<|endoftext|>",
73
+ "padding_side": "right",
74
+ "split_special_tokens": false,
75
+ "tokenizer_class": "Qwen2Tokenizer",
76
+ "unk_token": null
77
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 2920, "loss": 1.4935, "lr": 5e-05, "epoch": 0.003424657534246575, "percentage": 0.03, "elapsed_time": "0:00:29", "remaining_time": "23:54:03"}
2
+ {"current_steps": 2, "total_steps": 2920, "loss": 1.3436, "lr": 5e-05, "epoch": 0.00684931506849315, "percentage": 0.07, "elapsed_time": "0:00:47", "remaining_time": "19:21:11"}
3
+ {"current_steps": 3, "total_steps": 2920, "loss": 1.3261, "lr": 5e-05, "epoch": 0.010273972602739725, "percentage": 0.1, "elapsed_time": "0:01:06", "remaining_time": "17:51:23"}
4
+ {"current_steps": 4, "total_steps": 2920, "loss": 1.2239, "lr": 5e-05, "epoch": 0.0136986301369863, "percentage": 0.14, "elapsed_time": "0:01:24", "remaining_time": "17:06:50"}
5
+ {"current_steps": 5, "total_steps": 2920, "loss": 1.4355, "lr": 5e-05, "epoch": 0.017123287671232876, "percentage": 0.17, "elapsed_time": "0:01:42", "remaining_time": "16:40:05"}
6
+ {"current_steps": 6, "total_steps": 2920, "loss": 1.2159, "lr": 5e-05, "epoch": 0.02054794520547945, "percentage": 0.21, "elapsed_time": "0:02:01", "remaining_time": "16:22:18"}
7
+ {"current_steps": 7, "total_steps": 2920, "loss": 1.1885, "lr": 5e-05, "epoch": 0.023972602739726026, "percentage": 0.24, "elapsed_time": "0:02:19", "remaining_time": "16:09:43"}
8
+ {"current_steps": 8, "total_steps": 2920, "loss": 1.1773, "lr": 5e-05, "epoch": 0.0273972602739726, "percentage": 0.27, "elapsed_time": "0:02:38", "remaining_time": "15:59:57"}
9
+ {"current_steps": 9, "total_steps": 2920, "loss": 1.1569, "lr": 5e-05, "epoch": 0.030821917808219176, "percentage": 0.31, "elapsed_time": "0:02:56", "remaining_time": "15:52:16"}
10
+ {"current_steps": 10, "total_steps": 2920, "loss": 1.1633, "lr": 5e-05, "epoch": 0.03424657534246575, "percentage": 0.34, "elapsed_time": "0:03:15", "remaining_time": "15:46:04"}
11
+ {"current_steps": 11, "total_steps": 2920, "loss": 1.2799, "lr": 5e-05, "epoch": 0.03767123287671233, "percentage": 0.38, "elapsed_time": "0:03:33", "remaining_time": "15:41:05"}
12
+ {"current_steps": 12, "total_steps": 2920, "loss": 1.168, "lr": 5e-05, "epoch": 0.0410958904109589, "percentage": 0.41, "elapsed_time": "0:03:51", "remaining_time": "15:36:50"}
13
+ {"current_steps": 13, "total_steps": 2920, "loss": 1.2093, "lr": 5e-05, "epoch": 0.04452054794520548, "percentage": 0.45, "elapsed_time": "0:04:10", "remaining_time": "15:33:14"}
14
+ {"current_steps": 14, "total_steps": 2920, "loss": 1.1542, "lr": 5e-05, "epoch": 0.04794520547945205, "percentage": 0.48, "elapsed_time": "0:04:28", "remaining_time": "15:30:10"}
15
+ {"current_steps": 15, "total_steps": 2920, "loss": 1.1545, "lr": 5e-05, "epoch": 0.05136986301369863, "percentage": 0.51, "elapsed_time": "0:04:47", "remaining_time": "15:27:30"}
16
+ {"current_steps": 16, "total_steps": 2920, "loss": 1.1554, "lr": 5e-05, "epoch": 0.0547945205479452, "percentage": 0.55, "elapsed_time": "0:05:05", "remaining_time": "15:25:08"}
17
+ {"current_steps": 17, "total_steps": 2920, "loss": 1.143, "lr": 5e-05, "epoch": 0.05821917808219178, "percentage": 0.58, "elapsed_time": "0:05:24", "remaining_time": "15:22:53"}
18
+ {"current_steps": 18, "total_steps": 2920, "loss": 1.1375, "lr": 5e-05, "epoch": 0.06164383561643835, "percentage": 0.62, "elapsed_time": "0:05:42", "remaining_time": "15:20:53"}
19
+ {"current_steps": 19, "total_steps": 2920, "loss": 1.1397, "lr": 5e-05, "epoch": 0.06506849315068493, "percentage": 0.65, "elapsed_time": "0:06:01", "remaining_time": "15:18:59"}
20
+ {"current_steps": 20, "total_steps": 2920, "loss": 1.145, "lr": 5e-05, "epoch": 0.0684931506849315, "percentage": 0.68, "elapsed_time": "0:06:19", "remaining_time": "15:17:18"}
21
+ {"current_steps": 21, "total_steps": 2920, "loss": 1.2124, "lr": 5e-05, "epoch": 0.07191780821917808, "percentage": 0.72, "elapsed_time": "0:06:37", "remaining_time": "15:15:42"}
22
+ {"current_steps": 22, "total_steps": 2920, "loss": 1.1434, "lr": 5e-05, "epoch": 0.07534246575342465, "percentage": 0.75, "elapsed_time": "0:06:56", "remaining_time": "15:14:17"}
23
+ {"current_steps": 23, "total_steps": 2920, "loss": 1.1397, "lr": 5e-05, "epoch": 0.07876712328767123, "percentage": 0.79, "elapsed_time": "0:07:14", "remaining_time": "15:12:58"}
24
+ {"current_steps": 24, "total_steps": 2920, "loss": 1.122, "lr": 5e-05, "epoch": 0.0821917808219178, "percentage": 0.82, "elapsed_time": "0:07:33", "remaining_time": "15:11:42"}
25
+ {"current_steps": 25, "total_steps": 2920, "loss": 1.1376, "lr": 5e-05, "epoch": 0.08561643835616438, "percentage": 0.86, "elapsed_time": "0:07:51", "remaining_time": "15:10:31"}
26
+ {"current_steps": 26, "total_steps": 2920, "loss": 1.1414, "lr": 5e-05, "epoch": 0.08904109589041095, "percentage": 0.89, "elapsed_time": "0:08:10", "remaining_time": "15:09:26"}
27
+ {"current_steps": 27, "total_steps": 2920, "loss": 1.1469, "lr": 5e-05, "epoch": 0.09246575342465753, "percentage": 0.92, "elapsed_time": "0:08:28", "remaining_time": "15:08:28"}
28
+ {"current_steps": 28, "total_steps": 2920, "loss": 1.126, "lr": 5e-05, "epoch": 0.0958904109589041, "percentage": 0.96, "elapsed_time": "0:08:47", "remaining_time": "15:07:29"}
29
+ {"current_steps": 29, "total_steps": 2920, "loss": 1.145, "lr": 5e-05, "epoch": 0.09931506849315068, "percentage": 0.99, "elapsed_time": "0:09:05", "remaining_time": "15:06:34"}
30
+ {"current_steps": 30, "total_steps": 2920, "loss": 1.1656, "lr": 5e-05, "epoch": 0.10273972602739725, "percentage": 1.03, "elapsed_time": "0:09:24", "remaining_time": "15:05:44"}
31
+ {"current_steps": 31, "total_steps": 2920, "loss": 1.1404, "lr": 5e-05, "epoch": 0.10616438356164383, "percentage": 1.06, "elapsed_time": "0:09:42", "remaining_time": "15:04:56"}
32
+ {"current_steps": 32, "total_steps": 2920, "loss": 1.1347, "lr": 5e-05, "epoch": 0.1095890410958904, "percentage": 1.1, "elapsed_time": "0:10:01", "remaining_time": "15:04:11"}
33
+ {"current_steps": 33, "total_steps": 2920, "loss": 1.1156, "lr": 5e-05, "epoch": 0.11301369863013698, "percentage": 1.13, "elapsed_time": "0:10:19", "remaining_time": "15:03:24"}
34
+ {"current_steps": 34, "total_steps": 2920, "loss": 1.1382, "lr": 5e-05, "epoch": 0.11643835616438356, "percentage": 1.16, "elapsed_time": "0:10:38", "remaining_time": "15:02:41"}
35
+ {"current_steps": 35, "total_steps": 2920, "loss": 1.1639, "lr": 5e-05, "epoch": 0.11986301369863013, "percentage": 1.2, "elapsed_time": "0:10:56", "remaining_time": "15:01:55"}
36
+ {"current_steps": 36, "total_steps": 2920, "loss": 1.1248, "lr": 5e-05, "epoch": 0.1232876712328767, "percentage": 1.23, "elapsed_time": "0:11:14", "remaining_time": "15:01:14"}
37
+ {"current_steps": 37, "total_steps": 2920, "loss": 1.1312, "lr": 5e-05, "epoch": 0.1267123287671233, "percentage": 1.27, "elapsed_time": "0:11:33", "remaining_time": "15:00:34"}
38
+ {"current_steps": 38, "total_steps": 2920, "loss": 1.1198, "lr": 5e-05, "epoch": 0.13013698630136986, "percentage": 1.3, "elapsed_time": "0:11:51", "remaining_time": "14:59:57"}
39
+ {"current_steps": 39, "total_steps": 2920, "loss": 1.1168, "lr": 5e-05, "epoch": 0.13356164383561644, "percentage": 1.34, "elapsed_time": "0:12:10", "remaining_time": "14:59:21"}
40
+ {"current_steps": 40, "total_steps": 2920, "loss": 1.132, "lr": 5e-05, "epoch": 0.136986301369863, "percentage": 1.37, "elapsed_time": "0:12:28", "remaining_time": "14:58:43"}
41
+ {"current_steps": 41, "total_steps": 2920, "loss": 1.1054, "lr": 5e-05, "epoch": 0.1404109589041096, "percentage": 1.4, "elapsed_time": "0:12:47", "remaining_time": "14:58:07"}
42
+ {"current_steps": 42, "total_steps": 2920, "loss": 1.114, "lr": 5e-05, "epoch": 0.14383561643835616, "percentage": 1.44, "elapsed_time": "0:13:05", "remaining_time": "14:57:32"}
43
+ {"current_steps": 43, "total_steps": 2920, "loss": 1.1548, "lr": 5e-05, "epoch": 0.14726027397260275, "percentage": 1.47, "elapsed_time": "0:13:24", "remaining_time": "14:56:59"}
44
+ {"current_steps": 44, "total_steps": 2920, "loss": 1.1145, "lr": 5e-05, "epoch": 0.1506849315068493, "percentage": 1.51, "elapsed_time": "0:13:42", "remaining_time": "14:56:27"}
45
+ {"current_steps": 45, "total_steps": 2920, "loss": 1.1042, "lr": 5e-05, "epoch": 0.1541095890410959, "percentage": 1.54, "elapsed_time": "0:14:01", "remaining_time": "14:55:56"}
46
+ {"current_steps": 46, "total_steps": 2920, "loss": 1.1457, "lr": 5e-05, "epoch": 0.15753424657534246, "percentage": 1.58, "elapsed_time": "0:14:19", "remaining_time": "14:55:21"}
47
+ {"current_steps": 47, "total_steps": 2920, "loss": 1.1522, "lr": 5e-05, "epoch": 0.16095890410958905, "percentage": 1.61, "elapsed_time": "0:14:38", "remaining_time": "14:54:49"}
48
+ {"current_steps": 48, "total_steps": 2920, "loss": 1.0885, "lr": 5e-05, "epoch": 0.1643835616438356, "percentage": 1.64, "elapsed_time": "0:14:56", "remaining_time": "14:54:17"}
49
+ {"current_steps": 49, "total_steps": 2920, "loss": 1.1329, "lr": 5e-05, "epoch": 0.1678082191780822, "percentage": 1.68, "elapsed_time": "0:15:15", "remaining_time": "14:53:46"}
50
+ {"current_steps": 50, "total_steps": 2920, "loss": 1.1298, "lr": 5e-05, "epoch": 0.17123287671232876, "percentage": 1.71, "elapsed_time": "0:15:33", "remaining_time": "14:53:16"}
51
+ {"current_steps": 51, "total_steps": 2920, "loss": 1.1355, "lr": 5e-05, "epoch": 0.17465753424657535, "percentage": 1.75, "elapsed_time": "0:15:52", "remaining_time": "14:52:47"}
52
+ {"current_steps": 52, "total_steps": 2920, "loss": 1.1248, "lr": 5e-05, "epoch": 0.1780821917808219, "percentage": 1.78, "elapsed_time": "0:16:10", "remaining_time": "14:52:18"}
53
+ {"current_steps": 53, "total_steps": 2920, "loss": 1.1203, "lr": 5e-05, "epoch": 0.1815068493150685, "percentage": 1.82, "elapsed_time": "0:16:29", "remaining_time": "14:51:50"}
54
+ {"current_steps": 54, "total_steps": 2920, "loss": 1.1076, "lr": 5e-05, "epoch": 0.18493150684931506, "percentage": 1.85, "elapsed_time": "0:16:47", "remaining_time": "14:51:21"}
55
+ {"current_steps": 55, "total_steps": 2920, "loss": 1.1492, "lr": 5e-05, "epoch": 0.18835616438356165, "percentage": 1.88, "elapsed_time": "0:17:06", "remaining_time": "14:50:56"}
56
+ {"current_steps": 56, "total_steps": 2920, "loss": 1.2298, "lr": 5e-05, "epoch": 0.1917808219178082, "percentage": 1.92, "elapsed_time": "0:17:24", "remaining_time": "14:50:29"}
57
+ {"current_steps": 57, "total_steps": 2920, "loss": 1.1544, "lr": 5e-05, "epoch": 0.1952054794520548, "percentage": 1.95, "elapsed_time": "0:17:43", "remaining_time": "14:50:02"}
58
+ {"current_steps": 58, "total_steps": 2920, "loss": 1.1092, "lr": 5e-05, "epoch": 0.19863013698630136, "percentage": 1.99, "elapsed_time": "0:18:01", "remaining_time": "14:49:36"}
59
+ {"current_steps": 59, "total_steps": 2920, "loss": 1.1411, "lr": 5e-05, "epoch": 0.20205479452054795, "percentage": 2.02, "elapsed_time": "0:18:20", "remaining_time": "14:49:09"}
60
+ {"current_steps": 60, "total_steps": 2920, "loss": 1.1404, "lr": 5e-05, "epoch": 0.2054794520547945, "percentage": 2.05, "elapsed_time": "0:18:38", "remaining_time": "14:48:43"}
61
+ {"current_steps": 61, "total_steps": 2920, "loss": 1.1204, "lr": 5e-05, "epoch": 0.2089041095890411, "percentage": 2.09, "elapsed_time": "0:18:57", "remaining_time": "14:48:17"}
62
+ {"current_steps": 62, "total_steps": 2920, "loss": 1.1135, "lr": 5e-05, "epoch": 0.21232876712328766, "percentage": 2.12, "elapsed_time": "0:19:15", "remaining_time": "14:47:52"}
63
+ {"current_steps": 63, "total_steps": 2920, "loss": 1.132, "lr": 5e-05, "epoch": 0.21575342465753425, "percentage": 2.16, "elapsed_time": "0:19:34", "remaining_time": "14:47:28"}
64
+ {"current_steps": 64, "total_steps": 2920, "loss": 1.1232, "lr": 5e-05, "epoch": 0.2191780821917808, "percentage": 2.19, "elapsed_time": "0:19:52", "remaining_time": "14:47:01"}
65
+ {"current_steps": 65, "total_steps": 2920, "loss": 1.0916, "lr": 5e-05, "epoch": 0.2226027397260274, "percentage": 2.23, "elapsed_time": "0:20:11", "remaining_time": "14:46:37"}
66
+ {"current_steps": 66, "total_steps": 2920, "loss": 1.0963, "lr": 5e-05, "epoch": 0.22602739726027396, "percentage": 2.26, "elapsed_time": "0:20:29", "remaining_time": "14:46:13"}
67
+ {"current_steps": 67, "total_steps": 2920, "loss": 1.1201, "lr": 5e-05, "epoch": 0.22945205479452055, "percentage": 2.29, "elapsed_time": "0:20:48", "remaining_time": "14:45:50"}
68
+ {"current_steps": 68, "total_steps": 2920, "loss": 1.1016, "lr": 5e-05, "epoch": 0.2328767123287671, "percentage": 2.33, "elapsed_time": "0:21:06", "remaining_time": "14:45:28"}
69
+ {"current_steps": 69, "total_steps": 2920, "loss": 1.1275, "lr": 5e-05, "epoch": 0.2363013698630137, "percentage": 2.36, "elapsed_time": "0:21:25", "remaining_time": "14:45:04"}
70
+ {"current_steps": 70, "total_steps": 2920, "loss": 1.2119, "lr": 5e-05, "epoch": 0.23972602739726026, "percentage": 2.4, "elapsed_time": "0:21:43", "remaining_time": "14:44:40"}
71
+ {"current_steps": 71, "total_steps": 2920, "loss": 1.1137, "lr": 5e-05, "epoch": 0.24315068493150685, "percentage": 2.43, "elapsed_time": "0:22:02", "remaining_time": "14:44:18"}
72
+ {"current_steps": 72, "total_steps": 2920, "loss": 1.1247, "lr": 5e-05, "epoch": 0.2465753424657534, "percentage": 2.47, "elapsed_time": "0:22:20", "remaining_time": "14:43:55"}
73
+ {"current_steps": 73, "total_steps": 2920, "loss": 1.1107, "lr": 5e-05, "epoch": 0.25, "percentage": 2.5, "elapsed_time": "0:22:39", "remaining_time": "14:43:32"}
74
+ {"current_steps": 74, "total_steps": 2920, "loss": 1.1278, "lr": 5e-05, "epoch": 0.2534246575342466, "percentage": 2.53, "elapsed_time": "0:22:57", "remaining_time": "14:43:10"}
75
+ {"current_steps": 75, "total_steps": 2920, "loss": 1.095, "lr": 5e-05, "epoch": 0.2568493150684932, "percentage": 2.57, "elapsed_time": "0:23:16", "remaining_time": "14:42:49"}
76
+ {"current_steps": 76, "total_steps": 2920, "loss": 1.0953, "lr": 5e-05, "epoch": 0.2602739726027397, "percentage": 2.6, "elapsed_time": "0:23:34", "remaining_time": "14:42:26"}
77
+ {"current_steps": 77, "total_steps": 2920, "loss": 1.2623, "lr": 5e-05, "epoch": 0.2636986301369863, "percentage": 2.64, "elapsed_time": "0:23:53", "remaining_time": "14:42:03"}
78
+ {"current_steps": 78, "total_steps": 2920, "loss": 1.1161, "lr": 5e-05, "epoch": 0.2671232876712329, "percentage": 2.67, "elapsed_time": "0:24:11", "remaining_time": "14:41:40"}
79
+ {"current_steps": 79, "total_steps": 2920, "loss": 1.1358, "lr": 5e-05, "epoch": 0.2705479452054795, "percentage": 2.71, "elapsed_time": "0:24:30", "remaining_time": "14:41:17"}
80
+ {"current_steps": 80, "total_steps": 2920, "loss": 1.1127, "lr": 5e-05, "epoch": 0.273972602739726, "percentage": 2.74, "elapsed_time": "0:24:48", "remaining_time": "14:40:55"}
81
+ {"current_steps": 81, "total_steps": 2920, "loss": 1.1163, "lr": 5e-05, "epoch": 0.2773972602739726, "percentage": 2.77, "elapsed_time": "0:25:07", "remaining_time": "14:40:34"}
82
+ {"current_steps": 82, "total_steps": 2920, "loss": 1.112, "lr": 5e-05, "epoch": 0.2808219178082192, "percentage": 2.81, "elapsed_time": "0:25:25", "remaining_time": "14:40:13"}
83
+ {"current_steps": 83, "total_steps": 2920, "loss": 1.115, "lr": 5e-05, "epoch": 0.2842465753424658, "percentage": 2.84, "elapsed_time": "0:25:44", "remaining_time": "14:39:51"}
84
+ {"current_steps": 84, "total_steps": 2920, "loss": 1.1481, "lr": 5e-05, "epoch": 0.2876712328767123, "percentage": 2.88, "elapsed_time": "0:26:02", "remaining_time": "14:39:29"}
85
+ {"current_steps": 85, "total_steps": 2920, "loss": 1.1136, "lr": 5e-05, "epoch": 0.2910958904109589, "percentage": 2.91, "elapsed_time": "0:26:21", "remaining_time": "14:39:05"}
86
+ {"current_steps": 86, "total_steps": 2920, "loss": 1.1163, "lr": 5e-05, "epoch": 0.2945205479452055, "percentage": 2.95, "elapsed_time": "0:26:39", "remaining_time": "14:38:43"}
87
+ {"current_steps": 87, "total_steps": 2920, "loss": 1.1247, "lr": 5e-05, "epoch": 0.2979452054794521, "percentage": 2.98, "elapsed_time": "0:26:58", "remaining_time": "14:38:21"}
88
+ {"current_steps": 88, "total_steps": 2920, "loss": 1.0972, "lr": 5e-05, "epoch": 0.3013698630136986, "percentage": 3.01, "elapsed_time": "0:27:16", "remaining_time": "14:37:59"}
89
+ {"current_steps": 89, "total_steps": 2920, "loss": 1.1206, "lr": 5e-05, "epoch": 0.3047945205479452, "percentage": 3.05, "elapsed_time": "0:27:35", "remaining_time": "14:37:37"}
90
+ {"current_steps": 90, "total_steps": 2920, "loss": 1.1232, "lr": 5e-05, "epoch": 0.3082191780821918, "percentage": 3.08, "elapsed_time": "0:27:53", "remaining_time": "14:37:15"}
91
+ {"current_steps": 91, "total_steps": 2920, "loss": 1.1255, "lr": 5e-05, "epoch": 0.3116438356164384, "percentage": 3.12, "elapsed_time": "0:28:12", "remaining_time": "14:36:54"}
92
+ {"current_steps": 92, "total_steps": 2920, "loss": 1.1293, "lr": 5e-05, "epoch": 0.3150684931506849, "percentage": 3.15, "elapsed_time": "0:28:30", "remaining_time": "14:36:31"}
93
+ {"current_steps": 93, "total_steps": 2920, "loss": 1.1486, "lr": 5e-05, "epoch": 0.3184931506849315, "percentage": 3.18, "elapsed_time": "0:28:49", "remaining_time": "14:36:11"}
94
+ {"current_steps": 94, "total_steps": 2920, "loss": 1.121, "lr": 5e-05, "epoch": 0.3219178082191781, "percentage": 3.22, "elapsed_time": "0:29:07", "remaining_time": "14:35:49"}
95
+ {"current_steps": 95, "total_steps": 2920, "loss": 1.1082, "lr": 5e-05, "epoch": 0.3253424657534247, "percentage": 3.25, "elapsed_time": "0:29:26", "remaining_time": "14:35:28"}
96
+ {"current_steps": 96, "total_steps": 2920, "loss": 1.1624, "lr": 5e-05, "epoch": 0.3287671232876712, "percentage": 3.29, "elapsed_time": "0:29:44", "remaining_time": "14:35:05"}
97
+ {"current_steps": 97, "total_steps": 2920, "loss": 1.1889, "lr": 5e-05, "epoch": 0.3321917808219178, "percentage": 3.32, "elapsed_time": "0:30:03", "remaining_time": "14:34:44"}
98
+ {"current_steps": 98, "total_steps": 2920, "loss": 1.1172, "lr": 5e-05, "epoch": 0.3356164383561644, "percentage": 3.36, "elapsed_time": "0:30:21", "remaining_time": "14:34:22"}
99
+ {"current_steps": 99, "total_steps": 2920, "loss": 1.0992, "lr": 5e-05, "epoch": 0.339041095890411, "percentage": 3.39, "elapsed_time": "0:30:40", "remaining_time": "14:34:01"}
100
+ {"current_steps": 100, "total_steps": 2920, "loss": 1.118, "lr": 5e-05, "epoch": 0.3424657534246575, "percentage": 3.42, "elapsed_time": "0:30:58", "remaining_time": "14:33:39"}
101
+ {"current_steps": 101, "total_steps": 2920, "loss": 1.1317, "lr": 5e-05, "epoch": 0.3458904109589041, "percentage": 3.46, "elapsed_time": "0:31:17", "remaining_time": "14:33:18"}
102
+ {"current_steps": 102, "total_steps": 2920, "loss": 1.1086, "lr": 5e-05, "epoch": 0.3493150684931507, "percentage": 3.49, "elapsed_time": "0:31:35", "remaining_time": "14:32:56"}
103
+ {"current_steps": 103, "total_steps": 2920, "loss": 1.0983, "lr": 5e-05, "epoch": 0.3527397260273973, "percentage": 3.53, "elapsed_time": "0:31:54", "remaining_time": "14:32:34"}
104
+ {"current_steps": 104, "total_steps": 2920, "loss": 1.1044, "lr": 5e-05, "epoch": 0.3561643835616438, "percentage": 3.56, "elapsed_time": "0:32:12", "remaining_time": "14:32:12"}
105
+ {"current_steps": 105, "total_steps": 2920, "loss": 1.2, "lr": 5e-05, "epoch": 0.3595890410958904, "percentage": 3.6, "elapsed_time": "0:32:31", "remaining_time": "14:31:51"}
106
+ {"current_steps": 106, "total_steps": 2920, "loss": 1.131, "lr": 5e-05, "epoch": 0.363013698630137, "percentage": 3.63, "elapsed_time": "0:32:49", "remaining_time": "14:31:30"}
107
+ {"current_steps": 107, "total_steps": 2920, "loss": 1.1263, "lr": 5e-05, "epoch": 0.3664383561643836, "percentage": 3.66, "elapsed_time": "0:33:08", "remaining_time": "14:31:08"}
108
+ {"current_steps": 108, "total_steps": 2920, "loss": 1.1249, "lr": 5e-05, "epoch": 0.3698630136986301, "percentage": 3.7, "elapsed_time": "0:33:26", "remaining_time": "14:30:48"}
109
+ {"current_steps": 109, "total_steps": 2920, "loss": 1.1184, "lr": 5e-05, "epoch": 0.3732876712328767, "percentage": 3.73, "elapsed_time": "0:33:45", "remaining_time": "14:30:26"}
110
+ {"current_steps": 110, "total_steps": 2920, "loss": 1.1222, "lr": 5e-05, "epoch": 0.3767123287671233, "percentage": 3.77, "elapsed_time": "0:34:03", "remaining_time": "14:30:05"}
111
+ {"current_steps": 111, "total_steps": 2920, "loss": 1.0973, "lr": 5e-05, "epoch": 0.3801369863013699, "percentage": 3.8, "elapsed_time": "0:34:22", "remaining_time": "14:29:43"}
112
+ {"current_steps": 112, "total_steps": 2920, "loss": 1.1438, "lr": 5e-05, "epoch": 0.3835616438356164, "percentage": 3.84, "elapsed_time": "0:34:40", "remaining_time": "14:29:21"}
113
+ {"current_steps": 113, "total_steps": 2920, "loss": 1.1043, "lr": 5e-05, "epoch": 0.386986301369863, "percentage": 3.87, "elapsed_time": "0:34:59", "remaining_time": "14:29:01"}
114
+ {"current_steps": 114, "total_steps": 2920, "loss": 1.108, "lr": 5e-05, "epoch": 0.3904109589041096, "percentage": 3.9, "elapsed_time": "0:35:17", "remaining_time": "14:28:41"}
115
+ {"current_steps": 115, "total_steps": 2920, "loss": 1.0916, "lr": 5e-05, "epoch": 0.3938356164383562, "percentage": 3.94, "elapsed_time": "0:35:35", "remaining_time": "14:28:19"}
116
+ {"current_steps": 116, "total_steps": 2920, "loss": 1.1323, "lr": 5e-05, "epoch": 0.3972602739726027, "percentage": 3.97, "elapsed_time": "0:35:54", "remaining_time": "14:27:59"}
117
+ {"current_steps": 117, "total_steps": 2920, "loss": 1.1272, "lr": 5e-05, "epoch": 0.4006849315068493, "percentage": 4.01, "elapsed_time": "0:36:12", "remaining_time": "14:27:38"}
118
+ {"current_steps": 118, "total_steps": 2920, "loss": 1.1339, "lr": 5e-05, "epoch": 0.4041095890410959, "percentage": 4.04, "elapsed_time": "0:36:31", "remaining_time": "14:27:18"}
119
+ {"current_steps": 119, "total_steps": 2920, "loss": 1.1089, "lr": 5e-05, "epoch": 0.4075342465753425, "percentage": 4.08, "elapsed_time": "0:36:49", "remaining_time": "14:26:57"}
120
+ {"current_steps": 120, "total_steps": 2920, "loss": 1.127, "lr": 5e-05, "epoch": 0.410958904109589, "percentage": 4.11, "elapsed_time": "0:37:08", "remaining_time": "14:26:37"}
121
+ {"current_steps": 121, "total_steps": 2920, "loss": 1.1252, "lr": 5e-05, "epoch": 0.4143835616438356, "percentage": 4.14, "elapsed_time": "0:37:26", "remaining_time": "14:26:16"}
122
+ {"current_steps": 122, "total_steps": 2920, "loss": 1.112, "lr": 5e-05, "epoch": 0.4178082191780822, "percentage": 4.18, "elapsed_time": "0:37:45", "remaining_time": "14:25:56"}
123
+ {"current_steps": 123, "total_steps": 2920, "loss": 1.1238, "lr": 5e-05, "epoch": 0.4212328767123288, "percentage": 4.21, "elapsed_time": "0:38:03", "remaining_time": "14:25:35"}
124
+ {"current_steps": 124, "total_steps": 2920, "loss": 1.1324, "lr": 5e-05, "epoch": 0.4246575342465753, "percentage": 4.25, "elapsed_time": "0:38:22", "remaining_time": "14:25:13"}
125
+ {"current_steps": 125, "total_steps": 2920, "loss": 1.1103, "lr": 5e-05, "epoch": 0.4280821917808219, "percentage": 4.28, "elapsed_time": "0:38:40", "remaining_time": "14:24:53"}
126
+ {"current_steps": 126, "total_steps": 2920, "loss": 1.1232, "lr": 5e-05, "epoch": 0.4315068493150685, "percentage": 4.32, "elapsed_time": "0:38:59", "remaining_time": "14:24:33"}
127
+ {"current_steps": 127, "total_steps": 2920, "loss": 1.1178, "lr": 5e-05, "epoch": 0.4349315068493151, "percentage": 4.35, "elapsed_time": "0:39:17", "remaining_time": "14:24:13"}
128
+ {"current_steps": 128, "total_steps": 2920, "loss": 1.1253, "lr": 5e-05, "epoch": 0.4383561643835616, "percentage": 4.38, "elapsed_time": "0:39:36", "remaining_time": "14:23:52"}
129
+ {"current_steps": 129, "total_steps": 2920, "loss": 1.1222, "lr": 5e-05, "epoch": 0.4417808219178082, "percentage": 4.42, "elapsed_time": "0:39:54", "remaining_time": "14:23:31"}
130
+ {"current_steps": 130, "total_steps": 2920, "loss": 1.1265, "lr": 5e-05, "epoch": 0.4452054794520548, "percentage": 4.45, "elapsed_time": "0:40:13", "remaining_time": "14:23:11"}
131
+ {"current_steps": 131, "total_steps": 2920, "loss": 1.1103, "lr": 5e-05, "epoch": 0.4486301369863014, "percentage": 4.49, "elapsed_time": "0:40:31", "remaining_time": "14:22:51"}
132
+ {"current_steps": 132, "total_steps": 2920, "loss": 1.1264, "lr": 5e-05, "epoch": 0.4520547945205479, "percentage": 4.52, "elapsed_time": "0:40:50", "remaining_time": "14:22:30"}
133
+ {"current_steps": 133, "total_steps": 2920, "loss": 1.1154, "lr": 5e-05, "epoch": 0.4554794520547945, "percentage": 4.55, "elapsed_time": "0:41:08", "remaining_time": "14:22:10"}
134
+ {"current_steps": 134, "total_steps": 2920, "loss": 1.0984, "lr": 5e-05, "epoch": 0.4589041095890411, "percentage": 4.59, "elapsed_time": "0:41:27", "remaining_time": "14:21:49"}
135
+ {"current_steps": 135, "total_steps": 2920, "loss": 1.1134, "lr": 5e-05, "epoch": 0.4623287671232877, "percentage": 4.62, "elapsed_time": "0:41:45", "remaining_time": "14:21:29"}
136
+ {"current_steps": 136, "total_steps": 2920, "loss": 1.1195, "lr": 5e-05, "epoch": 0.4657534246575342, "percentage": 4.66, "elapsed_time": "0:42:04", "remaining_time": "14:21:09"}
137
+ {"current_steps": 137, "total_steps": 2920, "loss": 1.1035, "lr": 5e-05, "epoch": 0.4691780821917808, "percentage": 4.69, "elapsed_time": "0:42:22", "remaining_time": "14:20:50"}
138
+ {"current_steps": 138, "total_steps": 2920, "loss": 1.1132, "lr": 5e-05, "epoch": 0.4726027397260274, "percentage": 4.73, "elapsed_time": "0:42:41", "remaining_time": "14:20:31"}
139
+ {"current_steps": 139, "total_steps": 2920, "loss": 1.1077, "lr": 5e-05, "epoch": 0.476027397260274, "percentage": 4.76, "elapsed_time": "0:42:59", "remaining_time": "14:20:11"}
140
+ {"current_steps": 140, "total_steps": 2920, "loss": 1.1238, "lr": 5e-05, "epoch": 0.4794520547945205, "percentage": 4.79, "elapsed_time": "0:43:18", "remaining_time": "14:19:51"}
141
+ {"current_steps": 141, "total_steps": 2920, "loss": 1.1422, "lr": 5e-05, "epoch": 0.4828767123287671, "percentage": 4.83, "elapsed_time": "0:43:36", "remaining_time": "14:19:32"}
142
+ {"current_steps": 142, "total_steps": 2920, "loss": 1.1256, "lr": 5e-05, "epoch": 0.4863013698630137, "percentage": 4.86, "elapsed_time": "0:43:55", "remaining_time": "14:19:13"}
143
+ {"current_steps": 143, "total_steps": 2920, "loss": 1.1296, "lr": 5e-05, "epoch": 0.4897260273972603, "percentage": 4.9, "elapsed_time": "0:44:13", "remaining_time": "14:18:54"}
144
+ {"current_steps": 144, "total_steps": 2920, "loss": 1.128, "lr": 5e-05, "epoch": 0.4931506849315068, "percentage": 4.93, "elapsed_time": "0:44:32", "remaining_time": "14:18:34"}
145
+ {"current_steps": 145, "total_steps": 2920, "loss": 1.0962, "lr": 5e-05, "epoch": 0.4965753424657534, "percentage": 4.97, "elapsed_time": "0:44:50", "remaining_time": "14:18:15"}
146
+ {"current_steps": 146, "total_steps": 2920, "loss": 1.1024, "lr": 5e-05, "epoch": 0.5, "percentage": 5.0, "elapsed_time": "0:45:09", "remaining_time": "14:17:55"}
147
+ {"current_steps": 147, "total_steps": 2920, "loss": 1.1218, "lr": 5e-05, "epoch": 0.5034246575342466, "percentage": 5.03, "elapsed_time": "0:45:27", "remaining_time": "14:17:36"}
148
+ {"current_steps": 148, "total_steps": 2920, "loss": 1.0957, "lr": 5e-05, "epoch": 0.5068493150684932, "percentage": 5.07, "elapsed_time": "0:45:46", "remaining_time": "14:17:15"}
149
+ {"current_steps": 149, "total_steps": 2920, "loss": 1.0843, "lr": 5e-05, "epoch": 0.5102739726027398, "percentage": 5.1, "elapsed_time": "0:46:04", "remaining_time": "14:16:56"}
150
+ {"current_steps": 150, "total_steps": 2920, "loss": 1.1122, "lr": 5e-05, "epoch": 0.5136986301369864, "percentage": 5.14, "elapsed_time": "0:46:23", "remaining_time": "14:16:36"}
151
+ {"current_steps": 151, "total_steps": 2920, "loss": 1.0908, "lr": 5e-05, "epoch": 0.5171232876712328, "percentage": 5.17, "elapsed_time": "0:46:48", "remaining_time": "14:18:19"}
152
+ {"current_steps": 152, "total_steps": 2920, "loss": 1.1186, "lr": 5e-05, "epoch": 0.5205479452054794, "percentage": 5.21, "elapsed_time": "0:47:06", "remaining_time": "14:17:58"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a67ec30ff5cb47be6a276e82e1bf59098cbbf4681a58e2342e0425186d8490
3
+ size 7608
vocab.json ADDED
The diff for this file is too large to render. See raw diff