SKNahin committed on
Commit
fb989ce
·
verified ·
1 Parent(s): b74a9d7

Training in progress, step 50

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151643,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 896,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4864,
12
+ "max_position_embeddings": 131072,
13
+ "max_window_layers": 24,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 14,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": null,
22
+ "tie_word_embeddings": true,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.2",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 194498
28
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3742d6597fbd10d5734b76ff214315b8c5a5ed0e9898f3efe7fa41be5f1c5ad9
3
+ size 1064369000
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8ec123df4ff5fd027294ce9931a2709a061268c399250056437627ea20e304f
3
+ size 27868597
tokenizer_config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "11370": {
5
+ "content": "/************************************************************************",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "16395": {
13
+ "content": "%%%%%%%%%%%%%%%%",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "33009": {
21
+ "content": "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "65080": {
29
+ "content": "//************************************************************************",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151643": {
37
+ "content": "<|endoftext|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151644": {
45
+ "content": "<|im_start|>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151645": {
53
+ "content": "<|im_end|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ }
60
+ },
61
+ "additional_special_tokens": [
62
+ "<|im_start|>",
63
+ "<|im_end|>"
64
+ ],
65
+ "bos_token": null,
66
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are BanglaQwen, developed by AI4BD. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within /************************************************************************//************************************************************************ XML tags:\\n/************************************************************************\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n//************************************************************************<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are BanglaQwen, developed by AI4BD. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n/************************************************************************\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n//************************************************************************' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n%%%%%%%%%%%%%%%%\\n' }}\n {{- message.content }}\n {{- '\\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
67
+ "clean_up_tokenization_spaces": false,
68
+ "eos_token": "<|im_end|>",
69
+ "errors": "replace",
70
+ "extra_special_tokens": {},
71
+ "model_max_length": 4096,
72
+ "pad_token": "<|endoftext|>",
73
+ "padding_side": "right",
74
+ "split_special_tokens": false,
75
+ "tokenizer_class": "Qwen2Tokenizer",
76
+ "unk_token": null
77
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 1, "total_steps": 166, "loss": 2.3086, "lr": 1e-06, "epoch": 0.011976047904191617, "percentage": 0.6, "elapsed_time": "0:00:28", "remaining_time": "1:18:05"}
2
+ {"current_steps": 2, "total_steps": 166, "loss": 2.3322, "lr": 1e-06, "epoch": 0.023952095808383235, "percentage": 1.2, "elapsed_time": "0:00:45", "remaining_time": "1:01:45"}
3
+ {"current_steps": 3, "total_steps": 166, "loss": 2.3189, "lr": 1e-06, "epoch": 0.03592814371257485, "percentage": 1.81, "elapsed_time": "0:01:02", "remaining_time": "0:56:13"}
4
+ {"current_steps": 4, "total_steps": 166, "loss": 2.3221, "lr": 1e-06, "epoch": 0.04790419161676647, "percentage": 2.41, "elapsed_time": "0:01:19", "remaining_time": "0:53:25"}
5
+ {"current_steps": 5, "total_steps": 166, "loss": 2.3203, "lr": 1e-06, "epoch": 0.059880239520958084, "percentage": 3.01, "elapsed_time": "0:01:36", "remaining_time": "0:51:45"}
6
+ {"current_steps": 6, "total_steps": 166, "loss": 2.3189, "lr": 1e-06, "epoch": 0.0718562874251497, "percentage": 3.61, "elapsed_time": "0:01:53", "remaining_time": "0:50:34"}
7
+ {"current_steps": 7, "total_steps": 166, "loss": 2.314, "lr": 1e-06, "epoch": 0.08383233532934131, "percentage": 4.22, "elapsed_time": "0:02:11", "remaining_time": "0:49:39"}
8
+ {"current_steps": 8, "total_steps": 166, "loss": 2.3104, "lr": 1e-06, "epoch": 0.09580838323353294, "percentage": 4.82, "elapsed_time": "0:02:28", "remaining_time": "0:48:55"}
9
+ {"current_steps": 9, "total_steps": 166, "loss": 2.3067, "lr": 1e-06, "epoch": 0.10778443113772455, "percentage": 5.42, "elapsed_time": "0:02:46", "remaining_time": "0:48:17"}
10
+ {"current_steps": 10, "total_steps": 166, "loss": 2.3168, "lr": 1e-06, "epoch": 0.11976047904191617, "percentage": 6.02, "elapsed_time": "0:03:03", "remaining_time": "0:47:42"}
11
+ {"current_steps": 11, "total_steps": 166, "loss": 2.322, "lr": 1e-06, "epoch": 0.1317365269461078, "percentage": 6.63, "elapsed_time": "0:03:20", "remaining_time": "0:47:11"}
12
+ {"current_steps": 12, "total_steps": 166, "loss": 2.321, "lr": 1e-06, "epoch": 0.1437125748502994, "percentage": 7.23, "elapsed_time": "0:03:38", "remaining_time": "0:46:43"}
13
+ {"current_steps": 13, "total_steps": 166, "loss": 2.312, "lr": 1e-06, "epoch": 0.15568862275449102, "percentage": 7.83, "elapsed_time": "0:03:55", "remaining_time": "0:46:16"}
14
+ {"current_steps": 14, "total_steps": 166, "loss": 2.303, "lr": 1e-06, "epoch": 0.16766467065868262, "percentage": 8.43, "elapsed_time": "0:04:13", "remaining_time": "0:45:51"}
15
+ {"current_steps": 15, "total_steps": 166, "loss": 2.3105, "lr": 1e-06, "epoch": 0.17964071856287425, "percentage": 9.04, "elapsed_time": "0:04:30", "remaining_time": "0:45:27"}
16
+ {"current_steps": 16, "total_steps": 166, "loss": 2.2994, "lr": 1e-06, "epoch": 0.19161676646706588, "percentage": 9.64, "elapsed_time": "0:04:48", "remaining_time": "0:45:03"}
17
+ {"current_steps": 17, "total_steps": 166, "loss": 2.3085, "lr": 1e-06, "epoch": 0.20359281437125748, "percentage": 10.24, "elapsed_time": "0:05:05", "remaining_time": "0:44:41"}
18
+ {"current_steps": 18, "total_steps": 166, "loss": 2.3006, "lr": 1e-06, "epoch": 0.2155688622754491, "percentage": 10.84, "elapsed_time": "0:05:23", "remaining_time": "0:44:18"}
19
+ {"current_steps": 19, "total_steps": 166, "loss": 2.2872, "lr": 1e-06, "epoch": 0.2275449101796407, "percentage": 11.45, "elapsed_time": "0:05:40", "remaining_time": "0:43:57"}
20
+ {"current_steps": 20, "total_steps": 166, "loss": 2.3008, "lr": 1e-06, "epoch": 0.23952095808383234, "percentage": 12.05, "elapsed_time": "0:05:58", "remaining_time": "0:43:35"}
21
+ {"current_steps": 21, "total_steps": 166, "loss": 2.2948, "lr": 1e-06, "epoch": 0.25149700598802394, "percentage": 12.65, "elapsed_time": "0:06:15", "remaining_time": "0:43:15"}
22
+ {"current_steps": 22, "total_steps": 166, "loss": 2.2899, "lr": 1e-06, "epoch": 0.2634730538922156, "percentage": 13.25, "elapsed_time": "0:06:33", "remaining_time": "0:42:54"}
23
+ {"current_steps": 23, "total_steps": 166, "loss": 2.2967, "lr": 1e-06, "epoch": 0.2754491017964072, "percentage": 13.86, "elapsed_time": "0:06:50", "remaining_time": "0:42:34"}
24
+ {"current_steps": 24, "total_steps": 166, "loss": 2.2969, "lr": 1e-06, "epoch": 0.2874251497005988, "percentage": 14.46, "elapsed_time": "0:07:08", "remaining_time": "0:42:13"}
25
+ {"current_steps": 25, "total_steps": 166, "loss": 2.3023, "lr": 1e-06, "epoch": 0.2994011976047904, "percentage": 15.06, "elapsed_time": "0:07:25", "remaining_time": "0:41:54"}
26
+ {"current_steps": 26, "total_steps": 166, "loss": 2.2952, "lr": 1e-06, "epoch": 0.31137724550898205, "percentage": 15.66, "elapsed_time": "0:07:43", "remaining_time": "0:41:34"}
27
+ {"current_steps": 27, "total_steps": 166, "loss": 2.286, "lr": 1e-06, "epoch": 0.32335329341317365, "percentage": 16.27, "elapsed_time": "0:08:00", "remaining_time": "0:41:15"}
28
+ {"current_steps": 28, "total_steps": 166, "loss": 2.2964, "lr": 1e-06, "epoch": 0.33532934131736525, "percentage": 16.87, "elapsed_time": "0:08:18", "remaining_time": "0:40:56"}
29
+ {"current_steps": 29, "total_steps": 166, "loss": 2.2871, "lr": 1e-06, "epoch": 0.3473053892215569, "percentage": 17.47, "elapsed_time": "0:08:35", "remaining_time": "0:40:37"}
30
+ {"current_steps": 30, "total_steps": 166, "loss": 2.3018, "lr": 1e-06, "epoch": 0.3592814371257485, "percentage": 18.07, "elapsed_time": "0:08:53", "remaining_time": "0:40:17"}
31
+ {"current_steps": 31, "total_steps": 166, "loss": 2.2784, "lr": 1e-06, "epoch": 0.3712574850299401, "percentage": 18.67, "elapsed_time": "0:09:10", "remaining_time": "0:39:59"}
32
+ {"current_steps": 32, "total_steps": 166, "loss": 2.2764, "lr": 1e-06, "epoch": 0.38323353293413176, "percentage": 19.28, "elapsed_time": "0:09:28", "remaining_time": "0:39:40"}
33
+ {"current_steps": 33, "total_steps": 166, "loss": 2.2706, "lr": 1e-06, "epoch": 0.39520958083832336, "percentage": 19.88, "elapsed_time": "0:09:45", "remaining_time": "0:39:21"}
34
+ {"current_steps": 34, "total_steps": 166, "loss": 2.2593, "lr": 1e-06, "epoch": 0.40718562874251496, "percentage": 20.48, "elapsed_time": "0:10:03", "remaining_time": "0:39:02"}
35
+ {"current_steps": 35, "total_steps": 166, "loss": 2.2705, "lr": 1e-06, "epoch": 0.41916167664670656, "percentage": 21.08, "elapsed_time": "0:10:20", "remaining_time": "0:38:44"}
36
+ {"current_steps": 36, "total_steps": 166, "loss": 2.2708, "lr": 1e-06, "epoch": 0.4311377245508982, "percentage": 21.69, "elapsed_time": "0:10:38", "remaining_time": "0:38:25"}
37
+ {"current_steps": 37, "total_steps": 166, "loss": 2.2636, "lr": 1e-06, "epoch": 0.4431137724550898, "percentage": 22.29, "elapsed_time": "0:10:55", "remaining_time": "0:38:06"}
38
+ {"current_steps": 38, "total_steps": 166, "loss": 2.2593, "lr": 1e-06, "epoch": 0.4550898203592814, "percentage": 22.89, "elapsed_time": "0:11:13", "remaining_time": "0:37:48"}
39
+ {"current_steps": 39, "total_steps": 166, "loss": 2.2531, "lr": 1e-06, "epoch": 0.46706586826347307, "percentage": 23.49, "elapsed_time": "0:11:30", "remaining_time": "0:37:29"}
40
+ {"current_steps": 40, "total_steps": 166, "loss": 2.2651, "lr": 1e-06, "epoch": 0.47904191616766467, "percentage": 24.1, "elapsed_time": "0:11:48", "remaining_time": "0:37:11"}
41
+ {"current_steps": 41, "total_steps": 166, "loss": 2.2721, "lr": 1e-06, "epoch": 0.49101796407185627, "percentage": 24.7, "elapsed_time": "0:12:05", "remaining_time": "0:36:53"}
42
+ {"current_steps": 42, "total_steps": 166, "loss": 2.2568, "lr": 1e-06, "epoch": 0.5029940119760479, "percentage": 25.3, "elapsed_time": "0:12:23", "remaining_time": "0:36:34"}
43
+ {"current_steps": 43, "total_steps": 166, "loss": 2.2504, "lr": 1e-06, "epoch": 0.5149700598802395, "percentage": 25.9, "elapsed_time": "0:12:40", "remaining_time": "0:36:16"}
44
+ {"current_steps": 44, "total_steps": 166, "loss": 2.2611, "lr": 1e-06, "epoch": 0.5269461077844312, "percentage": 26.51, "elapsed_time": "0:12:58", "remaining_time": "0:35:58"}
45
+ {"current_steps": 45, "total_steps": 166, "loss": 2.2637, "lr": 1e-06, "epoch": 0.5389221556886228, "percentage": 27.11, "elapsed_time": "0:13:15", "remaining_time": "0:35:40"}
46
+ {"current_steps": 46, "total_steps": 166, "loss": 2.2536, "lr": 1e-06, "epoch": 0.5508982035928144, "percentage": 27.71, "elapsed_time": "0:13:33", "remaining_time": "0:35:21"}
47
+ {"current_steps": 47, "total_steps": 166, "loss": 2.2543, "lr": 1e-06, "epoch": 0.562874251497006, "percentage": 28.31, "elapsed_time": "0:13:50", "remaining_time": "0:35:03"}
48
+ {"current_steps": 48, "total_steps": 166, "loss": 2.252, "lr": 1e-06, "epoch": 0.5748502994011976, "percentage": 28.92, "elapsed_time": "0:14:08", "remaining_time": "0:34:45"}
49
+ {"current_steps": 49, "total_steps": 166, "loss": 2.2604, "lr": 1e-06, "epoch": 0.5868263473053892, "percentage": 29.52, "elapsed_time": "0:14:25", "remaining_time": "0:34:27"}
50
+ {"current_steps": 50, "total_steps": 166, "loss": 2.2571, "lr": 1e-06, "epoch": 0.5988023952095808, "percentage": 30.12, "elapsed_time": "0:14:43", "remaining_time": "0:34:09"}
51
+ {"current_steps": 51, "total_steps": 166, "loss": 2.247, "lr": 1e-06, "epoch": 0.6107784431137725, "percentage": 30.72, "elapsed_time": "0:15:07", "remaining_time": "0:34:06"}
52
+ {"current_steps": 52, "total_steps": 166, "loss": 2.2607, "lr": 1e-06, "epoch": 0.6227544910179641, "percentage": 31.33, "elapsed_time": "0:15:25", "remaining_time": "0:33:48"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a948de6f187071e8f2ca072dd444d9f4f3bca449376a9daf6e78ed345298a3b
3
+ size 7608
vocab.json ADDED
The diff for this file is too large to render. See raw diff