L3133625978 committed on
Commit
4bb941f
·
verified ·
1 Parent(s): 1e623e7

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. README.md +60 -0
  3. added_tokens.json +24 -0
  4. all_results.json +8 -0
  5. chat_template.jinja +54 -0
  6. checkpoint-12/added_tokens.json +24 -0
  7. checkpoint-12/chat_template.jinja +54 -0
  8. checkpoint-12/config.json +28 -0
  9. checkpoint-12/generation_config.json +14 -0
  10. checkpoint-12/global_step3/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  11. checkpoint-12/global_step3/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  12. checkpoint-12/global_step3/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  13. checkpoint-12/global_step3/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  14. checkpoint-12/global_step3/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  15. checkpoint-12/global_step3/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  16. checkpoint-12/global_step3/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  17. checkpoint-12/global_step3/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  18. checkpoint-12/global_step3/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  19. checkpoint-12/global_step3/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  20. checkpoint-12/global_step3/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  21. checkpoint-12/global_step3/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  22. checkpoint-12/global_step3/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  23. checkpoint-12/global_step3/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  24. checkpoint-12/global_step3/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  25. checkpoint-12/global_step3/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  26. checkpoint-12/global_step6/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  27. checkpoint-12/global_step6/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  28. checkpoint-12/global_step6/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  29. checkpoint-12/global_step6/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  30. checkpoint-12/global_step6/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  31. checkpoint-12/global_step6/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  32. checkpoint-12/global_step6/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  33. checkpoint-12/global_step6/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  34. checkpoint-12/global_step6/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  35. checkpoint-12/global_step6/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  36. checkpoint-12/global_step6/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  37. checkpoint-12/global_step6/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  38. checkpoint-12/global_step6/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  39. checkpoint-12/global_step6/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  40. checkpoint-12/global_step6/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  41. checkpoint-12/global_step6/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  42. checkpoint-12/latest +1 -0
  43. checkpoint-12/merges.txt +0 -0
  44. checkpoint-12/model.safetensors +3 -0
  45. checkpoint-12/rng_state_0.pth +3 -0
  46. checkpoint-12/rng_state_1.pth +3 -0
  47. checkpoint-12/rng_state_2.pth +3 -0
  48. checkpoint-12/rng_state_3.pth +3 -0
  49. checkpoint-12/rng_state_4.pth +3 -0
  50. checkpoint-12/rng_state_5.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-12/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ license_link: https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct/blob/main/LICENSE
4
+ language:
5
+ - en
6
+ pipeline_tag: text-generation
7
+ base_model: Qwen/Qwen2.5-0.5B-Instruct
8
+ tags:
9
+ - chat
10
+ library_name: transformers
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # sft
17
+
18
+ This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the sft_with_format dataset.
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 1e-05
38
+ - train_batch_size: 128
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - num_devices: 8
43
+ - gradient_accumulation_steps: 4
44
+ - total_train_batch_size: 4096
45
+ - total_eval_batch_size: 64
46
+ - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
47
+ - lr_scheduler_type: cosine
48
+ - lr_scheduler_warmup_ratio: 0.1
49
+ - num_epochs: 12.0
50
+
51
+ ### Training results
52
+
53
+
54
+
55
+ ### Framework versions
56
+
57
+ - Transformers 4.52.4
58
+ - Pytorch 2.6.0+cu124
59
+ - Datasets 3.3.2
60
+ - Tokenizers 0.21.1
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 12.0,
3
+ "total_flos": 994849849344.0,
4
+ "train_loss": 0.8606694539388021,
5
+ "train_runtime": 110.8365,
6
+ "train_samples_per_second": 22.303,
7
+ "train_steps_per_second": 0.108
8
+ }
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
checkpoint-12/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
checkpoint-12/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
checkpoint-12/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 896,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 4864,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 21,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 14,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 2,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": 32768,
22
+ "tie_word_embeddings": true,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.52.4",
25
+ "use_cache": false,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 151936
28
+ }
checkpoint-12/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.52.4"
14
+ }
checkpoint-12/global_step3/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf7861830679d3911d751f2b0f2d9bc6a567cdfc35ca8fda3c8696527b3b02c4
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c965ad438391ec1cd44fb70012aac42a7256403dcc761b7d63c0e418bec361
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a4bfbe071d6d7aa7508f6b8381b1b049092d2e616e0bc14ad1667327c9af3e
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b027523761d3d78777122e50ce35ff83b1ed260de0938f18abdbec7bf42b5c3a
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1fcd92084124fe6430ce8624324b14b31a2f736a42213d20552fdc65fefcbe
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c8ac82883f62a72435fbe8f692cbd51e977846f59dfd3837847f3b54f6b1412
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4037cd32490f799eb968aa37f6aa22068d57965c80b46825f4c9ceb1ecb3d26d
3
+ size 741054256
checkpoint-12/global_step3/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3455c6015cf2827b13479c63bf38a623452e74286bc60957d9a269b1b3c1811
3
+ size 741054256
checkpoint-12/global_step3/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a8eb9b1fde110abea562fa186ed7b5f4b4d10c775d2c24075e8a64b17043ab
3
+ size 142856
checkpoint-12/global_step3/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b48954ae08f80b2282e303667763977876bc84fb2f68f244c2dab3cd50f88d
3
+ size 142792
checkpoint-12/global_step3/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48de1162feff575e0614e93eefebeac580e60fbb9f77039a1e6779628cddac76
3
+ size 142792
checkpoint-12/global_step3/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:923094ba9bc606c88ce22c249884bdcf01ffdea7128ca8fe5849dd0b6fc7b1e9
3
+ size 142792
checkpoint-12/global_step3/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:832faa081ec545f2f5d489247ede63360c71bb633fc4decd1fdbdb297d6b6ced
3
+ size 142792
checkpoint-12/global_step3/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbbb61cfe4ae30c298e4f030083118385832ebeee15abb51ec7edfdc24a8687f
3
+ size 142792
checkpoint-12/global_step3/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a099ccc4ca8d9a1cb81b245ab4bda7657364379771787f40658c0c90673ea7cd
3
+ size 142792
checkpoint-12/global_step3/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af38350b134c4b31f2f1311f4e073ca0d58bcb9243f6290273d6a2ab6cf98ba8
3
+ size 142792
checkpoint-12/global_step6/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66475e6c187d4b17072a2ce7de316e3bdce7a487de55d49c524599ad7767747f
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:beadc02c68bdf648a3ba6905a89d5d3fb7e19457adca012f32be6751fd670868
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b18f3558d8029867e455e616c645bd783c8a8724ff3eb88560e9c454fdb9767
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d741e89c2179f8d79b9c5de2d2b1816b706572a42fd493dff64c70db6afcc106
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a444965382d4a85b021eb996f5df174df262fec125e03b4d9a4e538adb96724c
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:303b6377e6d6e3078d9b3167c69e5175f1c8b1f533a5055cb399c4c3702eed52
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22eb7f5bb52e4c91a5000861e3e7c3045ad43993af574e3bcf34defb21edda3d
3
+ size 741054256
checkpoint-12/global_step6/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:186f8305ba79c822093859a003da0ae427ad17d8cb7ebec955c322f03aa1d348
3
+ size 741054256
checkpoint-12/global_step6/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2306cd10ee2fde9a4e6bb94daa83624b598b92bc8d6eb726bad832749df27f49
3
+ size 142856
checkpoint-12/global_step6/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a66fbee80a0394fa63c3da87c42ae099d397f70658c025d4d36fdcd1fc2e2095
3
+ size 142792
checkpoint-12/global_step6/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fb4165e279e1cf3f1f5da9b065e79fae614bcd1c87c9efac7019ac6d2e4d99d
3
+ size 142792
checkpoint-12/global_step6/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7975aa6064a9b3957b61378cfeb778ddb748262d9b890c3e114e7ea60c17807b
3
+ size 142792
checkpoint-12/global_step6/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab148784c39325d1bed67f1977a7f1663885ac4f170f0cb9136c915a30f1b44
3
+ size 142792
checkpoint-12/global_step6/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd79423ed883c8fe8e60d4262c2ee21e6167f1c017994d699a5b576ea6696909
3
+ size 142792
checkpoint-12/global_step6/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607b4a170c3fae2a2c7b29f780d1d51f2d7fbac20a7de6f6d9d0bfa3bb16ef6e
3
+ size 142792
checkpoint-12/global_step6/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c52bf3889ade86050866e64e7d0673d90b595321bffd13dc90464bcee9ea631d
3
+ size 142792
checkpoint-12/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step3
checkpoint-12/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-12/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0fa882e87061a945bc5a004cf85ad86989a9f7f0c408057cb4ed091107693c0
3
+ size 988097824
checkpoint-12/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb6462d333dbc5bb5e497ea9b0adb960f7616f79e6eea63222de6d5bd559516
3
+ size 15984
checkpoint-12/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b045e1bfa728f51c8b51ab0faa20b128a4fbd350da006b9b39a19e24abdf5a74
3
+ size 15984
checkpoint-12/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76a3d058d2628a61848c2441d313f251278bd8f74ce43dc44d8cd8ad3e619a8
3
+ size 15984
checkpoint-12/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f72fc498e6eaa671cdc0e8a627a668b8ef607063a22ddb4edbc05e791be830
3
+ size 15984
checkpoint-12/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12889af98e175b734a788f4c5b8c4da91dd61ff3a05aaf61b9d4c66aa3dd8ad6
3
+ size 15984
checkpoint-12/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe21a86abfceeac2cf2f48afd61a9a506cf61a287f3403f1adf391bb2ffa5a83
3
+ size 15984