leonMW commited on
Commit
39c109c
·
verified ·
1 Parent(s): 20cfb3d

Training in progress, step 100

Browse files
README.md CHANGED
@@ -4,8 +4,8 @@ library_name: transformers
4
  model_name: DeepSeek-R1-Distill-Qwen-14B-GSPO-Basic
5
  tags:
6
  - generated_from_trainer
7
- - grpo
8
  - trl
 
9
  licence: license
10
  ---
11
 
@@ -27,18 +27,18 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/leonwenderoth-tu-darmstadt/huggingface/runs/n84a5sz9)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
 
35
  ### Framework versions
36
 
37
- - TRL: 0.23.0
38
- - Transformers: 4.56.1
39
- - Pytorch: 2.7.1
40
- - Datasets: 4.1.0
41
- - Tokenizers: 0.22.0
42
 
43
  ## Citations
44
 
 
4
  model_name: DeepSeek-R1-Distill-Qwen-14B-GSPO-Basic
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
+ - grpo
9
  licence: license
10
  ---
11
 
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/leonwenderoth-tu-darmstadt/huggingface/runs/y15xxv4x)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
 
35
  ### Framework versions
36
 
37
+ - TRL: 0.23.1
38
+ - Transformers: 4.57.1
39
+ - Pytorch: 2.8.0
40
+ - Datasets: 4.2.0
41
+ - Tokenizers: 0.22.1
42
 
43
  ## Citations
44
 
chat_template.jinja CHANGED
@@ -1 +1 @@
1
- {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}
 
1
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\n'}}{% endif %}
config.json CHANGED
@@ -72,7 +72,7 @@
72
  "rope_theta": 1000000.0,
73
  "sliding_window": null,
74
  "tie_word_embeddings": false,
75
- "transformers_version": "4.56.1",
76
  "use_cache": true,
77
  "use_sliding_window": false,
78
  "vocab_size": 152064
 
72
  "rope_theta": 1000000.0,
73
  "sliding_window": null,
74
  "tie_word_embeddings": false,
75
+ "transformers_version": "4.57.1",
76
  "use_cache": true,
77
  "use_sliding_window": false,
78
  "vocab_size": 152064
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151646,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_p": 0.95,
11
+ "transformers_version": "4.57.1"
12
+ }
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db273848ddbad7f3b4d231889bef2691dbf3ef82ca1d09ffa7048ddb72c656c8
3
  size 4986211280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17c6c6d7f0936a67c1732b1ef397bf67ed91b0ed6d3ed4425b23e39d15a2a960
3
  size 4986211280
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a3ebbdc63c73c5f509e5b474c32b9449023e30deec30130fc881c5604374aa8
3
  size 4954847344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d001732d02768fbfe085fa8e901d15a826c076898e3c08a70a425e118c338702
3
  size 4954847344
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac6d2ed7aa0d424f80af6bfbe0cda0e248df66ec6bb638229e7314440122dbd6
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a8684f363646527f81949511f7bc8605232ca4070d4797f95b82f23e698a4a
3
  size 4954847392
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36ea5aebb1e9eb2cbb27f4f468de40823d645ef3220369d5f24889aaf85ab92d
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c1f86cbebec22246340a13e35f4989a235bf617b52ab29537d396940b1f223c
3
  size 4954847392
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64a13a4c4077344964041811ad10bf21401c71e0d1e14e49d2192933bf5280d4
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf993d8e64c57b6de582e2110a77c7d3ad6f75059162150fd40bea5ad071b848
3
  size 4954847392
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6270532200ac249b81ad30032e45c427d9d42606199fa8390fe97889f5201fc9
3
  size 4734533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ee8225d5aa17056297d9b9a62871e5e3dd75cbf3a43f9afa726855b32f69a0c
3
  size 4734533160
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa2f68557d7a8730158a06000325ae16ff41f48487c41184a81ab28c40e9d032
3
- size 11665
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504a5b1f59d9c02bb8e5bed65e0029cf5b845ca6e6aa62fcf87d1e462a7788b1
3
+ size 9553