bfilar committed on
Commit
0f59cd7
·
verified ·
1 Parent(s): d6684fa

Training in progress, epoch 1

Browse files
README.md CHANGED
@@ -4,7 +4,7 @@ library_name: transformers
4
  model_name: mql-finetune
5
  tags:
6
  - generated_from_trainer
7
- - grpo
8
  - trl
9
  licence: license
10
  ---
@@ -31,7 +31,7 @@ print(output["generated_text"])
31
 
32
 
33
 
34
- This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
35
 
36
  ### Framework versions
37
 
@@ -43,16 +43,7 @@ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing
43
 
44
  ## Citations
45
 
46
- Cite GRPO as:
47
 
48
- ```bibtex
49
- @article{shao2024deepseekmath,
50
- title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
51
- author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
52
- year = 2024,
53
- eprint = {arXiv:2402.03300},
54
- }
55
- ```
56
 
57
  Cite TRL as:
58
 
 
4
  model_name: mql-finetune
5
  tags:
6
  - generated_from_trainer
7
+ - sft
8
  - trl
9
  licence: license
10
  ---
 
31
 
32
 
33
 
34
+ This model was trained with SFT.
35
 
36
  ### Framework versions
37
 
 
43
 
44
  ## Citations
45
 
 
46
 
 
 
 
 
 
 
 
 
47
 
48
  Cite TRL as:
49
 
adapter_config.json CHANGED
@@ -30,12 +30,12 @@
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
33
- "gate_proj",
34
  "o_proj",
35
- "up_proj",
36
  "down_proj",
37
- "k_proj",
38
  "v_proj",
 
 
 
39
  "q_proj"
40
  ],
41
  "target_parameters": null,
 
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
 
33
  "o_proj",
 
34
  "down_proj",
 
35
  "v_proj",
36
+ "gate_proj",
37
+ "up_proj",
38
+ "k_proj",
39
  "q_proj"
40
  ],
41
  "target_parameters": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
3
- size 40
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9517e917b2cf5d12b013f431f8eda9893d620fd61d908335df57774106e6101
3
+ size 66127776
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b467ffd0aa71bdef98f0a1f0424e340fb630c605dfec4d209d46d919feb0e3b
3
- size 7121
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aa382ae7bb50f31fa3d152a9426d770fc36b2d3eaf859bfef058ffaaff5ffd2
3
+ size 5713