jfang committed on
Commit
e70a0c2
·
verified ·
1 Parent(s): 478a821

Training in progress, step 10

Browse files
README.md CHANGED
@@ -4,8 +4,8 @@ library_name: transformers
4
  model_name: glv4-1v-all-linear-500sample-random
5
  tags:
6
  - generated_from_trainer
7
- - trl
8
  - grpo
 
9
  licence: license
10
  ---
11
 
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/on-planets/glm4-1v-8b/runs/oy234co0)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
4
  model_name: glv4-1v-all-linear-500sample-random
5
  tags:
6
  - generated_from_trainer
 
7
  - grpo
8
+ - trl
9
  licence: license
10
  ---
11
 
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/on-planets/glm4-1v-8b/runs/x9wg06f1)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
adapter_config.json CHANGED
@@ -25,17 +25,17 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
28
  "gate_up_proj",
29
- "k_proj",
30
  "q_proj",
31
- "gate_proj",
32
  "qkv",
33
- "v_proj",
34
  "attn.proj",
35
- "merger.proj",
36
- "up_proj",
37
- "down_proj",
38
- "o_proj"
 
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
+ "up_proj",
29
  "gate_up_proj",
30
+ "merger.proj",
31
  "q_proj",
 
32
  "qkv",
 
33
  "attn.proj",
34
+ "gate_proj",
35
+ "k_proj",
36
+ "o_proj",
37
+ "v_proj",
38
+ "down_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
41
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5d73296ea86b17a38f65541f5136473df01899274eae985c8cd8271670428af
3
  size 117320232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66ca24b714e78d6918189fcc45b8b94db809e951e01277ba9867521f13162441
3
  size 117320232
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:185a61227ee260ef096ee964c230d7fca7146b2982090201346ca40037bb2c0a
3
  size 6993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad6cbdceb997e880e095b8f23c8ba5d1da3757b205edce00c92a8240a2a73018
3
  size 6993