Zachary1150 committed on
Commit
0f1a3f1
·
verified ·
1 Parent(s): ce18f9e

Training in progress, step 5

Browse files
README.md CHANGED
@@ -29,7 +29,7 @@ print(output["generated_text"])
29
 
30
  ## Training procedure
31
 
32
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/zhikaili/1B-Single-Reward/runs/uyipfzwn)
33
 
34
 
35
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
29
 
30
  ## Training procedure
31
 
32
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/zhikaili/1B-Single-Reward/runs/vawxvuhs)
33
 
34
 
35
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
adapter_config.json CHANGED
@@ -26,12 +26,12 @@
26
  "revision": null,
27
  "target_modules": [
28
  "v_proj",
29
- "q_proj",
30
- "down_proj",
31
- "k_proj",
32
  "gate_proj",
 
 
 
33
  "up_proj",
34
- "o_proj"
35
  ],
36
  "task_type": "CAUSAL_LM",
37
  "trainable_token_indices": null,
 
26
  "revision": null,
27
  "target_modules": [
28
  "v_proj",
 
 
 
29
  "gate_proj",
30
+ "o_proj",
31
+ "k_proj",
32
+ "down_proj",
33
  "up_proj",
34
+ "q_proj"
35
  ],
36
  "task_type": "CAUSAL_LM",
37
  "trainable_token_indices": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84e669cec34676fcd17dcfd2c7fb3f8dbeeac5df85c7828e47c450a4b411fb9a
3
  size 73911504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db588b63ef37a030252cac192e7881b3639fefa7f795a98f8751abdb35c48688
3
  size 73911504
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.019014376905397513,
4
- "train_runtime": 5087.0078,
5
  "train_samples": 6999,
6
- "train_samples_per_second": 1.887,
7
- "train_steps_per_second": 0.02
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.07624867705628276,
4
+ "train_runtime": 5165.8204,
5
  "train_samples": 6999,
6
+ "train_samples_per_second": 1.858,
7
+ "train_steps_per_second": 0.019
8
  }
config.json CHANGED
@@ -22,7 +22,7 @@
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.52.3",
25
- "use_cache": true,
26
  "use_mrope": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
 
22
  "tie_word_embeddings": false,
23
  "torch_dtype": "bfloat16",
24
  "transformers_version": "4.52.3",
25
+ "use_cache": false,
26
  "use_mrope": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d9a9c1d5e6231293aef840d0f29598566b3353f7e658e302714b86200874097
3
+ size 3554214752
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.019014376905397513,
4
- "train_runtime": 5087.0078,
5
  "train_samples": 6999,
6
- "train_samples_per_second": 1.887,
7
- "train_steps_per_second": 0.02
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 0.07624867705628276,
4
+ "train_runtime": 5165.8204,
5
  "train_samples": 6999,
6
+ "train_samples_per_second": 1.858,
7
+ "train_steps_per_second": 0.019
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c94f70d5b96c7b033961119295c30246adf26a0f8115824db15199b4e21a8625
3
  size 8696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6337f2c47311f2ef979bfc37d70c67755f0929eab44160efb6b6c707e1221f46
3
  size 8696