Training in progress, step 5

Files changed (9) hide show

README.md CHANGED Viewed

@@ -29,7 +29,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/zhikaili/1B-Single-Reward/runs/uyipfzwn)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/zhikaili/1B-Single-Reward/runs/vawxvuhs)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

adapter_config.json CHANGED Viewed

@@ -26,12 +26,12 @@
   "revision": null,
   "target_modules": [
     "v_proj",
-    "q_proj",
-    "down_proj",
-    "k_proj",
     "gate_proj",
     "up_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

   "revision": null,
   "target_modules": [
     "v_proj",
     "gate_proj",
+    "o_proj",
+    "k_proj",
+    "down_proj",
     "up_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84e669cec34676fcd17dcfd2c7fb3f8dbeeac5df85c7828e47c450a4b411fb9a
 size 73911504

 version https://git-lfs.github.com/spec/v1
+oid sha256:db588b63ef37a030252cac192e7881b3639fefa7f795a98f8751abdb35c48688
 size 73911504

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.019014376905397513,
-    "train_runtime": 5087.0078,
     "train_samples": 6999,
-    "train_samples_per_second": 1.887,
-    "train_steps_per_second": 0.02
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.07624867705628276,
+    "train_runtime": 5165.8204,
     "train_samples": 6999,
+    "train_samples_per_second": 1.858,
+    "train_steps_per_second": 0.019
 }

config.json CHANGED Viewed

@@ -22,7 +22,7 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.52.3",
-  "use_cache": true,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 151936

   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.52.3",
+  "use_cache": false,
   "use_mrope": false,
   "use_sliding_window": false,
   "vocab_size": 151936

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d9a9c1d5e6231293aef840d0f29598566b3353f7e658e302714b86200874097
+size 3554214752

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.019014376905397513,
-    "train_runtime": 5087.0078,
     "train_samples": 6999,
-    "train_samples_per_second": 1.887,
-    "train_steps_per_second": 0.02
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.07624867705628276,
+    "train_runtime": 5165.8204,
     "train_samples": 6999,
+    "train_samples_per_second": 1.858,
+    "train_steps_per_second": 0.019
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c94f70d5b96c7b033961119295c30246adf26a0f8115824db15199b4e21a8625
 size 8696

 version https://git-lfs.github.com/spec/v1
+oid sha256:6337f2c47311f2ef979bfc37d70c67755f0929eab44160efb6b6c707e1221f46
 size 8696