Model save

Files changed (5) hide show

README.md CHANGED Viewed

@@ -1,11 +1,9 @@
 ---
 base_model: Qwen/Qwen3.5-0.8B
-datasets: knoveleng/open-rs
 library_name: transformers
 model_name: OpenRS-GRPO
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - grpo
 licence: license
@@ -13,7 +11,7 @@ licence: license
 # Model Card for OpenRS-GRPO
-This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

 ---
 base_model: Qwen/Qwen3.5-0.8B
 library_name: transformers
 model_name: OpenRS-GRPO
 tags:
 - generated_from_trainer
 - trl
 - grpo
 licence: license
 # Model Card for OpenRS-GRPO
+This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.002416267991065979,
-    "train_runtime": 474.1897,
     "train_samples": 7000,
-    "train_samples_per_second": 0.051,
     "train_steps_per_second": 0.002
 }

 {
     "total_flos": 0.0,
+    "train_loss": -2.682209014892578e-07,
+    "train_runtime": 481.1453,
     "train_samples": 7000,
+    "train_samples_per_second": 0.033,
     "train_steps_per_second": 0.002
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.002416267991065979,
-    "train_runtime": 474.1897,
     "train_samples": 7000,
-    "train_samples_per_second": 0.051,
     "train_steps_per_second": 0.002
 }

 {
     "total_flos": 0.0,
+    "train_loss": -2.682209014892578e-07,
+    "train_runtime": 481.1453,
     "train_samples": 7000,
+    "train_samples_per_second": 0.033,
     "train_steps_per_second": 0.002
 }

trainer_state.json CHANGED Viewed

@@ -11,14 +11,14 @@
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length": 499.33333587646484,
       "epoch": 0.0005714285714285715,
       "kl": 0.0,
       "learning_rate": 0.0,
-      "loss": 0.002416267991065979,
-      "reward": -0.8758379369974136,
-      "reward_std": 0.15247419755905867,
-      "rewards/cosine_scaled_reward": -0.4379189759492874,
       "rewards/format_reward": 0.0,
       "step": 1
     },
@@ -26,9 +26,9 @@
       "epoch": 0.0005714285714285715,
       "step": 1,
       "total_flos": 0.0,
-      "train_loss": 0.002416267991065979,
-      "train_runtime": 474.1897,
-      "train_samples_per_second": 0.051,
       "train_steps_per_second": 0.002
     }
   ],
@@ -50,7 +50,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "clip_ratio": 0.0,
+      "completion_length": 512.0,
       "epoch": 0.0005714285714285715,
       "kl": 0.0,
       "learning_rate": 0.0,
+      "loss": -2.682209014892578e-07,
+      "reward": -0.866689532995224,
+      "reward_std": 0.22406927682459354,
+      "rewards/cosine_scaled_reward": -0.4333447590470314,
       "rewards/format_reward": 0.0,
       "step": 1
     },
       "epoch": 0.0005714285714285715,
       "step": 1,
       "total_flos": 0.0,
+      "train_loss": -2.682209014892578e-07,
+      "train_runtime": 481.1453,
+      "train_samples_per_second": 0.033,
       "train_steps_per_second": 0.002
     }
   ],
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

training_metrics.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 total_size_before (MB): 1455.72
 total_size_after (MB): 1445.40
-total_time (seconds): 480.72
-ram_peak (MB): 4001.29
-ram_consump (MB): 1477.27
 disk_storage (MB): 575.25

 total_size_before (MB): 1455.72
 total_size_after (MB): 1445.40
+total_time (seconds): 487.71
+ram_peak (MB): 3477.81
+ram_consump (MB): 1477.23
 disk_storage (MB): 575.25