Model save

Files changed (6) hide show

README.md CHANGED Viewed

@@ -1,11 +1,9 @@
 ---
 base_model: Qwen/Qwen3.5-0.8B
-datasets: knoveleng/open-rs
 library_name: transformers
 model_name: OpenRS-GRPO
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - grpo
 licence: license
@@ -13,7 +11,7 @@ licence: license
 # Model Card for OpenRS-GRPO
-This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

 ---
 base_model: Qwen/Qwen3.5-0.8B
 library_name: transformers
 model_name: OpenRS-GRPO
 tags:
 - generated_from_trainer
 - trl
 - grpo
 licence: license
 # Model Card for OpenRS-GRPO
+This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": -2.682209014892578e-07,
-    "train_runtime": 481.1453,
     "train_samples": 7000,
-    "train_samples_per_second": 0.033,
     "train_steps_per_second": 0.002
 }

 {
     "total_flos": 0.0,
+    "train_loss": 4.023313522338867e-07,
+    "train_runtime": 537.7965,
     "train_samples": 7000,
+    "train_samples_per_second": 0.015,
     "train_steps_per_second": 0.002
 }

step_metrics.csv ADDED Viewed

+step,epoch,loss,learning_rate,grad_norm,rewards/format_reward,rewards/cosine_scaled_reward,reward,reward_std,gpu_mem_alloc_mb,gpu_mem_peak_mb,step_time_sec
+1,0.0006,4.023313522338867e-07,0.0,,0.0,-0.40046167373657227,-0.8009233474731445,0.2397190211340785,1549.0,3351.1,533.87
+1,0.0006,,,,,,,,1549.0,3351.1,537.79

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": -2.682209014892578e-07,
-    "train_runtime": 481.1453,
     "train_samples": 7000,
-    "train_samples_per_second": 0.033,
     "train_steps_per_second": 0.002
 }

 {
     "total_flos": 0.0,
+    "train_loss": 4.023313522338867e-07,
+    "train_runtime": 537.7965,
     "train_samples": 7000,
+    "train_samples_per_second": 0.015,
     "train_steps_per_second": 0.002
 }

trainer_state.json CHANGED Viewed

@@ -11,14 +11,14 @@
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length": 512.0,
       "epoch": 0.0005714285714285715,
       "kl": 0.0,
       "learning_rate": 0.0,
-      "loss": -2.682209014892578e-07,
-      "reward": -0.866689532995224,
-      "reward_std": 0.22406927682459354,
-      "rewards/cosine_scaled_reward": -0.4333447590470314,
       "rewards/format_reward": 0.0,
       "step": 1
     },
@@ -26,9 +26,9 @@
       "epoch": 0.0005714285714285715,
       "step": 1,
       "total_flos": 0.0,
-      "train_loss": -2.682209014892578e-07,
-      "train_runtime": 481.1453,
-      "train_samples_per_second": 0.033,
       "train_steps_per_second": 0.002
     }
   ],
@@ -50,7 +50,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

   "log_history": [
     {
       "clip_ratio": 0.0,
+      "completion_length": 445.25,
       "epoch": 0.0005714285714285715,
       "kl": 0.0,
       "learning_rate": 0.0,
+      "loss": 4.023313522338867e-07,
+      "reward": -0.8009233474731445,
+      "reward_std": 0.2397190211340785,
+      "rewards/cosine_scaled_reward": -0.40046167373657227,
       "rewards/format_reward": 0.0,
       "step": 1
     },
       "epoch": 0.0005714285714285715,
       "step": 1,
       "total_flos": 0.0,
+      "train_loss": 4.023313522338867e-07,
+      "train_runtime": 537.7965,
+      "train_samples_per_second": 0.015,
       "train_steps_per_second": 0.002
     }
   ],
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
 }

training_metrics.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 total_size_before (MB): 1455.72
 total_size_after (MB): 1445.40
-total_time (seconds): 487.71
-ram_peak (MB): 3477.81
-ram_consump (MB): 1477.23
 disk_storage (MB): 575.25

 total_size_before (MB): 1455.72
 total_size_after (MB): 1445.40
+total_time (seconds): 544.56
+ram_peak (MB): 3195.88
+ram_consump (MB): 1477.19
 disk_storage (MB): 575.25