NTQuoc committed on
Commit
8b52b51
·
verified ·
1 Parent(s): f481f67

Model save

Browse files
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
  base_model: Qwen/Qwen3.5-0.8B
3
- datasets: knoveleng/open-rs
4
  library_name: transformers
5
  model_name: OpenRS-GRPO
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - grpo
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for OpenRS-GRPO
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
 
1
  ---
2
  base_model: Qwen/Qwen3.5-0.8B
 
3
  library_name: transformers
4
  model_name: OpenRS-GRPO
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - grpo
9
  licence: license
 
11
 
12
  # Model Card for OpenRS-GRPO
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.002416267991065979,
4
- "train_runtime": 474.1897,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.051,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -2.682209014892578e-07,
4
+ "train_runtime": 481.1453,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.033,
7
  "train_steps_per_second": 0.002
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.002416267991065979,
4
- "train_runtime": 474.1897,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.051,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -2.682209014892578e-07,
4
+ "train_runtime": 481.1453,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.033,
7
  "train_steps_per_second": 0.002
8
  }
trainer_state.json CHANGED
@@ -11,14 +11,14 @@
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
- "completion_length": 499.33333587646484,
15
  "epoch": 0.0005714285714285715,
16
  "kl": 0.0,
17
  "learning_rate": 0.0,
18
- "loss": 0.002416267991065979,
19
- "reward": -0.8758379369974136,
20
- "reward_std": 0.15247419755905867,
21
- "rewards/cosine_scaled_reward": -0.4379189759492874,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
@@ -26,9 +26,9 @@
26
  "epoch": 0.0005714285714285715,
27
  "step": 1,
28
  "total_flos": 0.0,
29
- "train_loss": 0.002416267991065979,
30
- "train_runtime": 474.1897,
31
- "train_samples_per_second": 0.051,
32
  "train_steps_per_second": 0.002
33
  }
34
  ],
@@ -50,7 +50,7 @@
50
  }
51
  },
52
  "total_flos": 0.0,
53
- "train_batch_size": 3,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
 
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
+ "completion_length": 512.0,
15
  "epoch": 0.0005714285714285715,
16
  "kl": 0.0,
17
  "learning_rate": 0.0,
18
+ "loss": -2.682209014892578e-07,
19
+ "reward": -0.866689532995224,
20
+ "reward_std": 0.22406927682459354,
21
+ "rewards/cosine_scaled_reward": -0.4333447590470314,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
 
26
  "epoch": 0.0005714285714285715,
27
  "step": 1,
28
  "total_flos": 0.0,
29
+ "train_loss": -2.682209014892578e-07,
30
+ "train_runtime": 481.1453,
31
+ "train_samples_per_second": 0.033,
32
  "train_steps_per_second": 0.002
33
  }
34
  ],
 
50
  }
51
  },
52
  "total_flos": 0.0,
53
+ "train_batch_size": 2,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
training_metrics.txt CHANGED
@@ -1,6 +1,6 @@
1
  total_size_before (MB): 1455.72
2
  total_size_after (MB): 1445.40
3
- total_time (seconds): 480.72
4
- ram_peak (MB): 4001.29
5
- ram_consump (MB): 1477.27
6
  disk_storage (MB): 575.25
 
1
  total_size_before (MB): 1455.72
2
  total_size_after (MB): 1445.40
3
+ total_time (seconds): 487.71
4
+ ram_peak (MB): 3477.81
5
+ ram_consump (MB): 1477.23
6
  disk_storage (MB): 575.25