NTQuoc committed on
Commit
e104d31
·
verified ·
1 Parent(s): d18fcf4

Model save

Browse files
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
  base_model: Qwen/Qwen3.5-0.8B
3
- datasets: knoveleng/open-rs
4
  library_name: transformers
5
  model_name: OpenRS-GRPO
6
  tags:
7
  - generated_from_trainer
8
- - open-r1
9
  - trl
10
  - grpo
11
  licence: license
@@ -13,7 +11,7 @@ licence: license
13
 
14
  # Model Card for OpenRS-GRPO
15
 
16
- This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B) on the [knoveleng/open-rs](https://huggingface.co/datasets/knoveleng/open-rs) dataset.
17
  It has been trained using [TRL](https://github.com/huggingface/trl).
18
 
19
  ## Quick start
 
1
  ---
2
  base_model: Qwen/Qwen3.5-0.8B
 
3
  library_name: transformers
4
  model_name: OpenRS-GRPO
5
  tags:
6
  - generated_from_trainer
 
7
  - trl
8
  - grpo
9
  licence: license
 
11
 
12
  # Model Card for OpenRS-GRPO
13
 
14
+ This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": -2.682209014892578e-07,
4
- "train_runtime": 481.1453,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.033,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 4.023313522338867e-07,
4
+ "train_runtime": 537.7965,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.015,
7
  "train_steps_per_second": 0.002
8
  }
step_metrics.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ step,epoch,loss,learning_rate,grad_norm,rewards/format_reward,rewards/cosine_scaled_reward,reward,reward_std,gpu_mem_alloc_mb,gpu_mem_peak_mb,step_time_sec
2
+ 1,0.0006,4.023313522338867e-07,0.0,,0.0,-0.40046167373657227,-0.8009233474731445,0.2397190211340785,1549.0,3351.1,533.87
3
+ 1,0.0006,,,,,,,,1549.0,3351.1,537.79
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": -2.682209014892578e-07,
4
- "train_runtime": 481.1453,
5
  "train_samples": 7000,
6
- "train_samples_per_second": 0.033,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 4.023313522338867e-07,
4
+ "train_runtime": 537.7965,
5
  "train_samples": 7000,
6
+ "train_samples_per_second": 0.015,
7
  "train_steps_per_second": 0.002
8
  }
trainer_state.json CHANGED
@@ -11,14 +11,14 @@
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
- "completion_length": 512.0,
15
  "epoch": 0.0005714285714285715,
16
  "kl": 0.0,
17
  "learning_rate": 0.0,
18
- "loss": -2.682209014892578e-07,
19
- "reward": -0.866689532995224,
20
- "reward_std": 0.22406927682459354,
21
- "rewards/cosine_scaled_reward": -0.4333447590470314,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
@@ -26,9 +26,9 @@
26
  "epoch": 0.0005714285714285715,
27
  "step": 1,
28
  "total_flos": 0.0,
29
- "train_loss": -2.682209014892578e-07,
30
- "train_runtime": 481.1453,
31
- "train_samples_per_second": 0.033,
32
  "train_steps_per_second": 0.002
33
  }
34
  ],
@@ -50,7 +50,7 @@
50
  }
51
  },
52
  "total_flos": 0.0,
53
- "train_batch_size": 2,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
 
11
  "log_history": [
12
  {
13
  "clip_ratio": 0.0,
14
+ "completion_length": 445.25,
15
  "epoch": 0.0005714285714285715,
16
  "kl": 0.0,
17
  "learning_rate": 0.0,
18
+ "loss": 4.023313522338867e-07,
19
+ "reward": -0.8009233474731445,
20
+ "reward_std": 0.2397190211340785,
21
+ "rewards/cosine_scaled_reward": -0.40046167373657227,
22
  "rewards/format_reward": 0.0,
23
  "step": 1
24
  },
 
26
  "epoch": 0.0005714285714285715,
27
  "step": 1,
28
  "total_flos": 0.0,
29
+ "train_loss": 4.023313522338867e-07,
30
+ "train_runtime": 537.7965,
31
+ "train_samples_per_second": 0.015,
32
  "train_steps_per_second": 0.002
33
  }
34
  ],
 
50
  }
51
  },
52
  "total_flos": 0.0,
53
+ "train_batch_size": 1,
54
  "trial_name": null,
55
  "trial_params": null
56
  }
training_metrics.txt CHANGED
@@ -1,6 +1,6 @@
1
  total_size_before (MB): 1455.72
2
  total_size_after (MB): 1445.40
3
- total_time (seconds): 487.71
4
- ram_peak (MB): 3477.81
5
- ram_consump (MB): 1477.23
6
  disk_storage (MB): 575.25
 
1
  total_size_before (MB): 1455.72
2
  total_size_after (MB): 1445.40
3
+ total_time (seconds): 544.56
4
+ ram_peak (MB): 3195.88
5
+ ram_consump (MB): 1477.19
6
  disk_storage (MB): 575.25