nileshmalpeddi committed on
Commit
69de038
·
verified ·
1 Parent(s): c214259

End of training

Browse files
Files changed (2) hide show
  1. README.md +2 -1
  2. trainer_state.json +55 -0
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  library_name: transformers
3
  model_name: ppo
4
  tags:
@@ -8,7 +9,7 @@ licence: license
8
 
9
  # Model Card for ppo
10
 
11
- This model is a fine-tuned version of [None](https://huggingface.co/None).
12
  It has been trained using [TRL](https://github.com/huggingface/trl).
13
 
14
  ## Quick start
 
1
  ---
2
+ datasets: trl-internal-testing/descriptiveness-sentiment-trl-style
3
  library_name: transformers
4
  model_name: ppo
5
  tags:
 
9
 
10
  # Model Card for ppo
11
 
12
+ This model is a fine-tuned version of [None](https://huggingface.co/None) on the [trl-internal-testing/descriptiveness-sentiment-trl-style](https://huggingface.co/datasets/trl-internal-testing/descriptiveness-sentiment-trl-style) dataset.
13
  It has been trained using [TRL](https://github.com/huggingface/trl).
14
 
15
  ## Quick start
trainer_state.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "episode": 2,
5
+ "epoch": 6.55694708543702e-05,
6
+ "eval_steps": 500,
7
+ "global_step": 1,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "episode": 2,
14
+ "epoch": 6.55694708543702e-05,
15
+ "eps": 0,
16
+ "loss/policy_avg": 0.21204827725887299,
17
+ "loss/value_avg": 33.20888137817383,
18
+ "lr": 3e-06,
19
+ "objective/entropy": 70.37702941894531,
20
+ "objective/kl": 427.1629638671875,
21
+ "objective/non_score_reward": -21.3581485748291,
22
+ "objective/rlhf_reward": -17.6393985748291,
23
+ "objective/scores": 3.71875,
24
+ "policy/approxkl_avg": 0.31922289729118347,
25
+ "policy/clipfrac_avg": 0.1320754736661911,
26
+ "policy/entropy_avg": 1.321390151977539,
27
+ "step": 1,
28
+ "val/clipfrac_avg": 0.0,
29
+ "val/num_eos_tokens": 1,
30
+ "val/ratio": 1.5207229852676392,
31
+ "val/ratio_var": 0.5327765941619873
32
+ }
33
+ ],
34
+ "logging_steps": 500,
35
+ "max_steps": 1,
36
+ "num_input_tokens_seen": 0,
37
+ "num_train_epochs": 3.27847354271851e-05,
38
+ "save_steps": 500,
39
+ "stateful_callbacks": {
40
+ "TrainerControl": {
41
+ "args": {
42
+ "should_epoch_stop": false,
43
+ "should_evaluate": false,
44
+ "should_log": false,
45
+ "should_save": true,
46
+ "should_training_stop": true
47
+ },
48
+ "attributes": {}
49
+ }
50
+ },
51
+ "total_flos": 0,
52
+ "train_batch_size": null,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }