michael-sigamani commited on
Commit
dd0bbb8
·
verified ·
1 Parent(s): 984dc20

Upload training/human at 2025-11-11 02:45:27 UTC

Browse files
training/human/assistant_policy_e30.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:575664268d268683a4741946b64b398bec26f3d3b6a7d96f967e46542b469140
3
+ size 3713793
training/human/assistant_policy_e40.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc632740aa5bf07e7574a668f8f170fa8a36e9ba1b1c0cb21fa66ba9e0fd9be1
3
+ size 3713793
training/human/train.jsonl CHANGED
@@ -55,3 +55,12 @@
55
  {"phase": "ppo_from_episodes", "epoch": 29, "loss": 179.0795440673828, "policy_loss": -0.0042334082536399364, "value_loss": 358.20416259765625, "entropy": 1.8311089277267456, "approx_kl": 0.0011348866391927004, "clip_frac": 0.0, "run": "human"}
56
  {"phase": "ppo_from_episodes", "epoch": 30, "loss": 178.70802307128906, "policy_loss": -0.0076076555997133255, "value_loss": 357.46783447265625, "entropy": 1.8292598724365234, "approx_kl": -0.0013783263275399804, "clip_frac": 0.014925372786819935, "run": "human"}
57
  {"phase": "ppo_from_episodes", "epoch": 31, "loss": 178.42886352539062, "policy_loss": 0.0013227426679804921, "value_loss": 356.891357421875, "entropy": 1.814236044883728, "approx_kl": 0.0519341379404068, "clip_frac": 0.014925372786819935, "run": "human"}
 
 
 
 
 
 
 
 
 
 
55
  {"phase": "ppo_from_episodes", "epoch": 29, "loss": 179.0795440673828, "policy_loss": -0.0042334082536399364, "value_loss": 358.20416259765625, "entropy": 1.8311089277267456, "approx_kl": 0.0011348866391927004, "clip_frac": 0.0, "run": "human"}
56
  {"phase": "ppo_from_episodes", "epoch": 30, "loss": 178.70802307128906, "policy_loss": -0.0076076555997133255, "value_loss": 357.46783447265625, "entropy": 1.8292598724365234, "approx_kl": -0.0013783263275399804, "clip_frac": 0.014925372786819935, "run": "human"}
57
  {"phase": "ppo_from_episodes", "epoch": 31, "loss": 178.42886352539062, "policy_loss": 0.0013227426679804921, "value_loss": 356.891357421875, "entropy": 1.814236044883728, "approx_kl": 0.0519341379404068, "clip_frac": 0.014925372786819935, "run": "human"}
58
+ {"phase": "ppo_from_episodes", "epoch": 32, "loss": 178.28369140625, "policy_loss": -0.004191377200186253, "value_loss": 356.61187744140625, "entropy": 1.805659532546997, "approx_kl": 0.017802903428673744, "clip_frac": 0.014925372786819935, "run": "human"}
59
+ {"phase": "ppo_from_episodes", "epoch": 33, "loss": 178.14686584472656, "policy_loss": -0.00977545790374279, "value_loss": 356.3490905761719, "entropy": 1.7899821996688843, "approx_kl": 0.13188686966896057, "clip_frac": 0.02985074557363987, "run": "human"}
60
+ {"phase": "ppo_from_episodes", "epoch": 34, "loss": 177.9296875, "policy_loss": -0.0067907413467764854, "value_loss": 355.9084777832031, "entropy": 1.776634931564331, "approx_kl": 0.07582738250494003, "clip_frac": 0.014925372786819935, "run": "human"}
61
+ {"phase": "ppo_from_episodes", "epoch": 35, "loss": 177.44515991210938, "policy_loss": -0.005201483611017466, "value_loss": 354.935791015625, "entropy": 1.753600835800171, "approx_kl": 0.007656221743673086, "clip_frac": 0.014925372786819935, "run": "human"}
62
+ {"phase": "ppo_from_episodes", "epoch": 36, "loss": 177.62261962890625, "policy_loss": -0.0038581774570047855, "value_loss": 355.28594970703125, "entropy": 1.6500777006149292, "approx_kl": 0.09904095530509949, "clip_frac": 0.0, "run": "human"}
63
+ {"phase": "ppo_from_episodes", "epoch": 37, "loss": 178.0361328125, "policy_loss": -0.005568492691963911, "value_loss": 356.114990234375, "entropy": 1.5791844129562378, "approx_kl": -0.0333586148917675, "clip_frac": 0.02985074557363987, "run": "human"}
64
+ {"phase": "ppo_from_episodes", "epoch": 38, "loss": 178.00653076171875, "policy_loss": -0.01611790806055069, "value_loss": 356.0747375488281, "entropy": 1.471631407737732, "approx_kl": 0.01781309023499489, "clip_frac": 0.02985074557363987, "run": "human"}
65
+ {"phase": "ppo_from_episodes", "epoch": 39, "loss": 178.4744110107422, "policy_loss": -0.006810346618294716, "value_loss": 356.9889831542969, "entropy": 1.3275822401046753, "approx_kl": -0.00926678255200386, "clip_frac": 0.014925372786819935, "run": "human"}
66
+ {"phase": "ppo_from_episodes", "epoch": 40, "loss": 178.48475646972656, "policy_loss": 0.01597599685192108, "value_loss": 356.9628601074219, "entropy": 1.2645403146743774, "approx_kl": -0.1880638748407364, "clip_frac": 0.7313432693481445, "run": "human"}