rl-grpo-sql-model / training_history.json
Ali Assi
Upload training_history.json with huggingface_hub
308e4ba verified
raw
history blame contribute delete
421 Bytes
[
{
"train_loss": -0.15511070310068362,
"train_policy_loss": 0.0,
"train_kl_loss": -3.102214025259018,
"train_reward": 0.30753333568573,
"baseline": 0.3100000023841858,
"epoch": 1
},
{
"train_loss": -0.24350257394835353,
"train_policy_loss": 0.0,
"train_kl_loss": -5.263461359739304,
"train_reward": 0.300500001758337,
"baseline": 0.3100000023841858,
"epoch": 2
}
]