{ "run_dir": "/kaggle/working/python-llm-grpo-new-techniques/training/output/phase2-codellama-7b-lora-kaggle-1h", "trainer_state_path": "/kaggle/working/python-llm-grpo-new-techniques/training/output/phase2-codellama-7b-lora-kaggle-1h/checkpoint-43/trainer_state.json", "metrics": { "global_step": 43, "best_metric": null, "last_train_loss": 0.7746126174926757, "best_eval_loss": null, "last_eval_loss": null, "last_learning_rate": 0.00015600000000000002, "last_step_logged": 40, "num_train_loss_logs": 4, "num_eval_loss_logs": 0 } }