{ "training_config": { "dataset_name": "InverseIFEval", "split": "train", "print_samples": 3, "output_dir": "checkpoints/THUDM_GLM-4-9B-0414-InverseIFEval-DPO", "output_prefix": "ext", "preprocessing": { "truncate_length": 131072 }, "method": "dpo", "abductive": false, "gpt": { "api_key": "OPENAI_API_KEY", "model": "gpt-5", "temperature": 0.7, "max_tokens": 512 }, "model_name_or_path": "THUDM/GLM-4-9B-0414", "beta": 0.05, "max_length": 8192, "max_prompt_length": 2048, "gradient_checkpointing": true, "gradient_checkpointing_kwargs": { "use_reentrant": false }, "truncation_model": "keep_end", "per_device_train_batch_size": 1, "per_device_eval_batch_size": 1, "num_train_epochs": 10, "learning_rate": 5e-07, "weight_decay": 0.0, "gradient_accumulation_steps": 16, "max_grad_norm": 1.0, "lr_scheduler_type": "constant_with_warmup", "warmup_ratio": 0.1, "logging_steps": 1, "eval_steps": 50, "eval_delay": 0, "eval_accumulation_steps": 1, "save_strategy": "epoch", "save_total_limit": 10, "save_only_model": true, "seed": 42, "do_eval": false, "dataloader_num_workers": 0, "dataloader_pin_memory": false, "remove_unused_columns": false, "prediction_loss_only": false, "disable_tqdm": false, "log_level": "info", "optimizer": { "type": "adam", "lr": 5e-07, "weight_decay": 0.0, "betas": [ 0.9, 0.999 ], "eps": 1e-08 }, "data_file": "src/datasets/InverseIFEval_data/GLM-4-9B-0414/InverseIFEval_english_GLM-4-9B-0414_gpt-5_dpo_results.json" }, "training_completed_at": "2026-03-25T07:16:52.568816", "model_name": "THUDM/GLM-4-9B-0414", "dataset_name": "InverseIFEval", "method": "dpo", "abductive": false }