| { |
| "best_global_step": 100, |
| "best_metric": 0.8566527962684631, |
| "best_model_checkpoint": "/workspace/llm-storage/output/qwen-32B/checkpoint-100", |
| "epoch": 0.017302909051584298, |
| "eval_steps": 100, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.1931779585778712, |
| "epoch": 0.0017302909051584298, |
| "grad_norm": 0.4932125508785248, |
| "learning_rate": 4.999999763579933e-05, |
| "loss": 1.2397, |
| "mean_token_accuracy": 0.6963519304990768, |
| "num_tokens": 49591.0, |
| "step": 10 |
| }, |
| { |
| "entropy": 1.2187666453421115, |
| "epoch": 0.0034605818103168595, |
| "grad_norm": 0.5244883298873901, |
| "learning_rate": 4.9999971038546894e-05, |
| "loss": 1.0699, |
| "mean_token_accuracy": 0.7256920143961907, |
| "num_tokens": 93081.0, |
| "step": 20 |
| }, |
| { |
| "entropy": 1.0517689533531667, |
| "epoch": 0.005190872715475289, |
| "grad_norm": 0.49299755692481995, |
| "learning_rate": 4.999991488882273e-05, |
| "loss": 0.9953, |
| "mean_token_accuracy": 0.7465604640543461, |
| "num_tokens": 135994.0, |
| "step": 30 |
| }, |
| { |
| "entropy": 1.0336962267756462, |
| "epoch": 0.006921163620633719, |
| "grad_norm": 0.45133888721466064, |
| "learning_rate": 4.999982918669321e-05, |
| "loss": 0.9783, |
| "mean_token_accuracy": 0.7449970491230488, |
| "num_tokens": 180229.0, |
| "step": 40 |
| }, |
| { |
| "entropy": 0.9868150301277637, |
| "epoch": 0.008651454525792149, |
| "grad_norm": 0.5825394988059998, |
| "learning_rate": 4.999971393225964e-05, |
| "loss": 0.9399, |
| "mean_token_accuracy": 0.7513915061950683, |
| "num_tokens": 227765.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 0.9615857690572739, |
| "epoch": 0.010381745430950579, |
| "grad_norm": 0.5444238185882568, |
| "learning_rate": 4.999956912565827e-05, |
| "loss": 0.9146, |
| "mean_token_accuracy": 0.755208445340395, |
| "num_tokens": 279011.0, |
| "step": 60 |
| }, |
| { |
| "entropy": 0.9492803812026978, |
| "epoch": 0.012112036336109008, |
| "grad_norm": 0.5472911596298218, |
| "learning_rate": 4.999939476706027e-05, |
| "loss": 0.8886, |
| "mean_token_accuracy": 0.7594497002661228, |
| "num_tokens": 325299.0, |
| "step": 70 |
| }, |
| { |
| "entropy": 0.9504749856889247, |
| "epoch": 0.013842327241267438, |
| "grad_norm": 0.6487861275672913, |
| "learning_rate": 4.999919085667175e-05, |
| "loss": 0.9086, |
| "mean_token_accuracy": 0.7555940046906471, |
| "num_tokens": 369302.0, |
| "step": 80 |
| }, |
| { |
| "entropy": 0.8765211813151836, |
| "epoch": 0.015572618146425868, |
| "grad_norm": 0.6562181711196899, |
| "learning_rate": 4.999895739473375e-05, |
| "loss": 0.8392, |
| "mean_token_accuracy": 0.7781092718243598, |
| "num_tokens": 416444.0, |
| "step": 90 |
| }, |
| { |
| "entropy": 0.8944034688174725, |
| "epoch": 0.017302909051584298, |
| "grad_norm": 0.704955518245697, |
| "learning_rate": 4.999869438152225e-05, |
| "loss": 0.8638, |
| "mean_token_accuracy": 0.7715348787605762, |
| "num_tokens": 465771.0, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.017302909051584298, |
| "eval_entropy": 0.878015579309018, |
| "eval_loss": 0.8566527962684631, |
| "eval_mean_token_accuracy": 0.7751626556307387, |
| "eval_num_tokens": 465771.0, |
| "eval_runtime": 758.416, |
| "eval_samples_per_second": 7.621, |
| "eval_steps_per_second": 7.621, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 28900, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7838108021219328.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|