| W0817 22:49:50.668000 5653 torch/distributed/run.py:766] | |
| W0817 22:49:50.668000 5653 torch/distributed/run.py:766] ***************************************** | |
| W0817 22:49:50.668000 5653 torch/distributed/run.py:766] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. | |
| W0817 22:49:50.668000 5653 torch/distributed/run.py:766] ***************************************** | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| Could not find the bitsandbytes CUDA binary at PosixPath('/usr/local/lib/python3.12/dist-packages/bitsandbytes/libbitsandbytes_cuda129.so') | |
| The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012103, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012103, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012103, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012103, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012103, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012103, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012105, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012105, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012106, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012107, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012108, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012108, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012108, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012108, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012108, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012109, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012109, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012109, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012109, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012110, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012110, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "INTERVAL_START", "key": "init_start", "value": null, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 327}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_benchmark", "value": "llama2_70b_lora", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_org", "value": "SUBMISSION_ORG_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_division", "value": "closed", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_status", "value": "onprem", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012112, "event_type": "POINT_IN_TIME", "key": "submission_platform", "value": "1xSUBMISSION_PLATFORM_PLACEHOLDER", "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012644, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012644, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012644, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012644, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012645, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012645, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012649, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012666, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012684, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012684, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012684, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012685, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012686, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012686, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012689, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012690, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012690, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012690, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012691, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012692, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012693, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012693, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012693, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012694, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012694, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012694, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012695, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012697, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012697, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012699, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012699, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012699, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012700, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012701, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012701, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012703, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012704, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012712, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012712, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012712, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012712, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012712, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012712, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012713, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012714, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012718, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012719, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012726, "event_type": "POINT_IN_TIME", "key": "opt_base_learning_rate", "value": 0.00055, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 162}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012726, "event_type": "POINT_IN_TIME", "key": "opt_adamw_weight_decay", "value": 0.0001, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 163}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012726, "event_type": "POINT_IN_TIME", "key": "opt_gradient_clip_norm", "value": 0.3, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 164}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012727, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_warmup_factor", "value": 0.0, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 189}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012728, "event_type": "POINT_IN_TIME", "key": "lora_rank", "value": 16, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 198}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012728, "event_type": "POINT_IN_TIME", "key": "lora_alpha", "value": 32, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 199}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012731, "event_type": "POINT_IN_TIME", "key": "opt_learning_rate_training_steps", "value": 800, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 350}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012734, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471012745, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471013232, "event_type": "POINT_IN_TIME", "key": "global_batch_size", "value": 8, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471013270, "event_type": "POINT_IN_TIME", "key": "train_samples", "value": 3901, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471013270, "event_type": "POINT_IN_TIME", "key": "eval_samples", "value": 173, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471013271, "event_type": "POINT_IN_TIME", "key": "gradient_accumulation_steps", "value": 1, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 328}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471013272, "event_type": "POINT_IN_TIME", "key": "seed", "value": 1, "metadata": {"file": "/workspace/ft-llm/train.py", "lineno": 393}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471019434, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"before_model_init": 6.738934484000083}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 168, "step": 0}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471019889, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"after_model_init": 0.4552682889998323}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 168, "step": 0}} | |
| Loading distributed checkpoint with TensorStoreLoadShardedStrategy | |
| :::MLLOG {"namespace": "", "time_ms": 1755471283851, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"warmup_time": 263.961781042}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 168, "step": 0}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471283851, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"init_finished": 0.00043348400004106225}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 168, "step": 0}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471283852, "event_type": "INTERVAL_END", "key": "init_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 83}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471283853, "event_type": "INTERVAL_START", "key": "run_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 83}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471283854, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 199, "samples_count": 0}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471298842, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 2.231356143951416, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 80, "lr": 0.0005497879849661988}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471314118, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.523491382598877, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 160, "lr": 0.0005491522667766103}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471329437, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3142896890640259, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 240, "lr": 0.0005480938256626048}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471344748, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3286864757537842, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 320, "lr": 0.0005466142936636629}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471360056, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3191031217575073, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 400, "lr": 0.0005447159521108884}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471375386, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3337409496307373, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 480, "lr": 0.0005424017281093611}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471390691, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3527849912643433, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 560, "lr": 0.000539675190024753}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471405987, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3663201332092285, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 640, "lr": 0.0005365405419811673}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471421321, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.436583161354065, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 720, "lr": 0.0005330026173786832}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471436648, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2180163860321045, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 800, "lr": 0.0005290668714406038}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471451982, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3267669677734375, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 880, "lr": 0.0005247393728018974}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471467318, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3912932872772217, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 960, "lr": 0.0005200267941518012}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471482641, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.320623517036438, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1040, "lr": 0.0005149364019450193}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471497959, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2857550382614136, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1120, "lr": 0.0005094760451973754}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471513310, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.356960415840149, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1200, "lr": 0.0005036541433832}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471528663, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.325950026512146, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1280, "lr": 0.0004974796734531106}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471544018, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3832132816314697, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1360, "lr": 0.0004909621559922049}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471559347, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2853686809539795, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1440, "lr": 0.0004841116405400086}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471574682, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3469711542129517, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1520, "lr": 0.00047693869009481353}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471578084, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"throughput": 5.220374784959472, "train_step_time": 1.5324570226354173, "max_memory_usage": 168.827}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 225, "step": 0}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471578084, "event_type": "INTERVAL_END", "key": "block_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 208, "samples_count": 0}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471578084, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 130, "samples_count": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471592941, "event_type": "POINT_IN_TIME", "key": "eval_accuracy", "value": 0.9386916849654534, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 303, "samples_count": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471592942, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"validation_throughput": 11.84517688090803}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 245, "step": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471592942, "event_type": "INTERVAL_END", "key": "eval_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 153, "samples_count": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471592942, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 199, "samples_count": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471605223, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2639729976654053, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1600, "lr": 0.0004694543648263006}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471620533, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.359060525894165, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1680, "lr": 0.00046167020502155905}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471635863, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3872255086898804, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1760, "lr": 0.00045359821329080054}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471651197, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2579522132873535, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1840, "lr": 0.00044525083606020437}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471666501, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2951648235321045, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 1920, "lr": 0.0004366409443804302}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471666520, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"throughput": 5.218929434605367, "train_step_time": 1.5328814271666669, "max_memory_usage": 168.827}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 225, "step": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471666520, "event_type": "INTERVAL_END", "key": "block_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 208, "samples_count": 1536}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471666520, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 130, "samples_count": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471681322, "event_type": "POINT_IN_TIME", "key": "eval_accuracy", "value": 0.9341272784106304, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 303, "samples_count": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471681322, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"validation_throughput": 11.889916819247087}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 245, "step": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471681322, "event_type": "INTERVAL_END", "key": "eval_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 153, "samples_count": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471681323, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 199, "samples_count": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471696647, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.311585783958435, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2000, "lr": 0.0004277818140803907}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471711976, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3409117460250854, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2080, "lr": 0.00041868710529688595}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471727298, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3523324728012085, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2160, "lr": 0.00040937084141166267}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471742649, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2437388896942139, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2240, "lr": 0.0003998473874283754}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471754935, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"throughput": 5.21650721148526, "train_step_time": 1.5335932024375012, "max_memory_usage": 168.827}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 225, "step": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471754935, "event_type": "INTERVAL_END", "key": "block_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 208, "samples_count": 1920}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471754935, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 130, "samples_count": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471769543, "event_type": "POINT_IN_TIME", "key": "eval_accuracy", "value": 0.9300852869287392, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 303, "samples_count": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471769543, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"validation_throughput": 12.04804375654437}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 245, "step": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471769543, "event_type": "INTERVAL_END", "key": "eval_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 153, "samples_count": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471769543, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 199, "samples_count": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471772623, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3073469400405884, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2320, "lr": 0.00039013142782279276}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471787980, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.260759711265564, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2400, "lr": 0.00038023794390039975}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471803328, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2937121391296387, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2480, "lr": 0.00037018219069631056}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471818672, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3011728525161743, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2560, "lr": 0.00035997967345311057}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471834026, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3377065658569336, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2640, "lr": 0.00034964612371289557}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471843261, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"throughput": 5.2090510322980235, "train_step_time": 1.5357883711250035, "max_memory_usage": 168.827}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 225, "step": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471843262, "event_type": "INTERVAL_END", "key": "block_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 208, "samples_count": 2304}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471843262, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 130, "samples_count": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471857902, "event_type": "POINT_IN_TIME", "key": "eval_accuracy", "value": 0.9281238710260116, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 303, "samples_count": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471857903, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"validation_throughput": 12.02044647758139}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 245, "step": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471857903, "event_type": "INTERVAL_END", "key": "eval_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 153, "samples_count": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471857903, "event_type": "INTERVAL_START", "key": "block_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 199, "samples_count": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471864050, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3667250871658325, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2720, "lr": 0.000339197475060374}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471879394, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3398393392562866, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2800, "lr": 0.00032864983855443534}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471894731, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2728362083435059, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2880, "lr": 0.0003180194778860635}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471910087, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.2875945568084717, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 2960, "lr": 0.0003073227843009054}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471925418, "event_type": "POINT_IN_TIME", "key": "train_loss", "value": 1.3862403631210327, "metadata": {"file": "/workspace/ft-llm/custom_callbacks.py", "lineno": 71, "samples_count": 3040, "lr": 0.0002965762513251574}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471931575, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"throughput": 5.212281417349078, "train_step_time": 1.5348365445833376, "max_memory_usage": 168.827}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 225, "step": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471931575, "event_type": "INTERVAL_END", "key": "block_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 208, "samples_count": 2688}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471931575, "event_type": "INTERVAL_START", "key": "eval_start", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 130, "samples_count": 3072}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471946223, "event_type": "POINT_IN_TIME", "key": "eval_accuracy", "value": 0.9221087328960441, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 303, "samples_count": 3072}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471946223, "event_type": "POINT_IN_TIME", "key": "tracked_stats", "value": {"validation_throughput": 12.014723364925022}, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 245, "step": 3072}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471946224, "event_type": "INTERVAL_END", "key": "eval_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 153, "samples_count": 3072}} | |
| :::MLLOG {"namespace": "", "time_ms": 1755471946230, "event_type": "INTERVAL_END", "key": "run_stop", "value": null, "metadata": {"file": "/usr/local/lib/python3.12/dist-packages/mlperf_common/callbacks/logging.py", "lineno": 106, "samples_count": 3072, "status": "success"}} |