diff --git a/wandb/run-20240804_140603-q9i5g6sv/files/config.yaml b/wandb/run-20240804_140603-q9i5g6sv/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cdcb914e88db333a468a3998ecaf5b9a9d981413 --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/files/config.yaml @@ -0,0 +1,335 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +valid_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +test_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 512 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: Llama2Tokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: tiny-llama_train_2024-08-04-14:05:53 +wandb_project: + desc: null + value: llm_tutorial +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/tiny-llama +save: + desc: null + value: /work/llm_recipes/models/tiny-llama +base_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: adam +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 2000 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 2000 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 8 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 4096 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/tiny-llama +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: false +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 32000 +gradient_accumulation_steps: + desc: null + value: 40 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1722747963.684337 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 2048 +model_type: + desc: null + value: llama +max_position_embeddings: + desc: null + value: 2048 +num_attention_heads: + desc: null + value: 32 +num_hidden_layers: + desc: null + value: 22 +model_architecture: + desc: null + value: LlamaForCausalLM diff --git a/wandb/run-20240804_140603-q9i5g6sv/files/output.log b/wandb/run-20240804_140603-q9i5g6sv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..de7a73fcd437d531b7ad5c429bf3f5ca4b0a44e3 --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/files/output.log @@ -0,0 +1,130 @@ +Created Hugging Face repository with ID koichi12/tiny-llama. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama, skipping model loading +--> Model /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +--> /share/pretrained_lm/meta-llama/TinyLlama_v1.1 has 1100.048384 Million params +You are attempting to use Flash Attention 2.0 without specifying a torch dtype. This might lead to unexpected behaviour +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 640000 + validation: 35200 + test: 3200 +> building train, validation, and test datasets for GPT ... +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama, skipping optimizer loading +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): LlamaForCausalLM( + (model): LlamaModel( + (embed_tokens): Embedding(32000, 2048) + (layers): ModuleList( + (0-21): 22 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): LlamaDecoderLayer( + (self_attn): LlamaFlashAttention2( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=256, bias=False) + (v_proj): Linear(in_features=2048, out_features=256, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary_emb): LlamaRotaryEmbedding() + ) + (mlp): LlamaMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): LlamaRMSNorm() + (post_attention_layernorm): LlamaRMSNorm() + ) + ) + ) + ) + (norm): LlamaRMSNorm() + (rotary_emb): LlamaRotaryEmbedding() + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + ) +) +model config: LlamaConfig { + "_name_or_path": "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 5632, + "label_smoothing": 0.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 22, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 32000 +} +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +Let split = None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 110, in train + loss: torch.Tensor = model(**batch).loss + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward + output = self._fsdp_wrapped_module(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/project/lib/transformers/src/transformers/models/llama/modeling_llama.py", line 1141, in forward + outputs = self.model( + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/project/lib/transformers/src/transformers/models/llama/modeling_llama.py", line 908, in forward + cache_position = torch.arange( +RuntimeError: CUDA error: device-side assert triggered +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. \ No newline at end of file diff --git a/wandb/run-20240804_140603-q9i5g6sv/files/requirements.txt b/wandb/run-20240804_140603-q9i5g6sv/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c90fd1c54d0a9881f6b4c6465b2a4fa88c9056c --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/files/requirements.txt @@ -0,0 +1,271 @@ +absl-py==2.1.0 +accelerate==0.33.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blis==0.7.11 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distributed==2023.11.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +einops==0.7.0 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +grpcio==1.60.1 +huggingface-hub==0.24.5 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +jedi==0.19.1 +jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonnet==0.19.1 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +kiwisolver==1.4.5 +langcodes==3.3.0 +lazy-loader==0.3 +librosa==0.10.1 +llvmlite==0.40.1 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +murmurhash==1.0.10 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +onnx==1.15.0rc2 +opencv==4.7.0 +optree==0.10.0 +packaging==23.2 +pandas==1.5.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +peft==0.11.1 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.1.dev0+gba5374836.d20240125 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pynvml==11.4.1 +pyparsing==3.1.1 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests==2.31.0 +rich==13.7.0 +rmm==23.12.0 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.0 +safetensors==0.4.3 +scikit-learn==1.2.0 +scipy==1.12.0 +send2trash==1.8.2 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +srsly==2.4.8 +stack-data==0.6.3 +sympy==1.12 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.9.0 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +urllib3==1.26.18 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +xdoctest==1.0.2 +xgboost==1.7.6 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 \ No newline at end of file diff --git a/wandb/run-20240804_140603-q9i5g6sv/files/wandb-metadata.json b/wandb/run-20240804_140603-q9i5g6sv/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..7dca6ec78e548f9f1f3677bf5350c0ab04d59191 --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/files/wandb-metadata.json @@ -0,0 +1,215 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-04T05:06:04.333644", + "startedAt": "2024-08-04T05:06:03.671763", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "512", + "--sliding-window-size", + "4096", + "--micro-batch-size", + "8", + "--global-batch-size", + "320", + "--train-iters", + "2000", + "--tokenizer-type", + "Llama2Tokenizer", + "--tokenizer-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model", + "--train-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--valid-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--test-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "2000", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "adam", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "--save", + "/work/llm_recipes/models/tiny-llama", + "--load", + "/work/llm_recipes/models/tiny-llama", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/tiny-llama", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial", + "--wandb-name", + "tiny-llama_train_2024-08-04-14:05:53" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "3be5353210a678dc7008f237fa16b99f2bdf36ea" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.0389999999993, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.48781967163086 + } +} diff --git a/wandb/run-20240804_140603-q9i5g6sv/files/wandb-summary.json b/wandb/run-20240804_140603-q9i5g6sv/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..b1d4cf96d64955a2ae8b982ce021e29fde546a1a --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 4}} \ No newline at end of file diff --git a/wandb/run-20240804_140603-q9i5g6sv/logs/debug-internal.log b/wandb/run-20240804_140603-q9i5g6sv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..577b35fa89a810491a285f46e325978385fb1792 --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/logs/debug-internal.log @@ -0,0 +1,186 @@ +2024-08-04 14:06:03,686 INFO StreamThr :9469 [internal.py:wandb_internal():86] W&B internal server running at pid: 9469, started at: 2024-08-04 14:06:03.685029 +2024-08-04 14:06:03,687 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: status +2024-08-04 14:06:03,689 INFO WriterThread:9469 [datastore.py:open_for_write():87] open: /project/wandb/run-20240804_140603-q9i5g6sv/run-q9i5g6sv.wandb +2024-08-04 14:06:03,690 DEBUG SenderThread:9469 [sender.py:send():382] send: header +2024-08-04 14:06:03,703 DEBUG SenderThread:9469 [sender.py:send():382] send: run +2024-08-04 14:06:04,218 INFO SenderThread:9469 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240804_140603-q9i5g6sv/files +2024-08-04 14:06:04,218 INFO SenderThread:9469 [sender.py:_start_run_threads():1136] run started: q9i5g6sv with start time 1722747963.684337 +2024-08-04 14:06:04,223 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: check_version +2024-08-04 14:06:04,223 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: check_version +2024-08-04 14:06:04,313 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: run_start +2024-08-04 14:06:04,320 DEBUG HandlerThread:9469 [system_info.py:__init__():27] System info init +2024-08-04 14:06:04,320 DEBUG HandlerThread:9469 [system_info.py:__init__():42] System info init done +2024-08-04 14:06:04,320 INFO HandlerThread:9469 [system_monitor.py:start():194] Starting system monitor +2024-08-04 14:06:04,320 INFO SystemMonitor:9469 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-04 14:06:04,320 INFO HandlerThread:9469 [system_monitor.py:probe():214] Collecting system info +2024-08-04 14:06:04,321 INFO SystemMonitor:9469 [interfaces.py:start():190] Started cpu monitoring +2024-08-04 14:06:04,321 INFO SystemMonitor:9469 [interfaces.py:start():190] Started disk monitoring +2024-08-04 14:06:04,322 INFO SystemMonitor:9469 [interfaces.py:start():190] Started gpu monitoring +2024-08-04 14:06:04,322 INFO SystemMonitor:9469 [interfaces.py:start():190] Started memory monitoring +2024-08-04 14:06:04,323 INFO SystemMonitor:9469 [interfaces.py:start():190] Started network monitoring +2024-08-04 14:06:04,333 DEBUG HandlerThread:9469 [system_info.py:probe():151] Probing system +2024-08-04 14:06:04,335 DEBUG HandlerThread:9469 [system_info.py:_probe_git():136] Probing git +2024-08-04 14:06:04,347 DEBUG HandlerThread:9469 [system_info.py:_probe_git():144] Probing git done +2024-08-04 14:06:04,347 DEBUG HandlerThread:9469 [system_info.py:probe():199] Probing system done +2024-08-04 14:06:04,347 DEBUG HandlerThread:9469 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-04T05:06:04.333644', 'startedAt': '2024-08-04T05:06:03.671763', 'docker': None, 'cuda': None, 'args': ('--seq-length', '512', '--sliding-window-size', '4096', '--micro-batch-size', '8', '--global-batch-size', '320', '--train-iters', '2000', '--tokenizer-type', 'Llama2Tokenizer', '--tokenizer-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', '--train-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--valid-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--test-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '2000', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'adam', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '200', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', '--save', '/work/llm_recipes/models/tiny-llama', '--load', '/work/llm_recipes/models/tiny-llama', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/tiny-llama', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial', '--wandb-name', 'tiny-llama_train_2024-08-04-14:05:53'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '3be5353210a678dc7008f237fa16b99f2bdf36ea'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.0389999999993, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.48781967163086}} +2024-08-04 14:06:04,347 INFO HandlerThread:9469 [system_monitor.py:probe():224] Finished collecting system info +2024-08-04 14:06:04,347 INFO HandlerThread:9469 [system_monitor.py:probe():227] Publishing system info +2024-08-04 14:06:04,349 INFO HandlerThread:9469 [system_monitor.py:probe():229] Finished publishing system info +2024-08-04 14:06:04,354 DEBUG SenderThread:9469 [sender.py:send():382] send: files +2024-08-04 14:06:04,354 INFO SenderThread:9469 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-04 14:06:04,364 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: python_packages +2024-08-04 14:06:04,364 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 14:06:04,364 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 14:06:04,364 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: python_packages +2024-08-04 14:06:04,366 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: stop_status +2024-08-04 14:06:04,605 DEBUG SenderThread:9469 [sender.py:send():382] send: telemetry +2024-08-04 14:06:04,996 INFO wandb-upload_0:9469 [upload_job.py:push():131] Uploaded file /tmp/tmpz1emajybwandb/prws540s-wandb-metadata.json +2024-08-04 14:06:05,220 INFO Thread-12 :9469 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_140603-q9i5g6sv/files/requirements.txt +2024-08-04 14:06:05,220 INFO Thread-12 :9469 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_140603-q9i5g6sv/files/wandb-metadata.json +2024-08-04 14:06:05,220 INFO Thread-12 :9469 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log +2024-08-04 14:06:07,221 INFO Thread-12 :9469 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log +2024-08-04 14:06:07,604 DEBUG SenderThread:9469 [sender.py:send():382] send: config +2024-08-04 14:06:07,605 DEBUG SenderThread:9469 [sender.py:send():382] send: config +2024-08-04 14:06:08,222 INFO Thread-12 :9469 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log +2024-08-04 14:06:08,620 DEBUG SenderThread:9469 [sender.py:send():382] send: exit +2024-08-04 14:06:08,620 INFO SenderThread:9469 [sender.py:send_exit():589] handling exit code: 1 +2024-08-04 14:06:08,620 INFO SenderThread:9469 [sender.py:send_exit():591] handling runtime: 4 +2024-08-04 14:06:08,621 INFO SenderThread:9469 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 14:06:08,622 INFO SenderThread:9469 [sender.py:send_exit():597] send defer +2024-08-04 14:06:08,622 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,622 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-04 14:06:08,622 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,622 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-04 14:06:08,622 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 1 +2024-08-04 14:06:08,622 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,622 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-04 14:06:08,622 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,622 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-04 14:06:08,623 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 2 +2024-08-04 14:06:08,623 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,623 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-04 14:06:08,623 INFO HandlerThread:9469 [system_monitor.py:finish():203] Stopping system monitor +2024-08-04 14:06:08,623 DEBUG SystemMonitor:9469 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-04 14:06:08,623 INFO HandlerThread:9469 [interfaces.py:finish():202] Joined cpu monitor +2024-08-04 14:06:08,623 DEBUG SystemMonitor:9469 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-04 14:06:08,623 INFO HandlerThread:9469 [interfaces.py:finish():202] Joined disk monitor +2024-08-04 14:06:08,624 DEBUG SystemMonitor:9469 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-04 14:06:08,656 INFO HandlerThread:9469 [interfaces.py:finish():202] Joined gpu monitor +2024-08-04 14:06:08,656 INFO HandlerThread:9469 [interfaces.py:finish():202] Joined memory monitor +2024-08-04 14:06:08,656 INFO HandlerThread:9469 [interfaces.py:finish():202] Joined network monitor +2024-08-04 14:06:08,657 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,657 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-04 14:06:08,657 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 3 +2024-08-04 14:06:08,657 DEBUG SenderThread:9469 [sender.py:send():382] send: stats +2024-08-04 14:06:08,657 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,657 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-04 14:06:08,657 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,657 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-04 14:06:08,657 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 4 +2024-08-04 14:06:08,657 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,657 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-04 14:06:08,658 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,658 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-04 14:06:08,658 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 5 +2024-08-04 14:06:08,658 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,658 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-04 14:06:08,658 DEBUG SenderThread:9469 [sender.py:send():382] send: summary +2024-08-04 14:06:08,659 INFO SenderThread:9469 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 14:06:08,659 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,659 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-04 14:06:08,659 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 6 +2024-08-04 14:06:08,659 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,659 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-04 14:06:08,659 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,660 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-04 14:06:08,662 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 14:06:08,848 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 7 +2024-08-04 14:06:08,849 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:08,849 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-04 14:06:08,849 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:08,849 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-04 14:06:09,223 INFO Thread-12 :9469 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_140603-q9i5g6sv/files/config.yaml +2024-08-04 14:06:09,223 INFO Thread-12 :9469 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log +2024-08-04 14:06:09,223 INFO Thread-12 :9469 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_140603-q9i5g6sv/files/wandb-summary.json +2024-08-04 14:06:09,360 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 8 +2024-08-04 14:06:09,361 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:09,361 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-04 14:06:09,361 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:09,361 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-04 14:06:09,361 INFO SenderThread:9469 [job_builder.py:build():296] Attempting to build job artifact +2024-08-04 14:06:09,362 INFO SenderThread:9469 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-04 14:06:09,376 INFO SenderThread:9469 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-04 14:06:09,384 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 9 +2024-08-04 14:06:09,384 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:09,384 DEBUG SenderThread:9469 [sender.py:send():382] send: artifact +2024-08-04 14:06:09,384 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-04 14:06:09,620 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:06:10,224 INFO Thread-12 :9469 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log +2024-08-04 14:06:10,240 INFO SenderThread:9469 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTA5MTk2NTkzOA==', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjM2MjY3MjMzNA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTA5MzUzODM4NQ==', 'versionIndex': 3}}} +2024-08-04 14:06:10,240 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:10,240 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-04 14:06:10,240 INFO SenderThread:9469 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-04 14:06:11,225 INFO SenderThread:9469 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240804_140603-q9i5g6sv/files +2024-08-04 14:06:11,225 INFO SenderThread:9469 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_140603-q9i5g6sv/files/requirements.txt requirements.txt +2024-08-04 14:06:11,225 INFO SenderThread:9469 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_140603-q9i5g6sv/files/config.yaml config.yaml +2024-08-04 14:06:11,227 INFO SenderThread:9469 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_140603-q9i5g6sv/files/wandb-metadata.json wandb-metadata.json +2024-08-04 14:06:11,227 INFO SenderThread:9469 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_140603-q9i5g6sv/files/wandb-summary.json wandb-summary.json +2024-08-04 14:06:11,228 INFO SenderThread:9469 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log output.log +2024-08-04 14:06:11,230 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 10 +2024-08-04 14:06:11,230 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:06:11,230 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:11,232 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-04 14:06:11,232 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:11,232 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-04 14:06:11,232 INFO SenderThread:9469 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 14:06:11,620 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:06:11,621 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:06:11,713 INFO wandb-upload_0:9469 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_140603-q9i5g6sv/files/requirements.txt +2024-08-04 14:06:11,733 INFO wandb-upload_1:9469 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_140603-q9i5g6sv/files/config.yaml +2024-08-04 14:06:11,829 INFO wandb-upload_2:9469 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_140603-q9i5g6sv/files/wandb-summary.json +2024-08-04 14:06:11,833 INFO wandb-upload_3:9469 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_140603-q9i5g6sv/files/output.log +2024-08-04 14:06:12,033 INFO Thread-11 (_thread_body):9469 [sender.py:transition_state():617] send defer: 11 +2024-08-04 14:06:12,034 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:12,034 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-04 14:06:12,034 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:12,034 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-04 14:06:12,034 INFO SenderThread:9469 [file_pusher.py:join():178] waiting for file pusher +2024-08-04 14:06:12,034 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 12 +2024-08-04 14:06:12,034 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:12,034 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-04 14:06:12,035 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:12,035 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-04 14:06:12,035 INFO SenderThread:9469 [file_stream.py:finish():595] file stream finish called +2024-08-04 14:06:12,204 INFO SenderThread:9469 [file_stream.py:finish():599] file stream finish is done +2024-08-04 14:06:12,204 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 13 +2024-08-04 14:06:12,205 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:12,205 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-04 14:06:12,205 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:12,205 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-04 14:06:12,205 INFO SenderThread:9469 [sender.py:transition_state():617] send defer: 14 +2024-08-04 14:06:12,205 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:06:12,205 DEBUG SenderThread:9469 [sender.py:send():382] send: final +2024-08-04 14:06:12,205 INFO HandlerThread:9469 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-04 14:06:12,205 DEBUG SenderThread:9469 [sender.py:send():382] send: footer +2024-08-04 14:06:12,206 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: defer +2024-08-04 14:06:12,206 INFO SenderThread:9469 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-04 14:06:12,206 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:06:12,206 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:06:12,206 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:06:12,207 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:06:12,207 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: server_info +2024-08-04 14:06:12,207 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: get_summary +2024-08-04 14:06:12,207 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: server_info +2024-08-04 14:06:12,208 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-04 14:06:12,209 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 14:06:12,209 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: job_info +2024-08-04 14:06:12,360 DEBUG SenderThread:9469 [sender.py:send_request():409] send_request: job_info +2024-08-04 14:06:12,360 INFO MainThread:9469 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-04 14:06:12,360 INFO MainThread:9469 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-04 14:06:12,360 INFO MainThread:9469 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-04 14:06:12,360 DEBUG HandlerThread:9469 [handler.py:handle_request():146] handle_request: shutdown +2024-08-04 14:06:12,361 INFO HandlerThread:9469 [handler.py:finish():869] shutting down handler +2024-08-04 14:06:13,210 INFO WriterThread:9469 [datastore.py:close():296] close: /project/wandb/run-20240804_140603-q9i5g6sv/run-q9i5g6sv.wandb +2024-08-04 14:06:13,360 INFO SenderThread:9469 [sender.py:finish():1572] shutting down sender +2024-08-04 14:06:13,360 INFO SenderThread:9469 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 14:06:13,360 INFO SenderThread:9469 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240804_140603-q9i5g6sv/logs/debug.log b/wandb/run-20240804_140603-q9i5g6sv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..15347e1e314b6a1175df113dd48f52d185196a04 --- /dev/null +++ b/wandb/run-20240804_140603-q9i5g6sv/logs/debug.log @@ -0,0 +1,30 @@ +2024-08-04 14:06:03,677 INFO MainThread:9398 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_setup.py:_flush():76] Configure stats pid to 9398 +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train tiny llama sample'} +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240804_140603-q9i5g6sv/logs/debug.log +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240804_140603-q9i5g6sv/logs/debug-internal.log +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_init.py:init():566] calling init triggers +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'valid_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'test_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 512, 'num_workers': 2, 'tokenizer_type': 'Llama2Tokenizer', 'tokenizer_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'tiny-llama_train_2024-08-04-14:05:53', 'wandb_project': 'llm_tutorial', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/tiny-llama', 'save': '/work/llm_recipes/models/tiny-llama', 'base_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 'eval_iters': 10, 'optimizer': 'adam', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 2000, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 2000, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 8, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 4096, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/tiny-llama', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': False, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 32000, 'gradient_accumulation_steps': 40} +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_init.py:init():616] starting backend +2024-08-04 14:06:03,678 INFO MainThread:9398 [wandb_init.py:init():620] setting up manager +2024-08-04 14:06:03,683 INFO MainThread:9398 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-04 14:06:03,684 INFO MainThread:9398 [wandb_init.py:init():628] backend started and connected +2024-08-04 14:06:03,689 INFO MainThread:9398 [wandb_init.py:init():720] updated telemetry +2024-08-04 14:06:03,699 INFO MainThread:9398 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-04 14:06:04,223 INFO MainThread:9398 [wandb_run.py:_on_init():2262] communicating current version +2024-08-04 14:06:04,307 INFO MainThread:9398 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-04 14:06:04,307 INFO MainThread:9398 [wandb_init.py:init():804] starting run threads in backend +2024-08-04 14:06:04,363 INFO MainThread:9398 [wandb_run.py:_console_start():2241] atexit reg +2024-08-04 14:06:04,363 INFO MainThread:9398 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-04 14:06:04,363 INFO MainThread:9398 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-04 14:06:04,363 INFO MainThread:9398 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-04 14:06:04,364 INFO MainThread:9398 [wandb_init.py:init():847] run started, returning control to user process +2024-08-04 14:06:07,603 INFO MainThread:9398 [wandb_run.py:_config_callback():1343] config_cb None None {'activation_function': 'silu', 'hidden_size': 2048, 'model_type': 'llama', 'max_position_embeddings': 2048, 'num_attention_heads': 32, 'num_hidden_layers': 22, 'model_architecture': 'LlamaForCausalLM'} +2024-08-04 14:06:07,604 INFO MainThread:9398 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} +2024-08-04 14:06:13,361 WARNING MsgRouterThr:9398 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240804_140603-q9i5g6sv/run-q9i5g6sv.wandb b/wandb/run-20240804_140603-q9i5g6sv/run-q9i5g6sv.wandb new file mode 100644 index 0000000000000000000000000000000000000000..3ec775a57228adb91094ca4f2b31432d3cd92163 Binary files /dev/null and b/wandb/run-20240804_140603-q9i5g6sv/run-q9i5g6sv.wandb differ diff --git a/wandb/run-20240804_142250-6p58tz1g/files/config.yaml b/wandb/run-20240804_142250-6p58tz1g/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cf98af8ef70360326bebb0ca7eca296de8f0ccac --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/files/config.yaml @@ -0,0 +1,335 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +valid_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +test_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 512 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: Llama2Tokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: tiny-llama_train_2024-08-04-14:22:39 +wandb_project: + desc: null + value: llm_tutorial +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/tiny-llama +save: + desc: null + value: /work/llm_recipes/models/tiny-llama +base_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: adam +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 2000 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 2000 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 8 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 4096 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/tiny-llama +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: false +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 32000 +gradient_accumulation_steps: + desc: null + value: 40 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1722748970.443993 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 2048 +model_type: + desc: null + value: llama +max_position_embeddings: + desc: null + value: 2048 +num_attention_heads: + desc: null + value: 32 +num_hidden_layers: + desc: null + value: 22 +model_architecture: + desc: null + value: LlamaForCausalLM diff --git a/wandb/run-20240804_142250-6p58tz1g/files/output.log b/wandb/run-20240804_142250-6p58tz1g/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9abb53ace2c37c8a4d95ebea31daf244e7fa2440 --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/files/output.log @@ -0,0 +1,135 @@ +Created Hugging Face repository with ID koichi12/tiny-llama. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama, skipping model loading +--> Model /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +--> /share/pretrained_lm/meta-llama/TinyLlama_v1.1 has 1100.048384 Million params +You are attempting to use Flash Attention 2.0 without specifying a torch dtype. This might lead to unexpected behaviour +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 640000 + validation: 35200 + test: 3200 +> building train, validation, and test datasets for GPT ... +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama, skipping optimizer loading +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): LlamaForCausalLM( + (model): LlamaModel( + (embed_tokens): Embedding(32000, 2048) + (layers): ModuleList( + (0-21): 22 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): LlamaDecoderLayer( + (self_attn): LlamaFlashAttention2( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=256, bias=False) + (v_proj): Linear(in_features=2048, out_features=256, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary_emb): LlamaRotaryEmbedding() + ) + (mlp): LlamaMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): LlamaRMSNorm() + (post_attention_layernorm): LlamaRMSNorm() + ) + ) + ) + ) + (norm): LlamaRMSNorm() + (rotary_emb): LlamaRotaryEmbedding() + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + ) +) +model config: LlamaConfig { + "_name_or_path": "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 5632, + "label_smoothing": 0.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 22, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 32000 +} +Let split = None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 104, in train + batch = next(train_dataloader) + File "/project/src/llama_recipes/utils/train_utils.py", line 24, in cyclic_iter + for x in iter: + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 631, in __next__ + data = self._next_data() + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1346, in _next_data + return self._process_data(data) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1372, in _process_data + data.reraise() + File "/usr/local/lib/python3.10/dist-packages/torch/_utils.py", line 705, in reraise + raise exception +RuntimeError: Caught RuntimeError in DataLoader worker process 0. +Original Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop + data = fetcher.fetch(index) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch + return self.collate_fn(data) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 277, in default_collate + return collate(batch, collate_fn_map=default_collate_fn_map) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 129, in collate + return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 129, in + return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 121, in collate + return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 174, in collate_tensor_fn + return torch.stack(batch, 0, out=out) +RuntimeError: stack expects each tensor to be equal size, but got [513] at entry 0 and [543] at entry 1 \ No newline at end of file diff --git a/wandb/run-20240804_142250-6p58tz1g/files/requirements.txt b/wandb/run-20240804_142250-6p58tz1g/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c90fd1c54d0a9881f6b4c6465b2a4fa88c9056c --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/files/requirements.txt @@ -0,0 +1,271 @@ +absl-py==2.1.0 +accelerate==0.33.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blis==0.7.11 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distributed==2023.11.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +einops==0.7.0 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +grpcio==1.60.1 +huggingface-hub==0.24.5 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +jedi==0.19.1 +jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonnet==0.19.1 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +kiwisolver==1.4.5 +langcodes==3.3.0 +lazy-loader==0.3 +librosa==0.10.1 +llvmlite==0.40.1 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +murmurhash==1.0.10 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +onnx==1.15.0rc2 +opencv==4.7.0 +optree==0.10.0 +packaging==23.2 +pandas==1.5.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +peft==0.11.1 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.1.dev0+gba5374836.d20240125 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pynvml==11.4.1 +pyparsing==3.1.1 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests==2.31.0 +rich==13.7.0 +rmm==23.12.0 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.0 +safetensors==0.4.3 +scikit-learn==1.2.0 +scipy==1.12.0 +send2trash==1.8.2 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +srsly==2.4.8 +stack-data==0.6.3 +sympy==1.12 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.9.0 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +urllib3==1.26.18 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +xdoctest==1.0.2 +xgboost==1.7.6 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 \ No newline at end of file diff --git a/wandb/run-20240804_142250-6p58tz1g/files/wandb-metadata.json b/wandb/run-20240804_142250-6p58tz1g/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..f0bf8c795abcc01b9e0c917e6807be75f14cabf2 --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/files/wandb-metadata.json @@ -0,0 +1,215 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-04T05:22:51.055103", + "startedAt": "2024-08-04T05:22:50.431050", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "512", + "--sliding-window-size", + "4096", + "--micro-batch-size", + "8", + "--global-batch-size", + "320", + "--train-iters", + "2000", + "--tokenizer-type", + "Llama2Tokenizer", + "--tokenizer-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model", + "--train-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--valid-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--test-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "2000", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "adam", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "--save", + "/work/llm_recipes/models/tiny-llama", + "--load", + "/work/llm_recipes/models/tiny-llama", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/tiny-llama", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial", + "--wandb-name", + "tiny-llama_train_2024-08-04-14:22:39" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "3be5353210a678dc7008f237fa16b99f2bdf36ea" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.0389999999993, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.48781967163086 + } +} diff --git a/wandb/run-20240804_142250-6p58tz1g/files/wandb-summary.json b/wandb/run-20240804_142250-6p58tz1g/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..6a2353df9a39aec28b5e444685dc5d7223bc37fd --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 2}} \ No newline at end of file diff --git a/wandb/run-20240804_142250-6p58tz1g/logs/debug-internal.log b/wandb/run-20240804_142250-6p58tz1g/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..047bf64aa643627e259f8a14b4504b6c75203c7b --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/logs/debug-internal.log @@ -0,0 +1,186 @@ +2024-08-04 14:22:50,445 INFO StreamThr :10451 [internal.py:wandb_internal():86] W&B internal server running at pid: 10451, started at: 2024-08-04 14:22:50.444819 +2024-08-04 14:22:50,447 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: status +2024-08-04 14:22:50,449 INFO WriterThread:10451 [datastore.py:open_for_write():87] open: /project/wandb/run-20240804_142250-6p58tz1g/run-6p58tz1g.wandb +2024-08-04 14:22:50,450 DEBUG SenderThread:10451 [sender.py:send():382] send: header +2024-08-04 14:22:50,463 DEBUG SenderThread:10451 [sender.py:send():382] send: run +2024-08-04 14:22:50,941 INFO SenderThread:10451 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240804_142250-6p58tz1g/files +2024-08-04 14:22:50,941 INFO SenderThread:10451 [sender.py:_start_run_threads():1136] run started: 6p58tz1g with start time 1722748970.443993 +2024-08-04 14:22:50,946 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: check_version +2024-08-04 14:22:50,946 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: check_version +2024-08-04 14:22:51,034 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: run_start +2024-08-04 14:22:51,041 DEBUG HandlerThread:10451 [system_info.py:__init__():27] System info init +2024-08-04 14:22:51,041 DEBUG HandlerThread:10451 [system_info.py:__init__():42] System info init done +2024-08-04 14:22:51,041 INFO HandlerThread:10451 [system_monitor.py:start():194] Starting system monitor +2024-08-04 14:22:51,041 INFO SystemMonitor:10451 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-04 14:22:51,042 INFO HandlerThread:10451 [system_monitor.py:probe():214] Collecting system info +2024-08-04 14:22:51,042 INFO SystemMonitor:10451 [interfaces.py:start():190] Started cpu monitoring +2024-08-04 14:22:51,043 INFO SystemMonitor:10451 [interfaces.py:start():190] Started disk monitoring +2024-08-04 14:22:51,044 INFO SystemMonitor:10451 [interfaces.py:start():190] Started gpu monitoring +2024-08-04 14:22:51,044 INFO SystemMonitor:10451 [interfaces.py:start():190] Started memory monitoring +2024-08-04 14:22:51,045 INFO SystemMonitor:10451 [interfaces.py:start():190] Started network monitoring +2024-08-04 14:22:51,055 DEBUG HandlerThread:10451 [system_info.py:probe():151] Probing system +2024-08-04 14:22:51,059 DEBUG HandlerThread:10451 [system_info.py:_probe_git():136] Probing git +2024-08-04 14:22:51,071 DEBUG HandlerThread:10451 [system_info.py:_probe_git():144] Probing git done +2024-08-04 14:22:51,071 DEBUG HandlerThread:10451 [system_info.py:probe():199] Probing system done +2024-08-04 14:22:51,071 DEBUG HandlerThread:10451 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-04T05:22:51.055103', 'startedAt': '2024-08-04T05:22:50.431050', 'docker': None, 'cuda': None, 'args': ('--seq-length', '512', '--sliding-window-size', '4096', '--micro-batch-size', '8', '--global-batch-size', '320', '--train-iters', '2000', '--tokenizer-type', 'Llama2Tokenizer', '--tokenizer-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', '--train-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--valid-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--test-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '2000', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'adam', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '200', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', '--save', '/work/llm_recipes/models/tiny-llama', '--load', '/work/llm_recipes/models/tiny-llama', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/tiny-llama', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial', '--wandb-name', 'tiny-llama_train_2024-08-04-14:22:39'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '3be5353210a678dc7008f237fa16b99f2bdf36ea'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.0389999999993, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.48781967163086}} +2024-08-04 14:22:51,072 INFO HandlerThread:10451 [system_monitor.py:probe():224] Finished collecting system info +2024-08-04 14:22:51,072 INFO HandlerThread:10451 [system_monitor.py:probe():227] Publishing system info +2024-08-04 14:22:51,073 INFO HandlerThread:10451 [system_monitor.py:probe():229] Finished publishing system info +2024-08-04 14:22:51,079 DEBUG SenderThread:10451 [sender.py:send():382] send: files +2024-08-04 14:22:51,079 INFO SenderThread:10451 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-04 14:22:51,089 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: python_packages +2024-08-04 14:22:51,089 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 14:22:51,089 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: python_packages +2024-08-04 14:22:51,090 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 14:22:51,091 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: stop_status +2024-08-04 14:22:51,412 DEBUG SenderThread:10451 [sender.py:send():382] send: telemetry +2024-08-04 14:22:51,834 INFO wandb-upload_0:10451 [upload_job.py:push():131] Uploaded file /tmp/tmpvai5nc9ewandb/lc3l5ghh-wandb-metadata.json +2024-08-04 14:22:51,943 INFO Thread-12 :10451 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_142250-6p58tz1g/files/requirements.txt +2024-08-04 14:22:51,943 INFO Thread-12 :10451 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_142250-6p58tz1g/files/wandb-metadata.json +2024-08-04 14:22:51,943 INFO Thread-12 :10451 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_142250-6p58tz1g/files/output.log +2024-08-04 14:22:53,535 DEBUG SenderThread:10451 [sender.py:send():382] send: config +2024-08-04 14:22:53,536 DEBUG SenderThread:10451 [sender.py:send():382] send: config +2024-08-04 14:22:53,643 DEBUG SenderThread:10451 [sender.py:send():382] send: exit +2024-08-04 14:22:53,643 INFO SenderThread:10451 [sender.py:send_exit():589] handling exit code: 1 +2024-08-04 14:22:53,643 INFO SenderThread:10451 [sender.py:send_exit():591] handling runtime: 2 +2024-08-04 14:22:53,644 INFO SenderThread:10451 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 14:22:53,645 INFO SenderThread:10451 [sender.py:send_exit():597] send defer +2024-08-04 14:22:53,645 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,645 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-04 14:22:53,645 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,645 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-04 14:22:53,645 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 1 +2024-08-04 14:22:53,645 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,645 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-04 14:22:53,645 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,645 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-04 14:22:53,645 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 2 +2024-08-04 14:22:53,645 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,645 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-04 14:22:53,645 INFO HandlerThread:10451 [system_monitor.py:finish():203] Stopping system monitor +2024-08-04 14:22:53,646 DEBUG SystemMonitor:10451 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-04 14:22:53,646 INFO HandlerThread:10451 [interfaces.py:finish():202] Joined cpu monitor +2024-08-04 14:22:53,646 DEBUG SystemMonitor:10451 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-04 14:22:53,646 INFO HandlerThread:10451 [interfaces.py:finish():202] Joined disk monitor +2024-08-04 14:22:53,646 DEBUG SystemMonitor:10451 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-04 14:22:53,679 INFO HandlerThread:10451 [interfaces.py:finish():202] Joined gpu monitor +2024-08-04 14:22:53,679 INFO HandlerThread:10451 [interfaces.py:finish():202] Joined memory monitor +2024-08-04 14:22:53,679 INFO HandlerThread:10451 [interfaces.py:finish():202] Joined network monitor +2024-08-04 14:22:53,680 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,680 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-04 14:22:53,680 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 3 +2024-08-04 14:22:53,680 DEBUG SenderThread:10451 [sender.py:send():382] send: stats +2024-08-04 14:22:53,680 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,680 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-04 14:22:53,680 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,681 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-04 14:22:53,681 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 4 +2024-08-04 14:22:53,681 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,681 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-04 14:22:53,681 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,681 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-04 14:22:53,681 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 5 +2024-08-04 14:22:53,681 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,681 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-04 14:22:53,681 DEBUG SenderThread:10451 [sender.py:send():382] send: summary +2024-08-04 14:22:53,682 INFO SenderThread:10451 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 14:22:53,682 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,682 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-04 14:22:53,682 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 6 +2024-08-04 14:22:53,683 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,683 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-04 14:22:53,683 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,683 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-04 14:22:53,685 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 14:22:53,891 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 7 +2024-08-04 14:22:53,891 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:53,891 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-04 14:22:53,892 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:53,892 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-04 14:22:53,944 INFO Thread-12 :10451 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_142250-6p58tz1g/files/config.yaml +2024-08-04 14:22:53,944 INFO Thread-12 :10451 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_142250-6p58tz1g/files/output.log +2024-08-04 14:22:53,944 INFO Thread-12 :10451 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_142250-6p58tz1g/files/wandb-summary.json +2024-08-04 14:22:54,643 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:22:55,782 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 8 +2024-08-04 14:22:55,783 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:22:55,783 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:55,783 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-04 14:22:55,783 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:55,783 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-04 14:22:55,783 INFO SenderThread:10451 [job_builder.py:build():296] Attempting to build job artifact +2024-08-04 14:22:55,784 INFO SenderThread:10451 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-04 14:22:55,883 INFO SenderThread:10451 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-04 14:22:55,891 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 9 +2024-08-04 14:22:55,892 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:55,892 DEBUG SenderThread:10451 [sender.py:send():382] send: artifact +2024-08-04 14:22:55,892 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-04 14:22:55,945 INFO Thread-12 :10451 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_142250-6p58tz1g/files/output.log +2024-08-04 14:22:56,644 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:22:57,777 INFO SenderThread:10451 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTA5MTk2NTkzOA==', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjM2MjY3MjMzNA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTA5MzUzODM4NQ==', 'versionIndex': 3}}} +2024-08-04 14:22:57,777 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:57,777 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-04 14:22:57,777 INFO SenderThread:10451 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-04 14:22:57,946 INFO SenderThread:10451 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240804_142250-6p58tz1g/files +2024-08-04 14:22:57,946 INFO SenderThread:10451 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_142250-6p58tz1g/files/requirements.txt requirements.txt +2024-08-04 14:22:57,947 INFO SenderThread:10451 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_142250-6p58tz1g/files/config.yaml config.yaml +2024-08-04 14:22:57,948 INFO SenderThread:10451 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_142250-6p58tz1g/files/wandb-metadata.json wandb-metadata.json +2024-08-04 14:22:57,948 INFO SenderThread:10451 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_142250-6p58tz1g/files/wandb-summary.json wandb-summary.json +2024-08-04 14:22:57,950 INFO SenderThread:10451 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_142250-6p58tz1g/files/output.log output.log +2024-08-04 14:22:57,952 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 10 +2024-08-04 14:22:57,952 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:22:57,952 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:57,952 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-04 14:22:57,954 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:57,954 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-04 14:22:57,954 INFO SenderThread:10451 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 14:22:58,363 INFO wandb-upload_1:10451 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_142250-6p58tz1g/files/config.yaml +2024-08-04 14:22:58,459 INFO wandb-upload_0:10451 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_142250-6p58tz1g/files/requirements.txt +2024-08-04 14:22:58,506 INFO wandb-upload_2:10451 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_142250-6p58tz1g/files/wandb-summary.json +2024-08-04 14:22:58,525 INFO wandb-upload_3:10451 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_142250-6p58tz1g/files/output.log +2024-08-04 14:22:58,645 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:22:58,645 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:22:58,725 INFO Thread-11 (_thread_body):10451 [sender.py:transition_state():617] send defer: 11 +2024-08-04 14:22:58,725 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:58,725 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-04 14:22:58,726 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:58,726 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-04 14:22:58,726 INFO SenderThread:10451 [file_pusher.py:join():178] waiting for file pusher +2024-08-04 14:22:58,726 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 12 +2024-08-04 14:22:58,726 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:58,726 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-04 14:22:58,726 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:58,726 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-04 14:22:58,726 INFO SenderThread:10451 [file_stream.py:finish():595] file stream finish called +2024-08-04 14:22:58,910 INFO SenderThread:10451 [file_stream.py:finish():599] file stream finish is done +2024-08-04 14:22:58,911 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 13 +2024-08-04 14:22:58,911 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:58,911 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-04 14:22:58,911 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:58,911 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-04 14:22:58,911 INFO SenderThread:10451 [sender.py:transition_state():617] send defer: 14 +2024-08-04 14:22:58,911 DEBUG SenderThread:10451 [sender.py:send():382] send: final +2024-08-04 14:22:58,911 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:22:58,912 DEBUG SenderThread:10451 [sender.py:send():382] send: footer +2024-08-04 14:22:58,912 INFO HandlerThread:10451 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-04 14:22:58,912 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: defer +2024-08-04 14:22:58,912 INFO SenderThread:10451 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-04 14:22:58,912 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:22:58,912 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:22:58,913 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:22:58,913 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:22:58,913 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: server_info +2024-08-04 14:22:58,913 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: get_summary +2024-08-04 14:22:58,914 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: server_info +2024-08-04 14:22:58,915 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-04 14:22:58,915 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 14:22:58,916 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: job_info +2024-08-04 14:22:59,080 DEBUG SenderThread:10451 [sender.py:send_request():409] send_request: job_info +2024-08-04 14:22:59,081 INFO MainThread:10451 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-04 14:22:59,081 INFO MainThread:10451 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-04 14:22:59,081 INFO MainThread:10451 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-04 14:22:59,081 DEBUG HandlerThread:10451 [handler.py:handle_request():146] handle_request: shutdown +2024-08-04 14:22:59,081 INFO HandlerThread:10451 [handler.py:finish():869] shutting down handler +2024-08-04 14:22:59,916 INFO WriterThread:10451 [datastore.py:close():296] close: /project/wandb/run-20240804_142250-6p58tz1g/run-6p58tz1g.wandb +2024-08-04 14:23:00,081 INFO SenderThread:10451 [sender.py:finish():1572] shutting down sender +2024-08-04 14:23:00,081 INFO SenderThread:10451 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 14:23:00,081 INFO SenderThread:10451 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240804_142250-6p58tz1g/logs/debug.log b/wandb/run-20240804_142250-6p58tz1g/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..64b333a22b598544a364945f025467f338769cca --- /dev/null +++ b/wandb/run-20240804_142250-6p58tz1g/logs/debug.log @@ -0,0 +1,30 @@ +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Configure stats pid to 10380 +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train tiny llama sample'} +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240804_142250-6p58tz1g/logs/debug.log +2024-08-04 14:22:50,437 INFO MainThread:10380 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240804_142250-6p58tz1g/logs/debug-internal.log +2024-08-04 14:22:50,438 INFO MainThread:10380 [wandb_init.py:init():566] calling init triggers +2024-08-04 14:22:50,438 INFO MainThread:10380 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'valid_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'test_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 512, 'num_workers': 2, 'tokenizer_type': 'Llama2Tokenizer', 'tokenizer_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'tiny-llama_train_2024-08-04-14:22:39', 'wandb_project': 'llm_tutorial', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/tiny-llama', 'save': '/work/llm_recipes/models/tiny-llama', 'base_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 'eval_iters': 10, 'optimizer': 'adam', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 2000, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 2000, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 8, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 4096, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/tiny-llama', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': False, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 32000, 'gradient_accumulation_steps': 40} +2024-08-04 14:22:50,438 INFO MainThread:10380 [wandb_init.py:init():616] starting backend +2024-08-04 14:22:50,438 INFO MainThread:10380 [wandb_init.py:init():620] setting up manager +2024-08-04 14:22:50,443 INFO MainThread:10380 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-04 14:22:50,443 INFO MainThread:10380 [wandb_init.py:init():628] backend started and connected +2024-08-04 14:22:50,448 INFO MainThread:10380 [wandb_init.py:init():720] updated telemetry +2024-08-04 14:22:50,459 INFO MainThread:10380 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-04 14:22:50,946 INFO MainThread:10380 [wandb_run.py:_on_init():2262] communicating current version +2024-08-04 14:22:51,027 INFO MainThread:10380 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-04 14:22:51,027 INFO MainThread:10380 [wandb_init.py:init():804] starting run threads in backend +2024-08-04 14:22:51,088 INFO MainThread:10380 [wandb_run.py:_console_start():2241] atexit reg +2024-08-04 14:22:51,088 INFO MainThread:10380 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-04 14:22:51,088 INFO MainThread:10380 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-04 14:22:51,088 INFO MainThread:10380 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-04 14:22:51,090 INFO MainThread:10380 [wandb_init.py:init():847] run started, returning control to user process +2024-08-04 14:22:53,535 INFO MainThread:10380 [wandb_run.py:_config_callback():1343] config_cb None None {'activation_function': 'silu', 'hidden_size': 2048, 'model_type': 'llama', 'max_position_embeddings': 2048, 'num_attention_heads': 32, 'num_hidden_layers': 22, 'model_architecture': 'LlamaForCausalLM'} +2024-08-04 14:22:53,535 INFO MainThread:10380 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} +2024-08-04 14:23:00,082 WARNING MsgRouterThr:10380 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240804_142250-6p58tz1g/run-6p58tz1g.wandb b/wandb/run-20240804_142250-6p58tz1g/run-6p58tz1g.wandb new file mode 100644 index 0000000000000000000000000000000000000000..917f59683dfc6c37ecff744f35bccdbee32039df Binary files /dev/null and b/wandb/run-20240804_142250-6p58tz1g/run-6p58tz1g.wandb differ diff --git a/wandb/run-20240804_143607-h7fxlkpt/files/config.yaml b/wandb/run-20240804_143607-h7fxlkpt/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c123198acbffce981a709d4cbb696aad919fc51 --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/files/config.yaml @@ -0,0 +1,335 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +valid_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +test_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 512 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: Llama2Tokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: tiny-llama_train_2024-08-04-14:35:56 +wandb_project: + desc: null + value: llm_tutorial +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/tiny-llama +save: + desc: null + value: /work/llm_recipes/models/tiny-llama +base_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: adam +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 2000 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 2000 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 8 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 4096 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/tiny-llama +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: false +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 32000 +gradient_accumulation_steps: + desc: null + value: 40 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1722749767.220741 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 2048 +model_type: + desc: null + value: llama +max_position_embeddings: + desc: null + value: 2048 +num_attention_heads: + desc: null + value: 32 +num_hidden_layers: + desc: null + value: 22 +model_architecture: + desc: null + value: LlamaForCausalLM diff --git a/wandb/run-20240804_143607-h7fxlkpt/files/output.log b/wandb/run-20240804_143607-h7fxlkpt/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9abb53ace2c37c8a4d95ebea31daf244e7fa2440 --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/files/output.log @@ -0,0 +1,135 @@ +Created Hugging Face repository with ID koichi12/tiny-llama. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama, skipping model loading +--> Model /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +--> /share/pretrained_lm/meta-llama/TinyLlama_v1.1 has 1100.048384 Million params +You are attempting to use Flash Attention 2.0 without specifying a torch dtype. This might lead to unexpected behaviour +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 640000 + validation: 35200 + test: 3200 +> building train, validation, and test datasets for GPT ... +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama, skipping optimizer loading +File not found: /work/llm_recipes/models/tiny-llama/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): LlamaForCausalLM( + (model): LlamaModel( + (embed_tokens): Embedding(32000, 2048) + (layers): ModuleList( + (0-21): 22 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): LlamaDecoderLayer( + (self_attn): LlamaFlashAttention2( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=256, bias=False) + (v_proj): Linear(in_features=2048, out_features=256, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary_emb): LlamaRotaryEmbedding() + ) + (mlp): LlamaMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): LlamaRMSNorm() + (post_attention_layernorm): LlamaRMSNorm() + ) + ) + ) + ) + (norm): LlamaRMSNorm() + (rotary_emb): LlamaRotaryEmbedding() + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + ) +) +model config: LlamaConfig { + "_name_or_path": "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 5632, + "label_smoothing": 0.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 22, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 32000 +} +Let split = None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 104, in train + batch = next(train_dataloader) + File "/project/src/llama_recipes/utils/train_utils.py", line 24, in cyclic_iter + for x in iter: + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 631, in __next__ + data = self._next_data() + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1346, in _next_data + return self._process_data(data) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1372, in _process_data + data.reraise() + File "/usr/local/lib/python3.10/dist-packages/torch/_utils.py", line 705, in reraise + raise exception +RuntimeError: Caught RuntimeError in DataLoader worker process 0. +Original Traceback (most recent call last): + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop + data = fetcher.fetch(index) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch + return self.collate_fn(data) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 277, in default_collate + return collate(batch, collate_fn_map=default_collate_fn_map) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 129, in collate + return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 129, in + return elem_type({key: collate([d[key] for d in batch], collate_fn_map=collate_fn_map) for key in elem}) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 121, in collate + return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map) + File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/collate.py", line 174, in collate_tensor_fn + return torch.stack(batch, 0, out=out) +RuntimeError: stack expects each tensor to be equal size, but got [513] at entry 0 and [543] at entry 1 \ No newline at end of file diff --git a/wandb/run-20240804_143607-h7fxlkpt/files/requirements.txt b/wandb/run-20240804_143607-h7fxlkpt/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c90fd1c54d0a9881f6b4c6465b2a4fa88c9056c --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/files/requirements.txt @@ -0,0 +1,271 @@ +absl-py==2.1.0 +accelerate==0.33.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blis==0.7.11 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distributed==2023.11.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +einops==0.7.0 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +grpcio==1.60.1 +huggingface-hub==0.24.5 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +jedi==0.19.1 +jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonnet==0.19.1 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +kiwisolver==1.4.5 +langcodes==3.3.0 +lazy-loader==0.3 +librosa==0.10.1 +llvmlite==0.40.1 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +murmurhash==1.0.10 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +onnx==1.15.0rc2 +opencv==4.7.0 +optree==0.10.0 +packaging==23.2 +pandas==1.5.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +peft==0.11.1 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.1.dev0+gba5374836.d20240125 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pynvml==11.4.1 +pyparsing==3.1.1 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests==2.31.0 +rich==13.7.0 +rmm==23.12.0 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.0 +safetensors==0.4.3 +scikit-learn==1.2.0 +scipy==1.12.0 +send2trash==1.8.2 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +srsly==2.4.8 +stack-data==0.6.3 +sympy==1.12 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.9.0 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +urllib3==1.26.18 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +xdoctest==1.0.2 +xgboost==1.7.6 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 \ No newline at end of file diff --git a/wandb/run-20240804_143607-h7fxlkpt/files/wandb-metadata.json b/wandb/run-20240804_143607-h7fxlkpt/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0e5d31e401f28a9d17c749c14922ecf37fd1421f --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/files/wandb-metadata.json @@ -0,0 +1,215 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-04T05:36:07.811618", + "startedAt": "2024-08-04T05:36:07.207201", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "512", + "--sliding-window-size", + "4096", + "--micro-batch-size", + "8", + "--global-batch-size", + "320", + "--train-iters", + "2000", + "--tokenizer-type", + "Llama2Tokenizer", + "--tokenizer-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model", + "--train-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--valid-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--test-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "2000", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "adam", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "--save", + "/work/llm_recipes/models/tiny-llama", + "--load", + "/work/llm_recipes/models/tiny-llama", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/tiny-llama", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial", + "--wandb-name", + "tiny-llama_train_2024-08-04-14:35:56" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "3be5353210a678dc7008f237fa16b99f2bdf36ea" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.0389999999993, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.48781967163086 + } +} diff --git a/wandb/run-20240804_143607-h7fxlkpt/files/wandb-summary.json b/wandb/run-20240804_143607-h7fxlkpt/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..6a2353df9a39aec28b5e444685dc5d7223bc37fd --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 2}} \ No newline at end of file diff --git a/wandb/run-20240804_143607-h7fxlkpt/logs/debug-internal.log b/wandb/run-20240804_143607-h7fxlkpt/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..f4e1969313f5f61b995436f53b86452aa954ea90 --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/logs/debug-internal.log @@ -0,0 +1,186 @@ +2024-08-04 14:36:07,222 INFO StreamThr :11584 [internal.py:wandb_internal():86] W&B internal server running at pid: 11584, started at: 2024-08-04 14:36:07.221438 +2024-08-04 14:36:07,223 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: status +2024-08-04 14:36:07,225 INFO WriterThread:11584 [datastore.py:open_for_write():87] open: /project/wandb/run-20240804_143607-h7fxlkpt/run-h7fxlkpt.wandb +2024-08-04 14:36:07,226 DEBUG SenderThread:11584 [sender.py:send():382] send: header +2024-08-04 14:36:07,240 DEBUG SenderThread:11584 [sender.py:send():382] send: run +2024-08-04 14:36:07,696 INFO SenderThread:11584 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240804_143607-h7fxlkpt/files +2024-08-04 14:36:07,696 INFO SenderThread:11584 [sender.py:_start_run_threads():1136] run started: h7fxlkpt with start time 1722749767.220741 +2024-08-04 14:36:07,701 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: check_version +2024-08-04 14:36:07,701 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: check_version +2024-08-04 14:36:07,791 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: run_start +2024-08-04 14:36:07,798 DEBUG HandlerThread:11584 [system_info.py:__init__():27] System info init +2024-08-04 14:36:07,798 DEBUG HandlerThread:11584 [system_info.py:__init__():42] System info init done +2024-08-04 14:36:07,798 INFO HandlerThread:11584 [system_monitor.py:start():194] Starting system monitor +2024-08-04 14:36:07,798 INFO SystemMonitor:11584 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-04 14:36:07,799 INFO HandlerThread:11584 [system_monitor.py:probe():214] Collecting system info +2024-08-04 14:36:07,799 INFO SystemMonitor:11584 [interfaces.py:start():190] Started cpu monitoring +2024-08-04 14:36:07,799 INFO SystemMonitor:11584 [interfaces.py:start():190] Started disk monitoring +2024-08-04 14:36:07,800 INFO SystemMonitor:11584 [interfaces.py:start():190] Started gpu monitoring +2024-08-04 14:36:07,801 INFO SystemMonitor:11584 [interfaces.py:start():190] Started memory monitoring +2024-08-04 14:36:07,802 INFO SystemMonitor:11584 [interfaces.py:start():190] Started network monitoring +2024-08-04 14:36:07,811 DEBUG HandlerThread:11584 [system_info.py:probe():151] Probing system +2024-08-04 14:36:07,813 DEBUG HandlerThread:11584 [system_info.py:_probe_git():136] Probing git +2024-08-04 14:36:07,825 DEBUG HandlerThread:11584 [system_info.py:_probe_git():144] Probing git done +2024-08-04 14:36:07,825 DEBUG HandlerThread:11584 [system_info.py:probe():199] Probing system done +2024-08-04 14:36:07,825 DEBUG HandlerThread:11584 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-04T05:36:07.811618', 'startedAt': '2024-08-04T05:36:07.207201', 'docker': None, 'cuda': None, 'args': ('--seq-length', '512', '--sliding-window-size', '4096', '--micro-batch-size', '8', '--global-batch-size', '320', '--train-iters', '2000', '--tokenizer-type', 'Llama2Tokenizer', '--tokenizer-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', '--train-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--valid-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--test-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '2000', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'adam', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '200', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', '--save', '/work/llm_recipes/models/tiny-llama', '--load', '/work/llm_recipes/models/tiny-llama', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/tiny-llama', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial', '--wandb-name', 'tiny-llama_train_2024-08-04-14:35:56'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '3be5353210a678dc7008f237fa16b99f2bdf36ea'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.0389999999993, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.48781967163086}} +2024-08-04 14:36:07,825 INFO HandlerThread:11584 [system_monitor.py:probe():224] Finished collecting system info +2024-08-04 14:36:07,825 INFO HandlerThread:11584 [system_monitor.py:probe():227] Publishing system info +2024-08-04 14:36:07,827 INFO HandlerThread:11584 [system_monitor.py:probe():229] Finished publishing system info +2024-08-04 14:36:07,833 DEBUG SenderThread:11584 [sender.py:send():382] send: files +2024-08-04 14:36:07,833 INFO SenderThread:11584 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-04 14:36:07,842 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: python_packages +2024-08-04 14:36:07,842 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 14:36:07,842 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 14:36:07,843 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: python_packages +2024-08-04 14:36:07,845 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: stop_status +2024-08-04 14:36:08,168 DEBUG SenderThread:11584 [sender.py:send():382] send: telemetry +2024-08-04 14:36:08,499 INFO wandb-upload_0:11584 [upload_job.py:push():131] Uploaded file /tmp/tmp7k_0gn43wandb/ux980mno-wandb-metadata.json +2024-08-04 14:36:08,698 INFO Thread-12 :11584 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_143607-h7fxlkpt/files/output.log +2024-08-04 14:36:08,698 INFO Thread-12 :11584 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_143607-h7fxlkpt/files/requirements.txt +2024-08-04 14:36:08,698 INFO Thread-12 :11584 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_143607-h7fxlkpt/files/wandb-metadata.json +2024-08-04 14:36:10,261 DEBUG SenderThread:11584 [sender.py:send():382] send: config +2024-08-04 14:36:10,262 DEBUG SenderThread:11584 [sender.py:send():382] send: config +2024-08-04 14:36:10,349 DEBUG SenderThread:11584 [sender.py:send():382] send: exit +2024-08-04 14:36:10,349 INFO SenderThread:11584 [sender.py:send_exit():589] handling exit code: 1 +2024-08-04 14:36:10,349 INFO SenderThread:11584 [sender.py:send_exit():591] handling runtime: 2 +2024-08-04 14:36:10,351 INFO SenderThread:11584 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 14:36:10,351 INFO SenderThread:11584 [sender.py:send_exit():597] send defer +2024-08-04 14:36:10,351 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,351 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-04 14:36:10,351 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,351 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-04 14:36:10,351 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 1 +2024-08-04 14:36:10,352 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,352 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-04 14:36:10,352 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,352 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-04 14:36:10,352 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 2 +2024-08-04 14:36:10,352 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,352 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-04 14:36:10,352 INFO HandlerThread:11584 [system_monitor.py:finish():203] Stopping system monitor +2024-08-04 14:36:10,352 DEBUG SystemMonitor:11584 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-04 14:36:10,352 INFO HandlerThread:11584 [interfaces.py:finish():202] Joined cpu monitor +2024-08-04 14:36:10,352 DEBUG SystemMonitor:11584 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-04 14:36:10,353 INFO HandlerThread:11584 [interfaces.py:finish():202] Joined disk monitor +2024-08-04 14:36:10,353 DEBUG SystemMonitor:11584 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-04 14:36:10,385 INFO HandlerThread:11584 [interfaces.py:finish():202] Joined gpu monitor +2024-08-04 14:36:10,385 INFO HandlerThread:11584 [interfaces.py:finish():202] Joined memory monitor +2024-08-04 14:36:10,386 INFO HandlerThread:11584 [interfaces.py:finish():202] Joined network monitor +2024-08-04 14:36:10,386 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,386 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-04 14:36:10,386 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 3 +2024-08-04 14:36:10,386 DEBUG SenderThread:11584 [sender.py:send():382] send: stats +2024-08-04 14:36:10,386 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,386 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-04 14:36:10,387 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,387 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-04 14:36:10,387 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 4 +2024-08-04 14:36:10,387 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,387 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-04 14:36:10,387 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,387 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-04 14:36:10,387 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 5 +2024-08-04 14:36:10,387 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,387 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-04 14:36:10,387 DEBUG SenderThread:11584 [sender.py:send():382] send: summary +2024-08-04 14:36:10,388 INFO SenderThread:11584 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 14:36:10,388 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,388 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-04 14:36:10,388 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 6 +2024-08-04 14:36:10,389 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,389 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-04 14:36:10,389 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,389 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-04 14:36:10,391 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 14:36:10,576 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 7 +2024-08-04 14:36:10,577 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:10,577 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-04 14:36:10,577 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:10,577 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-04 14:36:10,699 INFO Thread-12 :11584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_143607-h7fxlkpt/files/output.log +2024-08-04 14:36:10,699 INFO Thread-12 :11584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_143607-h7fxlkpt/files/config.yaml +2024-08-04 14:36:10,699 INFO Thread-12 :11584 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_143607-h7fxlkpt/files/wandb-summary.json +2024-08-04 14:36:11,349 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:36:12,530 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 8 +2024-08-04 14:36:12,530 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:36:12,530 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:12,531 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-04 14:36:12,531 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:12,531 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-04 14:36:12,531 INFO SenderThread:11584 [job_builder.py:build():296] Attempting to build job artifact +2024-08-04 14:36:12,532 INFO SenderThread:11584 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-04 14:36:12,546 INFO SenderThread:11584 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-04 14:36:12,554 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 9 +2024-08-04 14:36:12,555 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:12,555 DEBUG SenderThread:11584 [sender.py:send():382] send: artifact +2024-08-04 14:36:12,555 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-04 14:36:12,700 INFO Thread-12 :11584 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_143607-h7fxlkpt/files/output.log +2024-08-04 14:36:13,350 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:36:13,435 INFO SenderThread:11584 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTA5MTk2NTkzOA==', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjM2MjY3MjMzNA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTA5MzUzODM4NQ==', 'versionIndex': 3}}} +2024-08-04 14:36:13,435 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:13,435 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-04 14:36:13,435 INFO SenderThread:11584 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-04 14:36:13,701 INFO SenderThread:11584 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240804_143607-h7fxlkpt/files +2024-08-04 14:36:13,701 INFO SenderThread:11584 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_143607-h7fxlkpt/files/requirements.txt requirements.txt +2024-08-04 14:36:13,702 INFO SenderThread:11584 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_143607-h7fxlkpt/files/config.yaml config.yaml +2024-08-04 14:36:13,703 INFO SenderThread:11584 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_143607-h7fxlkpt/files/wandb-metadata.json wandb-metadata.json +2024-08-04 14:36:13,703 INFO SenderThread:11584 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_143607-h7fxlkpt/files/wandb-summary.json wandb-summary.json +2024-08-04 14:36:13,705 INFO SenderThread:11584 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_143607-h7fxlkpt/files/output.log output.log +2024-08-04 14:36:13,706 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 10 +2024-08-04 14:36:13,707 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:36:13,707 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:13,707 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-04 14:36:13,708 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:13,708 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-04 14:36:13,709 INFO SenderThread:11584 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 14:36:14,120 INFO wandb-upload_0:11584 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_143607-h7fxlkpt/files/requirements.txt +2024-08-04 14:36:14,203 INFO wandb-upload_1:11584 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_143607-h7fxlkpt/files/config.yaml +2024-08-04 14:36:14,309 INFO wandb-upload_3:11584 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_143607-h7fxlkpt/files/output.log +2024-08-04 14:36:14,324 INFO wandb-upload_2:11584 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_143607-h7fxlkpt/files/wandb-summary.json +2024-08-04 14:36:14,351 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:36:14,351 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:36:14,524 INFO Thread-11 (_thread_body):11584 [sender.py:transition_state():617] send defer: 11 +2024-08-04 14:36:14,524 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:14,524 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-04 14:36:14,524 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:14,524 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-04 14:36:14,524 INFO SenderThread:11584 [file_pusher.py:join():178] waiting for file pusher +2024-08-04 14:36:14,525 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 12 +2024-08-04 14:36:14,525 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:14,525 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-04 14:36:14,525 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:14,525 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-04 14:36:14,525 INFO SenderThread:11584 [file_stream.py:finish():595] file stream finish called +2024-08-04 14:36:14,732 INFO SenderThread:11584 [file_stream.py:finish():599] file stream finish is done +2024-08-04 14:36:14,732 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 13 +2024-08-04 14:36:14,732 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:14,732 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-04 14:36:14,732 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:14,732 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-04 14:36:14,732 INFO SenderThread:11584 [sender.py:transition_state():617] send defer: 14 +2024-08-04 14:36:14,732 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: defer +2024-08-04 14:36:14,733 DEBUG SenderThread:11584 [sender.py:send():382] send: final +2024-08-04 14:36:14,733 INFO HandlerThread:11584 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-04 14:36:14,733 DEBUG SenderThread:11584 [sender.py:send():382] send: footer +2024-08-04 14:36:14,733 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: defer +2024-08-04 14:36:14,733 INFO SenderThread:11584 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-04 14:36:14,733 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:36:14,733 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:36:14,734 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 14:36:14,734 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: server_info +2024-08-04 14:36:14,734 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 14:36:14,734 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: server_info +2024-08-04 14:36:14,734 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: get_summary +2024-08-04 14:36:14,736 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-04 14:36:14,736 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 14:36:14,736 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: job_info +2024-08-04 14:36:14,893 DEBUG SenderThread:11584 [sender.py:send_request():409] send_request: job_info +2024-08-04 14:36:14,893 INFO MainThread:11584 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-04 14:36:14,894 INFO MainThread:11584 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-04 14:36:14,894 INFO MainThread:11584 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-04 14:36:14,894 DEBUG HandlerThread:11584 [handler.py:handle_request():146] handle_request: shutdown +2024-08-04 14:36:14,894 INFO HandlerThread:11584 [handler.py:finish():869] shutting down handler +2024-08-04 14:36:15,737 INFO WriterThread:11584 [datastore.py:close():296] close: /project/wandb/run-20240804_143607-h7fxlkpt/run-h7fxlkpt.wandb +2024-08-04 14:36:15,893 INFO SenderThread:11584 [sender.py:finish():1572] shutting down sender +2024-08-04 14:36:15,894 INFO SenderThread:11584 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 14:36:15,894 INFO SenderThread:11584 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240804_143607-h7fxlkpt/logs/debug.log b/wandb/run-20240804_143607-h7fxlkpt/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d91551a3c7b8d7c39bb59b76ef6b665dd0cfb390 --- /dev/null +++ b/wandb/run-20240804_143607-h7fxlkpt/logs/debug.log @@ -0,0 +1,30 @@ +2024-08-04 14:36:07,213 INFO MainThread:11513 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_setup.py:_flush():76] Configure stats pid to 11513 +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train tiny llama sample'} +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240804_143607-h7fxlkpt/logs/debug.log +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240804_143607-h7fxlkpt/logs/debug-internal.log +2024-08-04 14:36:07,214 INFO MainThread:11513 [wandb_init.py:init():566] calling init triggers +2024-08-04 14:36:07,215 INFO MainThread:11513 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'valid_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'test_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 512, 'num_workers': 2, 'tokenizer_type': 'Llama2Tokenizer', 'tokenizer_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'tiny-llama_train_2024-08-04-14:35:56', 'wandb_project': 'llm_tutorial', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/tiny-llama', 'save': '/work/llm_recipes/models/tiny-llama', 'base_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 'eval_iters': 10, 'optimizer': 'adam', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 2000, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 2000, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 8, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 4096, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/tiny-llama', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': False, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 32000, 'gradient_accumulation_steps': 40} +2024-08-04 14:36:07,215 INFO MainThread:11513 [wandb_init.py:init():616] starting backend +2024-08-04 14:36:07,215 INFO MainThread:11513 [wandb_init.py:init():620] setting up manager +2024-08-04 14:36:07,219 INFO MainThread:11513 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-04 14:36:07,220 INFO MainThread:11513 [wandb_init.py:init():628] backend started and connected +2024-08-04 14:36:07,225 INFO MainThread:11513 [wandb_init.py:init():720] updated telemetry +2024-08-04 14:36:07,236 INFO MainThread:11513 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-04 14:36:07,701 INFO MainThread:11513 [wandb_run.py:_on_init():2262] communicating current version +2024-08-04 14:36:07,784 INFO MainThread:11513 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-04 14:36:07,784 INFO MainThread:11513 [wandb_init.py:init():804] starting run threads in backend +2024-08-04 14:36:07,841 INFO MainThread:11513 [wandb_run.py:_console_start():2241] atexit reg +2024-08-04 14:36:07,842 INFO MainThread:11513 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-04 14:36:07,842 INFO MainThread:11513 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-04 14:36:07,842 INFO MainThread:11513 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-04 14:36:07,843 INFO MainThread:11513 [wandb_init.py:init():847] run started, returning control to user process +2024-08-04 14:36:10,261 INFO MainThread:11513 [wandb_run.py:_config_callback():1343] config_cb None None {'activation_function': 'silu', 'hidden_size': 2048, 'model_type': 'llama', 'max_position_embeddings': 2048, 'num_attention_heads': 32, 'num_hidden_layers': 22, 'model_architecture': 'LlamaForCausalLM'} +2024-08-04 14:36:10,261 INFO MainThread:11513 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} +2024-08-04 14:36:15,895 WARNING MsgRouterThr:11513 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240804_143607-h7fxlkpt/run-h7fxlkpt.wandb b/wandb/run-20240804_143607-h7fxlkpt/run-h7fxlkpt.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7b55ecdecb9f09c96ad47ba4e2075260deb3f28d Binary files /dev/null and b/wandb/run-20240804_143607-h7fxlkpt/run-h7fxlkpt.wandb differ diff --git a/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml b/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8599eb92c45d6a081585ce488a41976769c14bc9 --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml @@ -0,0 +1,335 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '235289369' + - /work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document +valid_data_path: + desc: null + value: + - '235289369' + - /work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document +test_data_path: + desc: null + value: + - '235289369' + - /work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 4096 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: HFPreTrainedTokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/google/gemma-2-2b +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: yans-sample-gemma-2-2b_train_2024-08-04-22:11:21 +wandb_project: + desc: null + value: llm_tutorial +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/yans-sample-gemma-2-2b +save: + desc: null + value: /work/llm_recipes/models/yans-sample-gemma-2-2b +base_model: + desc: null + value: /share/pretrained_lm/google/gemma-2-2b +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: anyprecision +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 20000 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 20000 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 2 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 4096 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/yans-sample-gemma-2-2b +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: false +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 256000 +gradient_accumulation_steps: + desc: null + value: 160 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1722777092.265577 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +activation_function: + desc: null + value: gelu_pytorch_tanh +hidden_size: + desc: null + value: 2304 +model_type: + desc: null + value: gemma2 +max_position_embeddings: + desc: null + value: 4096 +num_attention_heads: + desc: null + value: 8 +num_hidden_layers: + desc: null + value: 26 +model_architecture: + desc: null + value: Gemma2ForCausalLM diff --git a/wandb/run-20240804_221132-o8ieoj9i/files/output.log b/wandb/run-20240804_221132-o8ieoj9i/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..d016448ef4cc8170f8f007666ba866a26f563c46 --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/files/output.log @@ -0,0 +1,135 @@ +Created Hugging Face repository with ID koichi12/yans-sample-gemma-2-2b. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + + +Loading checkpoint shards: 67%|██████▋ | 2/3 [02:29<01:15, 75.36s/it] +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-sample-gemma-2-2b, skipping model loading +--> Model /share/pretrained_lm/google/gemma-2-2b +--> /share/pretrained_lm/google/gemma-2-2b has 2614.341888 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 6400000 + validation: 323200 + test: 3200 +Loading checkpoint shards: 100%|██████████| 3/3 [02:38<00:00, 52.69s/it] +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +Let split = None +Building a BlendedDataset for a single MegatronDataset +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-sample-gemma-2-2b, skipping optimizer loading +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): Gemma2ForCausalLM( + (model): Gemma2Model( + (embed_tokens): Embedding(256000, 2304, padding_idx=0) + (layers): ModuleList( + (0-25): 26 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Gemma2DecoderLayer( + (self_attn): Gemma2FlashAttention2( + (q_proj): Linear(in_features=2304, out_features=2048, bias=False) + (k_proj): Linear(in_features=2304, out_features=1024, bias=False) + (v_proj): Linear(in_features=2304, out_features=1024, bias=False) + (o_proj): Linear(in_features=2048, out_features=2304, bias=False) + (rotary_emb): Gemma2RotaryEmbedding() + ) + (mlp): Gemma2MLP( + (gate_proj): Linear(in_features=2304, out_features=9216, bias=False) + (up_proj): Linear(in_features=2304, out_features=9216, bias=False) + (down_proj): Linear(in_features=9216, out_features=2304, bias=False) + (act_fn): PytorchGELUTanh() + ) + (input_layernorm): Gemma2RMSNorm() + (post_attention_layernorm): Gemma2RMSNorm() + (pre_feedforward_layernorm): Gemma2RMSNorm() + (post_feedforward_layernorm): Gemma2RMSNorm() + ) + ) + ) + ) + (norm): Gemma2RMSNorm() + ) + (lm_head): Linear(in_features=2304, out_features=256000, bias=False) + ) +) +model config: Gemma2Config { + "_name_or_path": "/share/pretrained_lm/google/gemma-2-2b", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "label_smoothing": 0.0, + "max_position_embeddings": 4096, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "float32", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 256000 +} +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +It is strongly recommended to train Gemma2 models with the `eager` attention implementation instead of `flash_attention_2`. Use `eager` with `AutoModelForCausalLM.from_pretrained('', attn_implementation='eager')`. +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 110, in train + loss: torch.Tensor = model(**batch).loss + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward + output = self._fsdp_wrapped_module(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/project/lib/transformers/src/transformers/models/gemma2/modeling_gemma2.py", line 976, in forward + loss = loss_fct(shift_logits, shift_labels) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/loss.py", line 1179, in forward + return F.cross_entropy(input, target, weight=self.weight, + File "/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py", line 3086, in cross_entropy + return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing) +torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 7.81 GiB. GPU 0 has a total capacity of 39.39 GiB of which 7.81 GiB is free. Including non-PyTorch memory, this process has 31.58 GiB memory in use. Of the allocated memory 30.38 GiB is allocated by PyTorch, and 385.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) \ No newline at end of file diff --git a/wandb/run-20240804_221132-o8ieoj9i/files/requirements.txt b/wandb/run-20240804_221132-o8ieoj9i/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c90fd1c54d0a9881f6b4c6465b2a4fa88c9056c --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/files/requirements.txt @@ -0,0 +1,271 @@ +absl-py==2.1.0 +accelerate==0.33.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blis==0.7.11 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distributed==2023.11.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +einops==0.7.0 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +grpcio==1.60.1 +huggingface-hub==0.24.5 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +jedi==0.19.1 +jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonnet==0.19.1 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +kiwisolver==1.4.5 +langcodes==3.3.0 +lazy-loader==0.3 +librosa==0.10.1 +llvmlite==0.40.1 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +murmurhash==1.0.10 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +onnx==1.15.0rc2 +opencv==4.7.0 +optree==0.10.0 +packaging==23.2 +pandas==1.5.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +peft==0.11.1 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.1.dev0+gba5374836.d20240125 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pynvml==11.4.1 +pyparsing==3.1.1 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests==2.31.0 +rich==13.7.0 +rmm==23.12.0 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.0 +safetensors==0.4.3 +scikit-learn==1.2.0 +scipy==1.12.0 +send2trash==1.8.2 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +srsly==2.4.8 +stack-data==0.6.3 +sympy==1.12 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.9.0 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +urllib3==1.26.18 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +xdoctest==1.0.2 +xgboost==1.7.6 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 \ No newline at end of file diff --git a/wandb/run-20240804_221132-o8ieoj9i/files/wandb-metadata.json b/wandb/run-20240804_221132-o8ieoj9i/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9c989557b14e85f4162c195efed7f3c3348d92 --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/files/wandb-metadata.json @@ -0,0 +1,215 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-04T13:11:32.902217", + "startedAt": "2024-08-04T13:11:32.253120", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "4096", + "--sliding-window-size", + "4096", + "--micro-batch-size", + "2", + "--global-batch-size", + "320", + "--train-iters", + "20000", + "--tokenizer-type", + "HFPreTrainedTokenizer", + "--tokenizer-model", + "/share/pretrained_lm/google/gemma-2-2b", + "--train-data-path", + "235289369", + "/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document", + "--valid-data-path", + "235289369", + "/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document", + "--test-data-path", + "235289369", + "/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "20000", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "anyprecision", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/google/gemma-2-2b", + "--save", + "/work/llm_recipes/models/yans-sample-gemma-2-2b", + "--load", + "/work/llm_recipes/models/yans-sample-gemma-2-2b", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/yans-sample-gemma-2-2b", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial", + "--wandb-name", + "yans-sample-gemma-2-2b_train_2024-08-04-22:11:21" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "0336bd6c20fe25d78eda1d14afa66c1ae2e6d687" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.044999999999, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.045, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.48782730102539 + } +} diff --git a/wandb/run-20240804_221132-o8ieoj9i/files/wandb-summary.json b/wandb/run-20240804_221132-o8ieoj9i/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..5618cc108bc65b21dc8a97695d74886db918ac58 --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 166}} \ No newline at end of file diff --git a/wandb/run-20240804_221132-o8ieoj9i/logs/debug-internal.log b/wandb/run-20240804_221132-o8ieoj9i/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..d51f9e9b7cf4935d2db1490050052858dac08074 --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/logs/debug-internal.log @@ -0,0 +1,263 @@ +2024-08-04 22:11:32,267 INFO StreamThr :12237 [internal.py:wandb_internal():86] W&B internal server running at pid: 12237, started at: 2024-08-04 22:11:32.266168 +2024-08-04 22:11:32,268 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status +2024-08-04 22:11:32,270 INFO WriterThread:12237 [datastore.py:open_for_write():87] open: /project/wandb/run-20240804_221132-o8ieoj9i/run-o8ieoj9i.wandb +2024-08-04 22:11:32,271 DEBUG SenderThread:12237 [sender.py:send():382] send: header +2024-08-04 22:11:32,285 DEBUG SenderThread:12237 [sender.py:send():382] send: run +2024-08-04 22:11:32,779 INFO SenderThread:12237 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240804_221132-o8ieoj9i/files +2024-08-04 22:11:32,779 INFO SenderThread:12237 [sender.py:_start_run_threads():1136] run started: o8ieoj9i with start time 1722777092.265577 +2024-08-04 22:11:32,784 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: check_version +2024-08-04 22:11:32,784 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: check_version +2024-08-04 22:11:32,884 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: run_start +2024-08-04 22:11:32,890 DEBUG HandlerThread:12237 [system_info.py:__init__():27] System info init +2024-08-04 22:11:32,890 DEBUG HandlerThread:12237 [system_info.py:__init__():42] System info init done +2024-08-04 22:11:32,890 INFO HandlerThread:12237 [system_monitor.py:start():194] Starting system monitor +2024-08-04 22:11:32,890 INFO SystemMonitor:12237 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-04 22:11:32,890 INFO HandlerThread:12237 [system_monitor.py:probe():214] Collecting system info +2024-08-04 22:11:32,891 INFO SystemMonitor:12237 [interfaces.py:start():190] Started cpu monitoring +2024-08-04 22:11:32,891 INFO SystemMonitor:12237 [interfaces.py:start():190] Started disk monitoring +2024-08-04 22:11:32,892 INFO SystemMonitor:12237 [interfaces.py:start():190] Started gpu monitoring +2024-08-04 22:11:32,893 INFO SystemMonitor:12237 [interfaces.py:start():190] Started memory monitoring +2024-08-04 22:11:32,893 INFO SystemMonitor:12237 [interfaces.py:start():190] Started network monitoring +2024-08-04 22:11:32,902 DEBUG HandlerThread:12237 [system_info.py:probe():151] Probing system +2024-08-04 22:11:32,904 DEBUG HandlerThread:12237 [system_info.py:_probe_git():136] Probing git +2024-08-04 22:11:32,916 DEBUG HandlerThread:12237 [system_info.py:_probe_git():144] Probing git done +2024-08-04 22:11:32,916 DEBUG HandlerThread:12237 [system_info.py:probe():199] Probing system done +2024-08-04 22:11:32,916 DEBUG HandlerThread:12237 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-04T13:11:32.902217', 'startedAt': '2024-08-04T13:11:32.253120', 'docker': None, 'cuda': None, 'args': ('--seq-length', '4096', '--sliding-window-size', '4096', '--micro-batch-size', '2', '--global-batch-size', '320', '--train-iters', '20000', '--tokenizer-type', 'HFPreTrainedTokenizer', '--tokenizer-model', '/share/pretrained_lm/google/gemma-2-2b', '--train-data-path', '235289369', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document', '--valid-data-path', '235289369', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document', '--test-data-path', '235289369', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '20000', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'anyprecision', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '200', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/google/gemma-2-2b', '--save', '/work/llm_recipes/models/yans-sample-gemma-2-2b', '--load', '/work/llm_recipes/models/yans-sample-gemma-2-2b', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/yans-sample-gemma-2-2b', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial', '--wandb-name', 'yans-sample-gemma-2-2b_train_2024-08-04-22:11:21'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '0336bd6c20fe25d78eda1d14afa66c1ae2e6d687'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.044999999999, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}, {'current': 2400.045, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.48782730102539}} +2024-08-04 22:11:32,916 INFO HandlerThread:12237 [system_monitor.py:probe():224] Finished collecting system info +2024-08-04 22:11:32,916 INFO HandlerThread:12237 [system_monitor.py:probe():227] Publishing system info +2024-08-04 22:11:32,917 INFO HandlerThread:12237 [system_monitor.py:probe():229] Finished publishing system info +2024-08-04 22:11:32,923 DEBUG SenderThread:12237 [sender.py:send():382] send: files +2024-08-04 22:11:32,923 INFO SenderThread:12237 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-04 22:11:32,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: python_packages +2024-08-04 22:11:32,933 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:11:32,933 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:11:32,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: python_packages +2024-08-04 22:11:32,935 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:11:33,202 DEBUG SenderThread:12237 [sender.py:send():382] send: telemetry +2024-08-04 22:11:33,617 INFO wandb-upload_0:12237 [upload_job.py:push():131] Uploaded file /tmp/tmpntsoky67wandb/ybme98wl-wandb-metadata.json +2024-08-04 22:11:33,780 INFO Thread-12 :12237 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_221132-o8ieoj9i/files/requirements.txt +2024-08-04 22:11:33,781 INFO Thread-12 :12237 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_221132-o8ieoj9i/files/wandb-metadata.json +2024-08-04 22:11:33,781 INFO Thread-12 :12237 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:11:35,781 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:11:37,800 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:11:42,801 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:11:47,802 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:11:47,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:11:47,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:11:47,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:11:53,184 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:11:58,184 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:02,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:12:02,932 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:12:02,972 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:12:04,128 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:04,797 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml +2024-08-04 22:12:09,335 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:14,336 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:17,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:12:17,932 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:12:17,972 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:12:20,198 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:25,199 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:30,199 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:32,894 DEBUG SystemMonitor:12237 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-04 22:12:32,895 DEBUG SenderThread:12237 [sender.py:send():382] send: stats +2024-08-04 22:12:32,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:12:32,932 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:12:32,972 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:12:36,110 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:41,111 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:45,820 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:12:46,558 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:47,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:12:47,933 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:12:47,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:12:52,156 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:12:57,157 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:02,157 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:02,897 DEBUG SenderThread:12237 [sender.py:send():382] send: stats +2024-08-04 22:13:02,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:13:02,932 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:13:02,972 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:13:08,124 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:13,125 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:17,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:13:17,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:13:17,976 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:13:18,132 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:23,133 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:28,134 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:32,898 DEBUG SenderThread:12237 [sender.py:send():382] send: stats +2024-08-04 22:13:32,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:13:32,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:13:32,976 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:13:33,205 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:38,206 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:43,207 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:47,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:13:47,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:13:47,976 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:13:49,120 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:54,121 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:13:59,122 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:14:02,898 DEBUG SenderThread:12237 [sender.py:send():382] send: stats +2024-08-04 22:14:02,932 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:14:02,933 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:14:02,976 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:14:04,197 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:14:04,864 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:14:09,198 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:14:13,453 DEBUG SenderThread:12237 [sender.py:send():382] send: config +2024-08-04 22:14:13,453 DEBUG SenderThread:12237 [sender.py:send():382] send: config +2024-08-04 22:14:13,869 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:14:14,550 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:14:15,870 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:14:17,933 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: stop_status +2024-08-04 22:14:17,934 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:14:17,934 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: stop_status +2024-08-04 22:14:19,437 DEBUG SenderThread:12237 [sender.py:send():382] send: exit +2024-08-04 22:14:19,437 INFO SenderThread:12237 [sender.py:send_exit():589] handling exit code: 1 +2024-08-04 22:14:19,437 INFO SenderThread:12237 [sender.py:send_exit():591] handling runtime: 166 +2024-08-04 22:14:19,438 INFO SenderThread:12237 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 22:14:19,439 INFO SenderThread:12237 [sender.py:send_exit():597] send defer +2024-08-04 22:14:19,439 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,439 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-04 22:14:19,439 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,439 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-04 22:14:19,439 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 1 +2024-08-04 22:14:19,439 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,439 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-04 22:14:19,439 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,439 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-04 22:14:19,439 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 2 +2024-08-04 22:14:19,439 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,440 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-04 22:14:19,440 INFO HandlerThread:12237 [system_monitor.py:finish():203] Stopping system monitor +2024-08-04 22:14:19,440 DEBUG SystemMonitor:12237 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-04 22:14:19,440 DEBUG SystemMonitor:12237 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-04 22:14:19,440 INFO HandlerThread:12237 [interfaces.py:finish():202] Joined cpu monitor +2024-08-04 22:14:19,441 INFO HandlerThread:12237 [interfaces.py:finish():202] Joined disk monitor +2024-08-04 22:14:19,474 INFO HandlerThread:12237 [interfaces.py:finish():202] Joined gpu monitor +2024-08-04 22:14:19,474 INFO HandlerThread:12237 [interfaces.py:finish():202] Joined memory monitor +2024-08-04 22:14:19,474 INFO HandlerThread:12237 [interfaces.py:finish():202] Joined network monitor +2024-08-04 22:14:19,475 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,475 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-04 22:14:19,475 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 3 +2024-08-04 22:14:19,475 DEBUG SenderThread:12237 [sender.py:send():382] send: stats +2024-08-04 22:14:19,475 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,475 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-04 22:14:19,475 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,475 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-04 22:14:19,475 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 4 +2024-08-04 22:14:19,475 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,475 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-04 22:14:19,476 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,476 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-04 22:14:19,476 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 5 +2024-08-04 22:14:19,476 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,476 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-04 22:14:19,476 DEBUG SenderThread:12237 [sender.py:send():382] send: summary +2024-08-04 22:14:19,477 INFO SenderThread:12237 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 22:14:19,477 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,477 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-04 22:14:19,477 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 6 +2024-08-04 22:14:19,477 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,477 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-04 22:14:19,477 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,477 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-04 22:14:19,480 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:14:19,712 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 7 +2024-08-04 22:14:19,712 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:19,712 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-04 22:14:19,712 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:19,712 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-04 22:14:19,873 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml +2024-08-04 22:14:19,874 INFO Thread-12 :12237 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_221132-o8ieoj9i/files/wandb-summary.json +2024-08-04 22:14:20,437 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 22:14:20,874 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:14:21,905 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 8 +2024-08-04 22:14:21,905 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 22:14:21,905 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:21,906 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-04 22:14:21,906 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:21,906 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-04 22:14:21,906 INFO SenderThread:12237 [job_builder.py:build():296] Attempting to build job artifact +2024-08-04 22:14:21,907 INFO SenderThread:12237 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-04 22:14:21,921 INFO SenderThread:12237 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-04 22:14:21,929 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 9 +2024-08-04 22:14:21,929 DEBUG SenderThread:12237 [sender.py:send():382] send: artifact +2024-08-04 22:14:21,929 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:21,931 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-04 22:14:22,437 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 22:14:22,875 INFO Thread-12 :12237 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:14:23,127 INFO wandb-upload_0:12237 [upload_job.py:push():86] Skipped uploading /singularity_home/.local/share/wandb/artifacts/staging/tmpaydno9il +2024-08-04 22:14:23,543 INFO wandb-upload_1:12237 [upload_job.py:push():89] Uploaded file /singularity_home/.local/share/wandb/artifacts/staging/tmpaetcwljm +2024-08-04 22:14:24,702 INFO SenderThread:12237 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTA5ODUzNDkwNw==', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjM2MjY3MjMzNA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTA5MzUzODM4NQ==', 'versionIndex': 3}}} +2024-08-04 22:14:24,702 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:24,702 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-04 22:14:24,702 INFO SenderThread:12237 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-04 22:14:24,876 INFO SenderThread:12237 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240804_221132-o8ieoj9i/files +2024-08-04 22:14:24,876 INFO SenderThread:12237 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_221132-o8ieoj9i/files/requirements.txt requirements.txt +2024-08-04 22:14:24,876 INFO SenderThread:12237 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml config.yaml +2024-08-04 22:14:24,878 INFO SenderThread:12237 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_221132-o8ieoj9i/files/wandb-metadata.json wandb-metadata.json +2024-08-04 22:14:24,878 INFO SenderThread:12237 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_221132-o8ieoj9i/files/wandb-summary.json wandb-summary.json +2024-08-04 22:14:24,879 INFO SenderThread:12237 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log output.log +2024-08-04 22:14:24,881 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 10 +2024-08-04 22:14:24,881 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 22:14:24,881 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:24,882 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-04 22:14:24,882 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 22:14:24,883 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:24,883 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-04 22:14:24,883 INFO SenderThread:12237 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 22:14:25,282 INFO wandb-upload_0:12237 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_221132-o8ieoj9i/files/requirements.txt +2024-08-04 22:14:25,375 INFO wandb-upload_1:12237 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_221132-o8ieoj9i/files/config.yaml +2024-08-04 22:14:25,438 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 22:14:25,438 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 22:14:25,461 INFO wandb-upload_2:12237 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_221132-o8ieoj9i/files/wandb-summary.json +2024-08-04 22:14:25,480 INFO wandb-upload_3:12237 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_221132-o8ieoj9i/files/output.log +2024-08-04 22:14:25,680 INFO Thread-11 (_thread_body):12237 [sender.py:transition_state():617] send defer: 11 +2024-08-04 22:14:25,681 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:25,681 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-04 22:14:25,681 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:25,681 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-04 22:14:25,681 INFO SenderThread:12237 [file_pusher.py:join():178] waiting for file pusher +2024-08-04 22:14:25,681 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 12 +2024-08-04 22:14:25,681 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:25,681 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-04 22:14:25,681 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:25,681 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-04 22:14:25,681 INFO SenderThread:12237 [file_stream.py:finish():595] file stream finish called +2024-08-04 22:14:25,848 INFO SenderThread:12237 [file_stream.py:finish():599] file stream finish is done +2024-08-04 22:14:25,848 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 13 +2024-08-04 22:14:25,849 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:25,849 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-04 22:14:25,849 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:25,849 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-04 22:14:25,849 INFO SenderThread:12237 [sender.py:transition_state():617] send defer: 14 +2024-08-04 22:14:25,849 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: defer +2024-08-04 22:14:25,849 DEBUG SenderThread:12237 [sender.py:send():382] send: final +2024-08-04 22:14:25,849 INFO HandlerThread:12237 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-04 22:14:25,849 DEBUG SenderThread:12237 [sender.py:send():382] send: footer +2024-08-04 22:14:25,850 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: defer +2024-08-04 22:14:25,850 INFO SenderThread:12237 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-04 22:14:25,850 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 22:14:25,850 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 22:14:25,850 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 22:14:25,851 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 22:14:25,851 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: server_info +2024-08-04 22:14:25,851 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: get_summary +2024-08-04 22:14:25,851 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-04 22:14:25,852 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: server_info +2024-08-04 22:14:25,852 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 22:14:25,853 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: job_info +2024-08-04 22:14:26,030 DEBUG SenderThread:12237 [sender.py:send_request():409] send_request: job_info +2024-08-04 22:14:26,030 INFO MainThread:12237 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-04 22:14:26,030 INFO MainThread:12237 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-04 22:14:26,030 INFO MainThread:12237 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-04 22:14:26,031 DEBUG HandlerThread:12237 [handler.py:handle_request():146] handle_request: shutdown +2024-08-04 22:14:26,031 INFO HandlerThread:12237 [handler.py:finish():869] shutting down handler +2024-08-04 22:14:26,853 INFO WriterThread:12237 [datastore.py:close():296] close: /project/wandb/run-20240804_221132-o8ieoj9i/run-o8ieoj9i.wandb +2024-08-04 22:14:27,030 INFO SenderThread:12237 [sender.py:finish():1572] shutting down sender +2024-08-04 22:14:27,030 INFO SenderThread:12237 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 22:14:27,030 INFO SenderThread:12237 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240804_221132-o8ieoj9i/logs/debug.log b/wandb/run-20240804_221132-o8ieoj9i/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c1393a52039e4046520813c7f62b2ee456172d66 --- /dev/null +++ b/wandb/run-20240804_221132-o8ieoj9i/logs/debug.log @@ -0,0 +1,30 @@ +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Configure stats pid to 12166 +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train sample'} +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240804_221132-o8ieoj9i/logs/debug.log +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240804_221132-o8ieoj9i/logs/debug-internal.log +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_init.py:init():566] calling init triggers +2024-08-04 22:11:32,259 INFO MainThread:12166 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['235289369', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document'], 'valid_data_path': ['235289369', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document'], 'test_data_path': ['235289369', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 4096, 'num_workers': 2, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/google/gemma-2-2b', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'yans-sample-gemma-2-2b_train_2024-08-04-22:11:21', 'wandb_project': 'llm_tutorial', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/yans-sample-gemma-2-2b', 'save': '/work/llm_recipes/models/yans-sample-gemma-2-2b', 'base_model': '/share/pretrained_lm/google/gemma-2-2b', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 'eval_iters': 10, 'optimizer': 'anyprecision', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 20000, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 20000, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 2, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 4096, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/yans-sample-gemma-2-2b', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': False, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 256000, 'gradient_accumulation_steps': 160} +2024-08-04 22:11:32,260 INFO MainThread:12166 [wandb_init.py:init():616] starting backend +2024-08-04 22:11:32,260 INFO MainThread:12166 [wandb_init.py:init():620] setting up manager +2024-08-04 22:11:32,264 INFO MainThread:12166 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-04 22:11:32,265 INFO MainThread:12166 [wandb_init.py:init():628] backend started and connected +2024-08-04 22:11:32,270 INFO MainThread:12166 [wandb_init.py:init():720] updated telemetry +2024-08-04 22:11:32,281 INFO MainThread:12166 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-04 22:11:32,783 INFO MainThread:12166 [wandb_run.py:_on_init():2262] communicating current version +2024-08-04 22:11:32,877 INFO MainThread:12166 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-04 22:11:32,877 INFO MainThread:12166 [wandb_init.py:init():804] starting run threads in backend +2024-08-04 22:11:32,932 INFO MainThread:12166 [wandb_run.py:_console_start():2241] atexit reg +2024-08-04 22:11:32,932 INFO MainThread:12166 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-04 22:11:32,932 INFO MainThread:12166 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-04 22:11:32,932 INFO MainThread:12166 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-04 22:11:32,933 INFO MainThread:12166 [wandb_init.py:init():847] run started, returning control to user process +2024-08-04 22:14:13,452 INFO MainThread:12166 [wandb_run.py:_config_callback():1343] config_cb None None {'activation_function': 'gelu_pytorch_tanh', 'hidden_size': 2304, 'model_type': 'gemma2', 'max_position_embeddings': 4096, 'num_attention_heads': 8, 'num_hidden_layers': 26, 'model_architecture': 'Gemma2ForCausalLM'} +2024-08-04 22:14:13,453 INFO MainThread:12166 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} +2024-08-04 22:14:27,031 WARNING MsgRouterThr:12166 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240804_221132-o8ieoj9i/run-o8ieoj9i.wandb b/wandb/run-20240804_221132-o8ieoj9i/run-o8ieoj9i.wandb new file mode 100644 index 0000000000000000000000000000000000000000..ab574c9c332f8fd96a97707ecfff13a47702ca12 Binary files /dev/null and b/wandb/run-20240804_221132-o8ieoj9i/run-o8ieoj9i.wandb differ diff --git a/wandb/run-20240812_052853-n84i0o06/files/config.yaml b/wandb/run-20240812_052853-n84i0o06/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eb21c43a3ee401eb82790fc81f451f6e52eb3f69 --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/files/config.yaml @@ -0,0 +1,335 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '304771887' + - /work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document +valid_data_path: + desc: null + value: + - '304771887' + - /work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document +test_data_path: + desc: null + value: + - '304771887' + - /work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 4096 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: HFPreTrainedTokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: yans-qwen2-0.5B_train_2024-08-12-05:28:42 +wandb_project: + desc: null + value: llm_tutorial +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/yans-qwen2-0.5B +save: + desc: null + value: /work/llm_recipes/models/yans-qwen2-0.5B +base_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 5 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: adam +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 20000 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 20000 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 1 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 4096 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/yans-qwen2-0.5B +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: false +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 151680 +gradient_accumulation_steps: + desc: null + value: 320 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1723408133.524123 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +model_architecture: + desc: null + value: Qwen2ForCausalLM +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 896 +model_type: + desc: null + value: qwen2 +max_position_embeddings: + desc: null + value: 4096 +num_attention_heads: + desc: null + value: 14 +num_hidden_layers: + desc: null + value: 24 diff --git a/wandb/run-20240812_052853-n84i0o06/files/output.log b/wandb/run-20240812_052853-n84i0o06/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1eb312736ead17ac5dcbf95599133c8152c89c2d --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/files/output.log @@ -0,0 +1,139 @@ +Created Hugging Face repository with ID koichi12/yans-qwen2-0.5B. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-qwen2-0.5B, skipping model loading +--> Model /share/pretrained_lm/Qwen/Qwen2-0.5B +--> /share/pretrained_lm/Qwen/Qwen2-0.5B has 494.032768 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 6400000 + validation: 323200 + test: 3200 +> building train, validation, and test datasets for GPT ... +Let split = None +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-qwen2-0.5B, skipping optimizer loading +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): Qwen2ForCausalLM( + (model): Qwen2Model( + (embed_tokens): Embedding(151936, 896) + (layers): ModuleList( + (0-23): 24 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Qwen2DecoderLayer( + (self_attn): Qwen2FlashAttention2( + (q_proj): Linear(in_features=896, out_features=896, bias=True) + (k_proj): Linear(in_features=896, out_features=128, bias=True) + (v_proj): Linear(in_features=896, out_features=128, bias=True) + (o_proj): Linear(in_features=896, out_features=896, bias=False) + (rotary_emb): Qwen2RotaryEmbedding() + ) + (mlp): Qwen2MLP( + (gate_proj): Linear(in_features=896, out_features=4864, bias=False) + (up_proj): Linear(in_features=896, out_features=4864, bias=False) + (down_proj): Linear(in_features=4864, out_features=896, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): Qwen2RMSNorm() + (post_attention_layernorm): Qwen2RMSNorm() + ) + ) + ) + ) + (norm): Qwen2RMSNorm() + ) + (lm_head): Linear(in_features=896, out_features=151936, bias=False) + ) +) +model config: Qwen2Config { + "_name_or_path": "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "label_smoothing": 0.0, + "max_position_embeddings": 4096, + "max_window_layers": 24, + "model_type": "qwen2", + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.43.3", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +------------------------------------------------------------------ +iteration: 1 , TFLOPS: 67.05501421617748, Tokens per sec: 16676.24515769431, Loss: 4.1814446449279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 2 , TFLOPS: 70.71126656778048, Tokens per sec: 17585.5367488818, Loss: 4.19144344329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 3 , TFLOPS: 70.545913767934, Tokens per sec: 17544.41433827636, Loss: 4.197675704956055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 4 , TFLOPS: 70.68479486678217, Tokens per sec: 17578.953369834773, Loss: 4.183629989624023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 5 , TFLOPS: 70.61673302016509, Tokens per sec: 17562.0267305172, Loss: 4.198177337646484 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005 +Saving model state dict to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/model.pt +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_state_dict_utils.py:773: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned. + warnings.warn( +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_state_dict_utils.py:716: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned. + warnings.warn( +Saved model state dict to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/optimizer.pt +[rank0]:[2024-08-12 05:35:23,399] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.00647389400000975, 'preprocessing_with_comm': 0.0007460029999037943, 'state_converting': 0.9694889820000299, : 0.9780955020000874}) +Saved optimizer state dict to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-qwen2-0.5B/iter_0000005/rng.pt +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 175, in train + save_checkpoint( + File "/project/src/llama_recipes/utils/checkpoint.py", line 168, in save_checkpoint + tokenizer.tokenizer.save_pretrained(tokenizer_path) + File "/project/lib/transformers/src/transformers/tokenization_utils_base.py", line 2622, in save_pretrained + if os.path.isfile(save_directory): + File "/usr/lib/python3.10/genericpath.py", line 30, in isfile + st = os.stat(path) +TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType \ No newline at end of file diff --git a/wandb/run-20240812_052853-n84i0o06/files/requirements.txt b/wandb/run-20240812_052853-n84i0o06/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c90fd1c54d0a9881f6b4c6465b2a4fa88c9056c --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/files/requirements.txt @@ -0,0 +1,271 @@ +absl-py==2.1.0 +accelerate==0.33.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blis==0.7.11 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distributed==2023.11.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +einops==0.7.0 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +grpcio==1.60.1 +huggingface-hub==0.24.5 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +jedi==0.19.1 +jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonnet==0.19.1 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +kiwisolver==1.4.5 +langcodes==3.3.0 +lazy-loader==0.3 +librosa==0.10.1 +llvmlite==0.40.1 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +murmurhash==1.0.10 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +onnx==1.15.0rc2 +opencv==4.7.0 +optree==0.10.0 +packaging==23.2 +pandas==1.5.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +peft==0.11.1 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.1.dev0+gba5374836.d20240125 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pynvml==11.4.1 +pyparsing==3.1.1 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests==2.31.0 +rich==13.7.0 +rmm==23.12.0 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.0 +safetensors==0.4.3 +scikit-learn==1.2.0 +scipy==1.12.0 +send2trash==1.8.2 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +srsly==2.4.8 +stack-data==0.6.3 +sympy==1.12 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.9.0 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +urllib3==1.26.18 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +xdoctest==1.0.2 +xgboost==1.7.6 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 \ No newline at end of file diff --git a/wandb/run-20240812_052853-n84i0o06/files/wandb-metadata.json b/wandb/run-20240812_052853-n84i0o06/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..bb6176c60378cd772096b8310c95f8e4a0c74e1a --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/files/wandb-metadata.json @@ -0,0 +1,215 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-11T20:28:54.148690", + "startedAt": "2024-08-11T20:28:53.511276", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "4096", + "--sliding-window-size", + "4096", + "--micro-batch-size", + "1", + "--global-batch-size", + "320", + "--train-iters", + "20000", + "--tokenizer-type", + "HFPreTrainedTokenizer", + "--tokenizer-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--train-data-path", + "304771887", + "/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document", + "--valid-data-path", + "304771887", + "/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document", + "--test-data-path", + "304771887", + "/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "20000", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "adam", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "5", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--save", + "/work/llm_recipes/models/yans-qwen2-0.5B", + "--load", + "/work/llm_recipes/models/yans-qwen2-0.5B", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/yans-qwen2-0.5B", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial", + "--wandb-name", + "yans-qwen2-0.5B_train_2024-08-12-05:28:42" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "6da01327e78c302bc0cfdb335f3ca297e2a19c8c" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.0429999999997, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.043, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.487823486328125 + } +} diff --git a/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json b/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..0d91c5560ad7357dcfcf4db741188e58f590029e --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json @@ -0,0 +1 @@ +{"training/loss": 4.198177337646484, "training/perplexity": 66.56489507784042, "utils/batch_size": 1, "utils/global_batch_size": 320, "utils/seq_len": 4097, "utils/gradient_accumulation_steps": 320, "utils/iteration": 5, "optimizer/lr": 1.19e-06, "optimizer/variance_l2": 0.00650817005037245, "optimizer/variance_sqrt_l2": 0.4753125323283669, "optimizer/momentum_l2": 0.4059003829432183, "optimizer/weight_l2": 825.0639369164065, "optimizer/variance_l1": 0.22650909423828125, "optimizer/variance_sqrt_l1": 1979.75, "optimizer/momentum_l1": 1591.375, "optimizer/weight_l1": 6886400.0, "optimizer/variance_abs_max": 0.004669189453125, "optimizer/variance_sqrt_abs_max": 0.068359375, "optimizer/momentum_abs_max": 0.058837890625, "optimizer/weight_abs_max": 175.0, "stats/1_iteration_time": 74.65197611400004, "stats/tokens_per_sec": 17562.0267305172, "stats/tokens_per_sec_per_gpu": 17562.0267305172, "stats/tflops": 70.61673302016509, "_timestamp": 1723408520.9273944, "_runtime": 387.4032714366913, "_step": 5, "_wandb": {"runtime": 391}} \ No newline at end of file diff --git a/wandb/run-20240812_052853-n84i0o06/logs/debug-internal.log b/wandb/run-20240812_052853-n84i0o06/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..600c9c5f1c5da9292c3f69e91069df05d9527529 --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/logs/debug-internal.log @@ -0,0 +1,384 @@ +2024-08-12 05:28:53,525 INFO StreamThr :10531 [internal.py:wandb_internal():86] W&B internal server running at pid: 10531, started at: 2024-08-12 05:28:53.524894 +2024-08-12 05:28:53,527 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status +2024-08-12 05:28:53,529 INFO WriterThread:10531 [datastore.py:open_for_write():87] open: /project/wandb/run-20240812_052853-n84i0o06/run-n84i0o06.wandb +2024-08-12 05:28:53,530 DEBUG SenderThread:10531 [sender.py:send():382] send: header +2024-08-12 05:28:53,544 DEBUG SenderThread:10531 [sender.py:send():382] send: run +2024-08-12 05:28:54,033 INFO SenderThread:10531 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240812_052853-n84i0o06/files +2024-08-12 05:28:54,033 INFO SenderThread:10531 [sender.py:_start_run_threads():1136] run started: n84i0o06 with start time 1723408133.524123 +2024-08-12 05:28:54,038 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: check_version +2024-08-12 05:28:54,038 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: check_version +2024-08-12 05:28:54,128 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: run_start +2024-08-12 05:28:54,135 DEBUG HandlerThread:10531 [system_info.py:__init__():27] System info init +2024-08-12 05:28:54,135 DEBUG HandlerThread:10531 [system_info.py:__init__():42] System info init done +2024-08-12 05:28:54,135 INFO HandlerThread:10531 [system_monitor.py:start():194] Starting system monitor +2024-08-12 05:28:54,135 INFO SystemMonitor:10531 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-12 05:28:54,135 INFO HandlerThread:10531 [system_monitor.py:probe():214] Collecting system info +2024-08-12 05:28:54,136 INFO SystemMonitor:10531 [interfaces.py:start():190] Started cpu monitoring +2024-08-12 05:28:54,136 INFO SystemMonitor:10531 [interfaces.py:start():190] Started disk monitoring +2024-08-12 05:28:54,137 INFO SystemMonitor:10531 [interfaces.py:start():190] Started gpu monitoring +2024-08-12 05:28:54,138 INFO SystemMonitor:10531 [interfaces.py:start():190] Started memory monitoring +2024-08-12 05:28:54,139 INFO SystemMonitor:10531 [interfaces.py:start():190] Started network monitoring +2024-08-12 05:28:54,148 DEBUG HandlerThread:10531 [system_info.py:probe():151] Probing system +2024-08-12 05:28:54,150 DEBUG HandlerThread:10531 [system_info.py:_probe_git():136] Probing git +2024-08-12 05:28:54,163 DEBUG HandlerThread:10531 [system_info.py:_probe_git():144] Probing git done +2024-08-12 05:28:54,163 DEBUG HandlerThread:10531 [system_info.py:probe():199] Probing system done +2024-08-12 05:28:54,163 DEBUG HandlerThread:10531 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-11T20:28:54.148690', 'startedAt': '2024-08-11T20:28:53.511276', 'docker': None, 'cuda': None, 'args': ('--seq-length', '4096', '--sliding-window-size', '4096', '--micro-batch-size', '1', '--global-batch-size', '320', '--train-iters', '20000', '--tokenizer-type', 'HFPreTrainedTokenizer', '--tokenizer-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--train-data-path', '304771887', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document', '--valid-data-path', '304771887', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document', '--test-data-path', '304771887', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '20000', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'adam', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '5', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--save', '/work/llm_recipes/models/yans-qwen2-0.5B', '--load', '/work/llm_recipes/models/yans-qwen2-0.5B', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/yans-qwen2-0.5B', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial', '--wandb-name', 'yans-qwen2-0.5B_train_2024-08-12-05:28:42'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '6da01327e78c302bc0cfdb335f3ca297e2a19c8c'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.0429999999997, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}, {'current': 2400.043, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.487823486328125}} +2024-08-12 05:28:54,163 INFO HandlerThread:10531 [system_monitor.py:probe():224] Finished collecting system info +2024-08-12 05:28:54,163 INFO HandlerThread:10531 [system_monitor.py:probe():227] Publishing system info +2024-08-12 05:28:54,164 INFO HandlerThread:10531 [system_monitor.py:probe():229] Finished publishing system info +2024-08-12 05:28:54,170 DEBUG SenderThread:10531 [sender.py:send():382] send: files +2024-08-12 05:28:54,170 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-12 05:28:54,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: python_packages +2024-08-12 05:28:54,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:28:54,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: python_packages +2024-08-12 05:28:54,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:28:54,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:28:54,475 DEBUG SenderThread:10531 [sender.py:send():382] send: telemetry +2024-08-12 05:28:54,885 INFO wandb-upload_0:10531 [upload_job.py:push():131] Uploaded file /tmp/tmp0u7r0fs3wandb/exuilam8-wandb-metadata.json +2024-08-12 05:28:55,035 INFO Thread-12 :10531 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-metadata.json +2024-08-12 05:28:55,035 INFO Thread-12 :10531 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240812_052853-n84i0o06/files/requirements.txt +2024-08-12 05:28:56,035 INFO Thread-12 :10531 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:28:58,036 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:28:59,328 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:00,038 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:29:01,878 DEBUG SenderThread:10531 [sender.py:send():382] send: config +2024-08-12 05:29:01,879 DEBUG SenderThread:10531 [sender.py:send():382] send: config +2024-08-12 05:29:02,039 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:29:04,040 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:29:04,879 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:09,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:29:09,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:29:09,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:29:10,368 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:15,369 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:20,370 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:24,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:29:24,180 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:29:24,220 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:29:26,367 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:27,058 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/config.yaml +2024-08-12 05:29:31,577 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:36,578 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:39,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:29:39,180 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:29:39,220 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:29:42,448 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:47,449 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:52,450 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:29:54,139 DEBUG SystemMonitor:10531 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-12 05:29:54,141 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:29:54,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:29:54,180 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:29:54,220 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:29:58,446 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:03,447 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:08,448 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:09,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:30:09,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:30:09,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:30:13,456 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:18,457 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:22,408 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: partial_history +2024-08-12 05:30:24,097 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:30:24,142 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:30:24,142 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:24,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:30:24,180 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:30:24,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:30:29,451 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:34,451 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:39,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:30:39,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:30:39,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:30:40,419 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:45,420 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:50,421 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:30:54,143 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:30:54,180 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:30:54,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:30:54,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:30:56,414 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:01,416 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:06,417 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:09,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:31:09,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:31:09,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:31:12,373 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:17,375 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:22,376 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:24,144 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:31:24,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:31:24,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:31:24,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:31:28,366 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:33,367 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:36,963 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: partial_history +2024-08-12 05:31:36,966 DEBUG SenderThread:10531 [sender.py:send():382] send: history +2024-08-12 05:31:36,966 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: summary_record +2024-08-12 05:31:36,968 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:31:37,152 INFO Thread-12 :10531 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json +2024-08-12 05:31:39,006 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:39,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:31:39,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:31:39,183 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:31:40,154 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:31:44,409 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:49,410 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:31:54,145 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:31:54,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:31:54,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:31:54,228 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:31:55,354 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:00,355 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:05,356 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:09,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:32:09,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:32:09,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:32:10,376 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:15,377 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:20,378 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:24,146 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:32:24,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:32:24,181 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:32:24,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:32:25,450 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:30,451 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:35,451 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:39,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:32:39,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:32:39,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:32:41,437 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:46,438 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:51,438 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:32:51,692 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: partial_history +2024-08-12 05:32:51,694 DEBUG SenderThread:10531 [sender.py:send():382] send: history +2024-08-12 05:32:51,694 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: summary_record +2024-08-12 05:32:51,696 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:32:52,204 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json +2024-08-12 05:32:54,147 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:32:54,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:32:54,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:32:54,183 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:32:54,205 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:32:56,453 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:01,453 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:06,454 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:09,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:33:09,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:33:09,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:33:12,386 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:17,386 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:22,387 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:24,148 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:33:24,181 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:33:24,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:33:24,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:33:28,379 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:33,380 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:38,380 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:39,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:33:39,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:33:39,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:33:43,420 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:48,421 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:53,421 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:33:54,149 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:33:54,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:33:54,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:33:54,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:33:59,378 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:04,379 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:06,274 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: partial_history +2024-08-12 05:34:06,276 DEBUG SenderThread:10531 [sender.py:send():382] send: history +2024-08-12 05:34:06,277 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: summary_record +2024-08-12 05:34:06,278 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:34:07,249 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json +2024-08-12 05:34:08,250 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:34:09,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:34:09,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:34:09,184 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:34:09,395 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:14,395 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:19,396 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:24,150 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:34:24,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:34:24,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:34:24,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:34:25,394 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:30,395 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:35,396 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:39,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:34:39,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:34:39,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:34:40,439 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:45,439 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:50,440 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:34:54,152 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:34:54,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:34:54,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:34:54,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:34:55,454 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:00,455 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:05,455 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:09,182 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:35:09,182 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:35:09,224 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:35:11,407 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:16,407 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:20,928 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: partial_history +2024-08-12 05:35:20,930 DEBUG SenderThread:10531 [sender.py:send():382] send: history +2024-08-12 05:35:20,931 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: summary_record +2024-08-12 05:35:20,932 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:35:21,295 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json +2024-08-12 05:35:21,970 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:22,296 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:35:24,152 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:35:24,232 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:35:24,255 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: stop_status +2024-08-12 05:35:24,256 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: stop_status +2024-08-12 05:35:24,297 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:35:25,212 DEBUG SenderThread:10531 [sender.py:send():382] send: exit +2024-08-12 05:35:25,213 INFO SenderThread:10531 [sender.py:send_exit():589] handling exit code: 1 +2024-08-12 05:35:25,213 INFO SenderThread:10531 [sender.py:send_exit():591] handling runtime: 391 +2024-08-12 05:35:25,214 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:35:25,214 INFO SenderThread:10531 [sender.py:send_exit():597] send defer +2024-08-12 05:35:25,214 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,214 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-12 05:35:25,215 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,215 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-12 05:35:25,215 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 1 +2024-08-12 05:35:25,215 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,215 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-12 05:35:25,215 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,215 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-12 05:35:25,215 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 2 +2024-08-12 05:35:25,215 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,215 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-12 05:35:25,215 INFO HandlerThread:10531 [system_monitor.py:finish():203] Stopping system monitor +2024-08-12 05:35:25,215 DEBUG SystemMonitor:10531 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-12 05:35:25,215 INFO HandlerThread:10531 [interfaces.py:finish():202] Joined cpu monitor +2024-08-12 05:35:25,216 DEBUG SystemMonitor:10531 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-12 05:35:25,216 INFO HandlerThread:10531 [interfaces.py:finish():202] Joined disk monitor +2024-08-12 05:35:25,249 INFO HandlerThread:10531 [interfaces.py:finish():202] Joined gpu monitor +2024-08-12 05:35:25,249 INFO HandlerThread:10531 [interfaces.py:finish():202] Joined memory monitor +2024-08-12 05:35:25,249 INFO HandlerThread:10531 [interfaces.py:finish():202] Joined network monitor +2024-08-12 05:35:25,249 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,249 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-12 05:35:25,249 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 3 +2024-08-12 05:35:25,249 DEBUG SenderThread:10531 [sender.py:send():382] send: stats +2024-08-12 05:35:25,250 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,250 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-12 05:35:25,251 DEBUG SenderThread:10531 [sender.py:send():382] send: history +2024-08-12 05:35:25,252 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: summary_record +2024-08-12 05:35:25,253 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:35:25,253 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,253 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-12 05:35:25,253 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 4 +2024-08-12 05:35:25,253 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,253 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-12 05:35:25,253 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,253 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-12 05:35:25,253 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 5 +2024-08-12 05:35:25,253 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,253 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-12 05:35:25,254 DEBUG SenderThread:10531 [sender.py:send():382] send: summary +2024-08-12 05:35:25,255 INFO SenderThread:10531 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-12 05:35:25,255 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,255 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-12 05:35:25,255 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 6 +2024-08-12 05:35:25,255 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,255 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-12 05:35:25,255 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,255 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-12 05:35:25,256 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 7 +2024-08-12 05:35:25,256 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: status_report +2024-08-12 05:35:25,256 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:25,256 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-12 05:35:25,256 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:25,256 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-12 05:35:25,298 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json +2024-08-12 05:35:26,141 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 8 +2024-08-12 05:35:26,142 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:26,142 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-12 05:35:26,142 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:26,142 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-12 05:35:26,142 INFO SenderThread:10531 [job_builder.py:build():296] Attempting to build job artifact +2024-08-12 05:35:26,143 INFO SenderThread:10531 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-12 05:35:26,157 INFO SenderThread:10531 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-12 05:35:26,166 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 9 +2024-08-12 05:35:26,166 DEBUG SenderThread:10531 [sender.py:send():382] send: artifact +2024-08-12 05:35:26,166 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:26,167 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-12 05:35:26,213 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-12 05:35:26,299 INFO Thread-12 :10531 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:35:27,302 INFO wandb-upload_1:10531 [upload_job.py:push():86] Skipped uploading /singularity_home/.local/share/wandb/artifacts/staging/tmpyfws5ko3 +2024-08-12 05:35:27,738 INFO wandb-upload_0:10531 [upload_job.py:push():89] Uploaded file /singularity_home/.local/share/wandb/artifacts/staging/tmpypuucsag +2024-08-12 05:35:29,357 INFO SenderThread:10531 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTEzOTg5OTc5MQ==', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjM2MjY3MjMzNA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTEzOTgzMzc4Mw==', 'versionIndex': 6}}} +2024-08-12 05:35:29,357 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:29,357 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-12 05:35:29,358 INFO SenderThread:10531 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-12 05:35:30,300 INFO SenderThread:10531 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240812_052853-n84i0o06/files +2024-08-12 05:35:30,301 INFO SenderThread:10531 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240812_052853-n84i0o06/files/requirements.txt requirements.txt +2024-08-12 05:35:30,301 INFO SenderThread:10531 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240812_052853-n84i0o06/files/config.yaml config.yaml +2024-08-12 05:35:30,301 INFO SenderThread:10531 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-metadata.json wandb-metadata.json +2024-08-12 05:35:30,302 INFO SenderThread:10531 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json wandb-summary.json +2024-08-12 05:35:30,304 INFO SenderThread:10531 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240812_052853-n84i0o06/files/output.log output.log +2024-08-12 05:35:30,306 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 10 +2024-08-12 05:35:30,306 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: poll_exit +2024-08-12 05:35:30,306 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:30,307 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-12 05:35:30,308 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:30,308 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-12 05:35:30,308 INFO SenderThread:10531 [file_pusher.py:finish():172] shutting down file pusher +2024-08-12 05:35:30,718 INFO wandb-upload_0:10531 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240812_052853-n84i0o06/files/config.yaml +2024-08-12 05:35:30,895 INFO wandb-upload_3:10531 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240812_052853-n84i0o06/files/output.log +2024-08-12 05:35:31,214 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: keepalive +2024-08-12 05:35:31,214 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-12 05:35:31,214 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: poll_exit +2024-08-12 05:35:31,248 INFO wandb-upload_1:10531 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240812_052853-n84i0o06/files/requirements.txt +2024-08-12 05:35:31,299 INFO wandb-upload_2:10531 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240812_052853-n84i0o06/files/wandb-summary.json +2024-08-12 05:35:31,499 INFO Thread-11 (_thread_body):10531 [sender.py:transition_state():617] send defer: 11 +2024-08-12 05:35:31,499 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:31,500 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-12 05:35:31,500 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:31,500 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-12 05:35:31,500 INFO SenderThread:10531 [file_pusher.py:join():178] waiting for file pusher +2024-08-12 05:35:31,500 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 12 +2024-08-12 05:35:31,500 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:31,500 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-12 05:35:31,500 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:31,500 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-12 05:35:31,500 INFO SenderThread:10531 [file_stream.py:finish():595] file stream finish called +2024-08-12 05:35:32,061 INFO SenderThread:10531 [file_stream.py:finish():599] file stream finish is done +2024-08-12 05:35:32,061 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 13 +2024-08-12 05:35:32,061 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:32,061 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-12 05:35:32,062 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:32,062 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-12 05:35:32,062 INFO SenderThread:10531 [sender.py:transition_state():617] send defer: 14 +2024-08-12 05:35:32,062 DEBUG SenderThread:10531 [sender.py:send():382] send: final +2024-08-12 05:35:32,062 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: defer +2024-08-12 05:35:32,062 INFO HandlerThread:10531 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-12 05:35:32,062 DEBUG SenderThread:10531 [sender.py:send():382] send: footer +2024-08-12 05:35:32,062 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: defer +2024-08-12 05:35:32,062 INFO SenderThread:10531 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-12 05:35:32,063 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-12 05:35:32,063 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: poll_exit +2024-08-12 05:35:32,063 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-12 05:35:32,064 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: poll_exit +2024-08-12 05:35:32,064 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: server_info +2024-08-12 05:35:32,064 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: server_info +2024-08-12 05:35:32,065 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: get_summary +2024-08-12 05:35:32,066 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-12 05:35:32,067 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-12 05:35:32,067 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: job_info +2024-08-12 05:35:32,238 DEBUG SenderThread:10531 [sender.py:send_request():409] send_request: job_info +2024-08-12 05:35:32,238 INFO MainThread:10531 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-12 05:35:32,239 INFO MainThread:10531 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-12 05:35:32,239 INFO MainThread:10531 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-12 05:35:32,240 DEBUG HandlerThread:10531 [handler.py:handle_request():146] handle_request: shutdown +2024-08-12 05:35:32,240 INFO HandlerThread:10531 [handler.py:finish():869] shutting down handler +2024-08-12 05:35:33,068 INFO WriterThread:10531 [datastore.py:close():296] close: /project/wandb/run-20240812_052853-n84i0o06/run-n84i0o06.wandb +2024-08-12 05:35:33,239 INFO SenderThread:10531 [sender.py:finish():1572] shutting down sender +2024-08-12 05:35:33,239 INFO SenderThread:10531 [file_pusher.py:finish():172] shutting down file pusher +2024-08-12 05:35:33,239 INFO SenderThread:10531 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240812_052853-n84i0o06/logs/debug.log b/wandb/run-20240812_052853-n84i0o06/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..5e464fe296cb95916694fa5990af64be787cea65 --- /dev/null +++ b/wandb/run-20240812_052853-n84i0o06/logs/debug.log @@ -0,0 +1,30 @@ +2024-08-12 05:28:53,517 INFO MainThread:10460 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-12 05:28:53,517 INFO MainThread:10460 [wandb_setup.py:_flush():76] Configure stats pid to 10460 +2024-08-12 05:28:53,517 INFO MainThread:10460 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-12 05:28:53,517 INFO MainThread:10460 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-12 05:28:53,517 INFO MainThread:10460 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train Qwen2'} +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240812_052853-n84i0o06/logs/debug.log +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240812_052853-n84i0o06/logs/debug-internal.log +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_init.py:init():566] calling init triggers +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['304771887', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document'], 'valid_data_path': ['304771887', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document'], 'test_data_path': ['304771887', '/work/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 4096, 'num_workers': 2, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'yans-qwen2-0.5B_train_2024-08-12-05:28:42', 'wandb_project': 'llm_tutorial', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/yans-qwen2-0.5B', 'save': '/work/llm_recipes/models/yans-qwen2-0.5B', 'base_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 5, 'eval_iters': 10, 'optimizer': 'adam', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 20000, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 20000, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 1, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 4096, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/yans-qwen2-0.5B', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': False, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 151680, 'gradient_accumulation_steps': 320} +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_init.py:init():616] starting backend +2024-08-12 05:28:53,518 INFO MainThread:10460 [wandb_init.py:init():620] setting up manager +2024-08-12 05:28:53,523 INFO MainThread:10460 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-12 05:28:53,523 INFO MainThread:10460 [wandb_init.py:init():628] backend started and connected +2024-08-12 05:28:53,528 INFO MainThread:10460 [wandb_init.py:init():720] updated telemetry +2024-08-12 05:28:53,540 INFO MainThread:10460 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-12 05:28:54,037 INFO MainThread:10460 [wandb_run.py:_on_init():2262] communicating current version +2024-08-12 05:28:54,121 INFO MainThread:10460 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.6 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-12 05:28:54,121 INFO MainThread:10460 [wandb_init.py:init():804] starting run threads in backend +2024-08-12 05:28:54,179 INFO MainThread:10460 [wandb_run.py:_console_start():2241] atexit reg +2024-08-12 05:28:54,180 INFO MainThread:10460 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-12 05:28:54,180 INFO MainThread:10460 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-12 05:28:54,180 INFO MainThread:10460 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-12 05:28:54,181 INFO MainThread:10460 [wandb_init.py:init():847] run started, returning control to user process +2024-08-12 05:29:01,877 INFO MainThread:10460 [wandb_run.py:_config_callback():1343] config_cb None None {'model_architecture': 'Qwen2ForCausalLM', 'activation_function': 'silu', 'hidden_size': 896, 'model_type': 'qwen2', 'max_position_embeddings': 4096, 'num_attention_heads': 14, 'num_hidden_layers': 24} +2024-08-12 05:29:01,878 INFO MainThread:10460 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} +2024-08-12 05:35:33,240 WARNING MsgRouterThr:10460 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240812_052853-n84i0o06/run-n84i0o06.wandb b/wandb/run-20240812_052853-n84i0o06/run-n84i0o06.wandb new file mode 100644 index 0000000000000000000000000000000000000000..7955df72e772efdaebba7ed6f429b2aef224a86e Binary files /dev/null and b/wandb/run-20240812_052853-n84i0o06/run-n84i0o06.wandb differ diff --git a/wandb/run-20240812_063027-j1htzx7q/files/output.log b/wandb/run-20240812_063027-j1htzx7q/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..9326a277a7d3c2a2a13c09db9ce174ab3df32373 --- /dev/null +++ b/wandb/run-20240812_063027-j1htzx7q/files/output.log @@ -0,0 +1,121 @@ +Created Hugging Face repository with ID koichi12/yans-sample-gemma-2-2b. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. + + +Loading checkpoint shards: 67%|██████▋ | 2/3 [02:31<01:16, 76.44s/it] +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-sample-gemma-2-2b, skipping model loading +--> Model /share/pretrained_lm/google/gemma-2-2b +--> /share/pretrained_lm/google/gemma-2-2b has 2614.341888 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 6400000 + validation: 21334400 + test: 3200 +Loading checkpoint shards: 100%|██████████| 3/3 [02:40<00:00, 53.37s/it] +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +Let split = None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-sample-gemma-2-2b, skipping optimizer loading +File not found: /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-sample-gemma-2-2b/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): Gemma2ForCausalLM( + (model): Gemma2Model( + (embed_tokens): Embedding(256000, 2304, padding_idx=0) + (layers): ModuleList( + (0-25): 26 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Gemma2DecoderLayer( + (self_attn): Gemma2FlashAttention2( + (q_proj): Linear(in_features=2304, out_features=2048, bias=False) + (k_proj): Linear(in_features=2304, out_features=1024, bias=False) + (v_proj): Linear(in_features=2304, out_features=1024, bias=False) + (o_proj): Linear(in_features=2048, out_features=2304, bias=False) + (rotary_emb): Gemma2RotaryEmbedding() + ) + (mlp): Gemma2MLP( + (gate_proj): Linear(in_features=2304, out_features=9216, bias=False) + (up_proj): Linear(in_features=2304, out_features=9216, bias=False) + (down_proj): Linear(in_features=9216, out_features=2304, bias=False) + (act_fn): PytorchGELUTanh() + ) + (input_layernorm): Gemma2RMSNorm() + (post_attention_layernorm): Gemma2RMSNorm() + (pre_feedforward_layernorm): Gemma2RMSNorm() + (post_feedforward_layernorm): Gemma2RMSNorm() + ) + ) + ) + ) + (norm): Gemma2RMSNorm() + ) + (lm_head): Linear(in_features=2304, out_features=256000, bias=False) + ) +) +model config: Gemma2Config { + "_name_or_path": "/share/pretrained_lm/google/gemma-2-2b", + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 2304, + "initializer_range": 0.02, + "intermediate_size": 9216, + "label_smoothing": 0.0, + "max_position_embeddings": 4096, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 26, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "float32", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 256000 +} +It is strongly recommended to train Gemma2 models with the `eager` attention implementation instead of `flash_attention_2`. Use `eager` with `AutoModelForCausalLM.from_pretrained('', attn_implementation='eager')`. +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 118, in train + loss.backward() + File "/usr/local/lib/python3.10/dist-packages/torch/_tensor.py", line 522, in backward + torch.autograd.backward( + File "/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py", line 267, in backward + _engine_run_backward( + File "/usr/local/lib/python3.10/dist-packages/torch/autograd/graph.py", line 681, in _engine_run_backward + return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 8.70 GiB. GPU 0 has a total capacity of 39.39 GiB of which 3.86 GiB is free. Including non-PyTorch memory, this process has 35.52 GiB memory in use. Of the allocated memory 32.71 GiB is allocated by PyTorch, and 1.99 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) \ No newline at end of file diff --git a/wandb/run-20240812_063027-j1htzx7q/files/wandb-summary.json b/wandb/run-20240812_063027-j1htzx7q/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..01f8f6b068a04ee402ea6cebb74bb3c73b768c03 --- /dev/null +++ b/wandb/run-20240812_063027-j1htzx7q/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 167}} \ No newline at end of file diff --git a/wandb/run-20240823_154448-v9m85jnt/files/config.yaml b/wandb/run-20240823_154448-v9m85jnt/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3555fe1048e2a996399ffc41701997c5b6a85352 --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/files/config.yaml @@ -0,0 +1,321 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document + - '28623823675' + - /project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document +valid_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document +test_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 2048 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: HFPreTrainedTokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: Qwen2-0.5b-0.2_train_2024-08-23-15:44:18 +wandb_project: + desc: null + value: llm_tutorial-0.2 +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/Qwen2-0.5b-0.2 +save: + desc: null + value: /work/llm_recipes/models/Qwen2-0.5b-0.2 +base_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 10 +save_interval: + desc: null + value: 10 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: anyprecision +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 7500 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 7500 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 1 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 4096 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/Qwen2-0.5b-0.2 +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: true +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +valid_micro_batch_size: + desc: null + value: 1 +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 151680 +gradient_accumulation_steps: + desc: null + value: 320 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1724395488.891619 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 diff --git a/wandb/run-20240823_154448-v9m85jnt/files/output.log b/wandb/run-20240823_154448-v9m85jnt/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..bc93acaf705ca1fc052cc9750c185af89be360c1 --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/files/output.log @@ -0,0 +1,15 @@ +Created Hugging Face repository with ID koichi12/Qwen2-0.5b-0.2. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 103, in main + model = get_model( + File "/project/src/llama_recipes/get_models.py", line 106, in get_model + assert sliding_window == 131072 +AssertionError \ No newline at end of file diff --git a/wandb/run-20240823_154448-v9m85jnt/files/requirements.txt b/wandb/run-20240823_154448-v9m85jnt/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..34a2774e444bdc395654ccf8ce6da6833c7bc1ee --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/files/requirements.txt @@ -0,0 +1,375 @@ +absl-py==2.1.0 +accelerate==0.23.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +antlr4-python3-runtime==4.9.3 +anyio==4.4.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +astroid==3.2.4 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bert-score==0.3.13 +bleach==6.1.0 +blis==0.7.11 +build==1.2.1 +cachecontrol==0.14.0 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +cleo==2.1.0 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cramjam==2.8.3 +crashtest==0.4.1 +cryptography==43.0.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +dataclasses-json==0.6.7 +dataproperty==1.0.1 +datasets==2.20.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.8 +distlib==0.3.8 +distributed==2023.11.0 +distro==1.9.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +dulwich==0.21.7 +einops==0.7.0 +emoji==2.12.1 +entmax==1.3 +evaluate==0.4.2 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastparquet==2023.10.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +fugashi==1.3.2 +fuzzywuzzy==0.18.0 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +greenlet==3.0.3 +grpcio==1.60.1 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.24.5 +hydra-core==1.3.2 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +installer==0.7.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +isort==5.13.2 +jaraco.classes==3.4.0 +jedi==0.19.1 +jeepney==0.8.0 +jinja2==3.1.3 +jiter==0.5.0 +joblib==1.3.2 +json5==0.9.14 +jsonargparse==3.13.1 +jsonlines==4.0.0 +jsonnet==0.19.1 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +keyring==24.3.1 +kiwisolver==1.4.5 +langchain-community==0.2.12 +langchain-core==0.2.31 +langchain-huggingface==0.0.2 +langchain-openai==0.1.21 +langchain-text-splitters==0.2.2 +langchain==0.2.13 +langcodes==3.3.0 +langsmith==0.1.99 +lazy-loader==0.3 +levenshtein==0.25.1 +librosa==0.10.1 +lightning-utilities==0.11.6 +llm-jp-eval==1.4.0 +llvmlite==0.40.1 +lm-eval==0.3.0 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +marshmallow==3.21.3 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mbstrdecoder==1.1.3 +mccabe==0.7.0 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +mojimoji==0.0.13 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +multiprocess==0.70.16 +murmurhash==1.0.10 +mypy-extensions==1.0.0 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +neologdn==0.5.3 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numexpr==2.10.1 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +omegaconf==2.3.0 +onnx==1.15.0rc2 +openai==1.40.6 +opencv==4.7.0 +optree==0.10.0 +orjson==3.10.7 +packaging==23.2 +pandas==2.2.2 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +pathvalidate==3.2.0 +peft==0.5.0 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +pkginfo==1.11.1 +plac==1.4.3 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +poetry-core==1.9.0 +poetry-plugin-export==1.8.0 +poetry==1.8.3 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow-hotfix==0.6 +pyarrow==15.0.2 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycountry==24.6.1 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pylint==3.2.6 +pynvml==11.4.1 +pyparsing==3.1.1 +pyproject-hooks==1.1.0 +pytablewriter==1.2.0 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +python-levenshtein==0.25.1 +pytorch-lightning==2.4.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapidfuzz==3.9.6 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests-toolbelt==1.0.0 +requests==2.32.3 +rhoknp==1.7.0 +rich==13.7.0 +rmm==23.12.0 +rouge-score==0.1.2 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.2 +safetensors==0.4.3 +scikit-learn==1.5.1 +scipy==1.12.0 +secretstorage==3.3.3 +send2trash==1.8.2 +sentence-transformers==3.0.1 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +sqlalchemy==2.0.32 +sqlitedict==2.1.0 +srsly==2.4.8 +stack-data==0.6.3 +sumeval==0.2.2 +sympy==1.12 +tabledata==1.3.3 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tcolorpy==0.1.6 +tenacity==8.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +text-generation==0.7.0 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tiktoken==0.7.0 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +tomlkit==0.13.2 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchmetrics==0.10.3 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm-multiprocess==0.0.11 +tqdm==4.66.5 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +trove-classifiers==2024.7.2 +typepy==1.3.2 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.12.2 +typing-inspect==0.9.0 +tzdata==2024.1 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +unbabel-comet==2.2.2 +unidic-lite==1.0.8 +urllib3==1.26.18 +virtualenv==20.26.3 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +word2number==1.1 +xdoctest==1.0.2 +xgboost==1.7.6 +xmltodict==0.13.0 +xxhash==3.4.1 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 +zstandard==0.23.0 \ No newline at end of file diff --git a/wandb/run-20240823_154448-v9m85jnt/files/wandb-metadata.json b/wandb/run-20240823_154448-v9m85jnt/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..603bf7390a1c080ed711568c82021c4037062b64 --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/files/wandb-metadata.json @@ -0,0 +1,220 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-23T06:44:49.486428", + "startedAt": "2024-08-23T06:44:48.878270", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "2048", + "--sliding-window-size", + "4096", + "--micro-batch-size", + "1", + "--valid_micro_batch_size", + "1", + "--global-batch-size", + "320", + "--train-iters", + "7500", + "--tokenizer-type", + "HFPreTrainedTokenizer", + "--tokenizer-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--train-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "28623823675", + "/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document", + "--valid-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "--test-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "7500", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "anyprecision", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "10", + "--eval-interval", + "10", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--save", + "/work/llm_recipes/models/Qwen2-0.5b-0.2", + "--load", + "/work/llm_recipes/models/Qwen2-0.5b-0.2", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--upload-all-checkpoints-to-hf", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/Qwen2-0.5b-0.2", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial-0.2", + "--wandb-name", + "Qwen2-0.5b-0.2_train_2024-08-23-15:44:18" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "887a2cc5d104c10264701f95cbbb0a6a116768d6" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.0389999999993, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.487831115722656 + } +} diff --git a/wandb/run-20240823_154448-v9m85jnt/files/wandb-summary.json b/wandb/run-20240823_154448-v9m85jnt/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..971d47939227bae0ec95239d24debe3d22478dc7 --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 1}} \ No newline at end of file diff --git a/wandb/run-20240823_154448-v9m85jnt/logs/debug-internal.log b/wandb/run-20240823_154448-v9m85jnt/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c2f5b936ee7dbe65688576e6ed387d071ed5ae8c --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/logs/debug-internal.log @@ -0,0 +1,189 @@ +2024-08-23 15:44:48,892 INFO StreamThr :10032 [internal.py:wandb_internal():86] W&B internal server running at pid: 10032, started at: 2024-08-23 15:44:48.891774 +2024-08-23 15:44:48,893 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: status +2024-08-23 15:44:48,896 INFO WriterThread:10032 [datastore.py:open_for_write():87] open: /project/wandb/run-20240823_154448-v9m85jnt/run-v9m85jnt.wandb +2024-08-23 15:44:48,897 DEBUG SenderThread:10032 [sender.py:send():382] send: header +2024-08-23 15:44:48,913 DEBUG SenderThread:10032 [sender.py:send():382] send: run +2024-08-23 15:44:49,390 INFO SenderThread:10032 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240823_154448-v9m85jnt/files +2024-08-23 15:44:49,390 INFO SenderThread:10032 [sender.py:_start_run_threads():1136] run started: v9m85jnt with start time 1724395488.891619 +2024-08-23 15:44:49,395 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: check_version +2024-08-23 15:44:49,396 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: check_version +2024-08-23 15:44:49,467 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: run_start +2024-08-23 15:44:49,473 DEBUG HandlerThread:10032 [system_info.py:__init__():27] System info init +2024-08-23 15:44:49,474 DEBUG HandlerThread:10032 [system_info.py:__init__():42] System info init done +2024-08-23 15:44:49,474 INFO HandlerThread:10032 [system_monitor.py:start():194] Starting system monitor +2024-08-23 15:44:49,474 INFO SystemMonitor:10032 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-23 15:44:49,474 INFO HandlerThread:10032 [system_monitor.py:probe():214] Collecting system info +2024-08-23 15:44:49,474 INFO SystemMonitor:10032 [interfaces.py:start():190] Started cpu monitoring +2024-08-23 15:44:49,475 INFO SystemMonitor:10032 [interfaces.py:start():190] Started disk monitoring +2024-08-23 15:44:49,475 INFO SystemMonitor:10032 [interfaces.py:start():190] Started gpu monitoring +2024-08-23 15:44:49,475 INFO SystemMonitor:10032 [interfaces.py:start():190] Started memory monitoring +2024-08-23 15:44:49,476 INFO SystemMonitor:10032 [interfaces.py:start():190] Started network monitoring +2024-08-23 15:44:49,486 DEBUG HandlerThread:10032 [system_info.py:probe():151] Probing system +2024-08-23 15:44:49,488 DEBUG HandlerThread:10032 [system_info.py:_probe_git():136] Probing git +2024-08-23 15:44:49,500 DEBUG HandlerThread:10032 [system_info.py:_probe_git():144] Probing git done +2024-08-23 15:44:49,500 DEBUG HandlerThread:10032 [system_info.py:probe():199] Probing system done +2024-08-23 15:44:49,500 DEBUG HandlerThread:10032 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-23T06:44:49.486428', 'startedAt': '2024-08-23T06:44:48.878270', 'docker': None, 'cuda': None, 'args': ('--seq-length', '2048', '--sliding-window-size', '4096', '--micro-batch-size', '1', '--valid_micro_batch_size', '1', '--global-batch-size', '320', '--train-iters', '7500', '--tokenizer-type', 'HFPreTrainedTokenizer', '--tokenizer-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--train-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document', '--valid-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '--test-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '7500', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'anyprecision', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '10', '--eval-interval', '10', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--save', '/work/llm_recipes/models/Qwen2-0.5b-0.2', '--load', '/work/llm_recipes/models/Qwen2-0.5b-0.2', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--upload-all-checkpoints-to-hf', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/Qwen2-0.5b-0.2', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial-0.2', '--wandb-name', 'Qwen2-0.5b-0.2_train_2024-08-23-15:44:18'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '887a2cc5d104c10264701f95cbbb0a6a116768d6'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.0389999999993, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.487831115722656}} +2024-08-23 15:44:49,500 INFO HandlerThread:10032 [system_monitor.py:probe():224] Finished collecting system info +2024-08-23 15:44:49,500 INFO HandlerThread:10032 [system_monitor.py:probe():227] Publishing system info +2024-08-23 15:44:49,502 INFO HandlerThread:10032 [system_monitor.py:probe():229] Finished publishing system info +2024-08-23 15:44:49,528 DEBUG SenderThread:10032 [sender.py:send():382] send: files +2024-08-23 15:44:49,529 INFO SenderThread:10032 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-23 15:44:49,540 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: python_packages +2024-08-23 15:44:49,540 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:44:49,540 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:44:49,541 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: python_packages +2024-08-23 15:44:49,543 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:44:49,740 DEBUG SenderThread:10032 [sender.py:send():382] send: telemetry +2024-08-23 15:44:50,157 INFO wandb-upload_0:10032 [upload_job.py:push():131] Uploaded file /tmp/tmp_akktvpmwandb/xbudf9th-wandb-metadata.json +2024-08-23 15:44:50,392 INFO Thread-12 :10032 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154448-v9m85jnt/files/wandb-metadata.json +2024-08-23 15:44:50,392 INFO Thread-12 :10032 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154448-v9m85jnt/files/requirements.txt +2024-08-23 15:44:50,392 INFO Thread-12 :10032 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154448-v9m85jnt/files/output.log +2024-08-23 15:44:50,729 DEBUG SenderThread:10032 [sender.py:send():382] send: exit +2024-08-23 15:44:50,729 INFO SenderThread:10032 [sender.py:send_exit():589] handling exit code: 1 +2024-08-23 15:44:50,730 INFO SenderThread:10032 [sender.py:send_exit():591] handling runtime: 1 +2024-08-23 15:44:50,731 INFO SenderThread:10032 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:44:50,731 INFO SenderThread:10032 [sender.py:send_exit():597] send defer +2024-08-23 15:44:50,731 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,731 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-23 15:44:50,731 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,732 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-23 15:44:50,732 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 1 +2024-08-23 15:44:50,732 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,732 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-23 15:44:50,732 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,732 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-23 15:44:50,732 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 2 +2024-08-23 15:44:50,732 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,732 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-23 15:44:50,732 INFO HandlerThread:10032 [system_monitor.py:finish():203] Stopping system monitor +2024-08-23 15:44:50,732 INFO HandlerThread:10032 [interfaces.py:finish():202] Joined cpu monitor +2024-08-23 15:44:50,733 DEBUG SystemMonitor:10032 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-23 15:44:50,733 INFO HandlerThread:10032 [interfaces.py:finish():202] Joined disk monitor +2024-08-23 15:44:50,733 DEBUG SystemMonitor:10032 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-23 15:44:50,733 DEBUG SystemMonitor:10032 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-23 15:44:50,765 INFO HandlerThread:10032 [interfaces.py:finish():202] Joined gpu monitor +2024-08-23 15:44:50,765 INFO HandlerThread:10032 [interfaces.py:finish():202] Joined memory monitor +2024-08-23 15:44:50,765 INFO HandlerThread:10032 [interfaces.py:finish():202] Joined network monitor +2024-08-23 15:44:50,766 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,766 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-23 15:44:50,766 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 3 +2024-08-23 15:44:50,766 DEBUG SenderThread:10032 [sender.py:send():382] send: stats +2024-08-23 15:44:50,766 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,766 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-23 15:44:50,766 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,766 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-23 15:44:50,766 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 4 +2024-08-23 15:44:50,767 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,767 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-23 15:44:50,767 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,767 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-23 15:44:50,767 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 5 +2024-08-23 15:44:50,767 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,767 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-23 15:44:50,767 DEBUG SenderThread:10032 [sender.py:send():382] send: summary +2024-08-23 15:44:50,768 INFO SenderThread:10032 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:44:50,768 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,768 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-23 15:44:50,768 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 6 +2024-08-23 15:44:50,768 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,768 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-23 15:44:50,768 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,769 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-23 15:44:50,771 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:44:50,957 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 7 +2024-08-23 15:44:50,957 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:50,957 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-23 15:44:50,958 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:50,958 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-23 15:44:51,392 INFO Thread-12 :10032 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154448-v9m85jnt/files/config.yaml +2024-08-23 15:44:51,392 INFO Thread-12 :10032 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154448-v9m85jnt/files/wandb-summary.json +2024-08-23 15:44:51,729 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 15:44:52,393 INFO Thread-12 :10032 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154448-v9m85jnt/files/output.log +2024-08-23 15:44:52,721 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 8 +2024-08-23 15:44:52,721 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 15:44:52,721 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:52,721 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-23 15:44:52,721 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:52,721 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-23 15:44:52,721 INFO SenderThread:10032 [job_builder.py:build():296] Attempting to build job artifact +2024-08-23 15:44:52,722 INFO SenderThread:10032 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-23 15:44:52,730 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 15:44:52,737 INFO SenderThread:10032 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-23 15:44:52,746 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 9 +2024-08-23 15:44:52,747 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 15:44:52,747 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:52,747 DEBUG SenderThread:10032 [sender.py:send():382] send: artifact +2024-08-23 15:44:52,747 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-23 15:44:53,393 INFO Thread-12 :10032 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154448-v9m85jnt/files/output.log +2024-08-23 15:44:53,730 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 15:44:54,153 INFO wandb-upload_1:10032 [upload_job.py:push():89] Uploaded file /singularity_home/.local/share/wandb/artifacts/staging/tmp_o6jbw71 +2024-08-23 15:44:54,878 INFO wandb-upload_0:10032 [upload_job.py:push():89] Uploaded file /singularity_home/.local/share/wandb/artifacts/staging/tmpdgbh2byi +2024-08-23 15:44:55,934 INFO SenderThread:10032 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTE2MTk3MTc1OA==', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjQxNjQ1ODQ1MA==', 'latestArtifact': None}} +2024-08-23 15:44:55,934 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:55,934 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:44:55,934 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-23 15:44:55,934 INFO SenderThread:10032 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-23 15:44:56,394 INFO SenderThread:10032 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240823_154448-v9m85jnt/files +2024-08-23 15:44:56,395 INFO SenderThread:10032 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154448-v9m85jnt/files/requirements.txt requirements.txt +2024-08-23 15:44:56,395 INFO SenderThread:10032 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154448-v9m85jnt/files/config.yaml config.yaml +2024-08-23 15:44:56,396 INFO SenderThread:10032 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154448-v9m85jnt/files/wandb-metadata.json wandb-metadata.json +2024-08-23 15:44:56,396 INFO SenderThread:10032 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154448-v9m85jnt/files/wandb-summary.json wandb-summary.json +2024-08-23 15:44:56,398 INFO SenderThread:10032 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154448-v9m85jnt/files/output.log output.log +2024-08-23 15:44:56,399 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 10 +2024-08-23 15:44:56,399 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 15:44:56,399 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:56,401 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-23 15:44:56,401 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:56,401 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-23 15:44:56,401 INFO SenderThread:10032 [file_pusher.py:finish():172] shutting down file pusher +2024-08-23 15:44:56,731 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 15:44:56,731 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 15:44:56,790 INFO wandb-upload_1:10032 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154448-v9m85jnt/files/requirements.txt +2024-08-23 15:44:56,818 INFO wandb-upload_0:10032 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154448-v9m85jnt/files/config.yaml +2024-08-23 15:44:56,848 INFO wandb-upload_2:10032 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154448-v9m85jnt/files/wandb-summary.json +2024-08-23 15:44:56,865 INFO wandb-upload_3:10032 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154448-v9m85jnt/files/output.log +2024-08-23 15:44:57,065 INFO Thread-11 (_thread_body):10032 [sender.py:transition_state():617] send defer: 11 +2024-08-23 15:44:57,065 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:57,065 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-23 15:44:57,065 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:57,065 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-23 15:44:57,065 INFO SenderThread:10032 [file_pusher.py:join():178] waiting for file pusher +2024-08-23 15:44:57,066 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 12 +2024-08-23 15:44:57,066 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:57,066 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-23 15:44:57,066 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:57,066 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-23 15:44:57,066 INFO SenderThread:10032 [file_stream.py:finish():595] file stream finish called +2024-08-23 15:44:57,271 INFO SenderThread:10032 [file_stream.py:finish():599] file stream finish is done +2024-08-23 15:44:57,271 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 13 +2024-08-23 15:44:57,271 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:57,271 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-23 15:44:57,271 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:57,271 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-23 15:44:57,271 INFO SenderThread:10032 [sender.py:transition_state():617] send defer: 14 +2024-08-23 15:44:57,271 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: defer +2024-08-23 15:44:57,271 DEBUG SenderThread:10032 [sender.py:send():382] send: final +2024-08-23 15:44:57,271 INFO HandlerThread:10032 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-23 15:44:57,271 DEBUG SenderThread:10032 [sender.py:send():382] send: footer +2024-08-23 15:44:57,272 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: defer +2024-08-23 15:44:57,272 INFO SenderThread:10032 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-23 15:44:57,272 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 15:44:57,272 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 15:44:57,272 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: server_info +2024-08-23 15:44:57,273 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: get_summary +2024-08-23 15:44:57,273 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-23 15:44:57,273 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 15:44:57,273 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:44:57,273 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 15:44:57,274 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: server_info +2024-08-23 15:44:57,275 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: job_info +2024-08-23 15:44:57,441 DEBUG SenderThread:10032 [sender.py:send_request():409] send_request: job_info +2024-08-23 15:44:57,441 INFO MainThread:10032 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-23 15:44:57,441 INFO MainThread:10032 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-23 15:44:57,441 INFO MainThread:10032 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-23 15:44:57,441 DEBUG HandlerThread:10032 [handler.py:handle_request():146] handle_request: shutdown +2024-08-23 15:44:57,441 INFO HandlerThread:10032 [handler.py:finish():869] shutting down handler +2024-08-23 15:44:58,275 INFO WriterThread:10032 [datastore.py:close():296] close: /project/wandb/run-20240823_154448-v9m85jnt/run-v9m85jnt.wandb +2024-08-23 15:44:58,441 INFO SenderThread:10032 [sender.py:finish():1572] shutting down sender +2024-08-23 15:44:58,441 INFO SenderThread:10032 [file_pusher.py:finish():172] shutting down file pusher +2024-08-23 15:44:58,441 INFO SenderThread:10032 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240823_154448-v9m85jnt/logs/debug.log b/wandb/run-20240823_154448-v9m85jnt/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..6958ad0ac931c7cd6a2e4f821938414d7770a2ed --- /dev/null +++ b/wandb/run-20240823_154448-v9m85jnt/logs/debug.log @@ -0,0 +1,28 @@ +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Configure stats pid to 9961 +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train sample'} +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240823_154448-v9m85jnt/logs/debug.log +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240823_154448-v9m85jnt/logs/debug-internal.log +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_init.py:init():566] calling init triggers +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document'], 'valid_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document'], 'test_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 2048, 'num_workers': 2, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'Qwen2-0.5b-0.2_train_2024-08-23-15:44:18', 'wandb_project': 'llm_tutorial-0.2', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/Qwen2-0.5b-0.2', 'save': '/work/llm_recipes/models/Qwen2-0.5b-0.2', 'base_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 10, 'save_interval': 10, 'eval_iters': 10, 'optimizer': 'anyprecision', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 7500, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 7500, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 1, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 4096, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/Qwen2-0.5b-0.2', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': True, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'valid_micro_batch_size': 1, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 151680, 'gradient_accumulation_steps': 320} +2024-08-23 15:44:48,884 INFO MainThread:9961 [wandb_init.py:init():616] starting backend +2024-08-23 15:44:48,885 INFO MainThread:9961 [wandb_init.py:init():620] setting up manager +2024-08-23 15:44:48,889 INFO MainThread:9961 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-23 15:44:48,891 INFO MainThread:9961 [wandb_init.py:init():628] backend started and connected +2024-08-23 15:44:48,896 INFO MainThread:9961 [wandb_init.py:init():720] updated telemetry +2024-08-23 15:44:48,909 INFO MainThread:9961 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-23 15:44:49,395 INFO MainThread:9961 [wandb_run.py:_on_init():2262] communicating current version +2024-08-23 15:44:49,418 INFO MainThread:9961 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.7 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-23 15:44:49,418 INFO MainThread:9961 [wandb_init.py:init():804] starting run threads in backend +2024-08-23 15:44:49,539 INFO MainThread:9961 [wandb_run.py:_console_start():2241] atexit reg +2024-08-23 15:44:49,539 INFO MainThread:9961 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-23 15:44:49,539 INFO MainThread:9961 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-23 15:44:49,539 INFO MainThread:9961 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-23 15:44:49,540 INFO MainThread:9961 [wandb_init.py:init():847] run started, returning control to user process +2024-08-23 15:44:58,442 WARNING MsgRouterThr:9961 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240823_154448-v9m85jnt/run-v9m85jnt.wandb b/wandb/run-20240823_154448-v9m85jnt/run-v9m85jnt.wandb new file mode 100644 index 0000000000000000000000000000000000000000..6df9a6a783764eeb37c2a0416661cf93fd3c867a Binary files /dev/null and b/wandb/run-20240823_154448-v9m85jnt/run-v9m85jnt.wandb differ diff --git a/wandb/run-20240823_154550-khhvuked/files/config.yaml b/wandb/run-20240823_154550-khhvuked/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60533394f34587156aacbe3e879af490733d136b --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/files/config.yaml @@ -0,0 +1,342 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document + - '28623823675' + - /project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document +valid_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document +test_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 2048 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: HFPreTrainedTokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: Qwen2-0.5b-0.2_train_2024-08-23-15:45:37 +wandb_project: + desc: null + value: llm_tutorial-0.2 +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/Qwen2-0.5b-0.2 +save: + desc: null + value: /work/llm_recipes/models/Qwen2-0.5b-0.2 +base_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 10 +save_interval: + desc: null + value: 10 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: anyprecision +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 7500 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 7500 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 1 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 131072 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/Qwen2-0.5b-0.2 +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: true +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +valid_micro_batch_size: + desc: null + value: 1 +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 151680 +gradient_accumulation_steps: + desc: null + value: 320 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1724395550.528632 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +model_architecture: + desc: null + value: Qwen2ForCausalLM +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 896 +model_type: + desc: null + value: qwen2 +max_position_embeddings: + desc: null + value: 2048 +num_attention_heads: + desc: null + value: 14 +num_hidden_layers: + desc: null + value: 24 diff --git a/wandb/run-20240823_154550-khhvuked/files/output.log b/wandb/run-20240823_154550-khhvuked/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..1ef46ec37e07f9aa7e0598adada7c3bab443a25e --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/files/output.log @@ -0,0 +1,173 @@ +Created Hugging Face repository with ID koichi12/Qwen2-0.5b-0.2. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/Qwen2-0.5b-0.2, skipping model loading +--> Model /share/pretrained_lm/Qwen/Qwen2-0.5B +--> /share/pretrained_lm/Qwen/Qwen2-0.5B has 494.032768 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +Let split = None +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 2400000 + validation: 2403200 + test: 3200 +> building train, validation, and test datasets for GPT ... +Unable to save the indexes because path_to_cache is None +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/Qwen2-0.5b-0.2, skipping optimizer loading +File not found: /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/Qwen2-0.5b-0.2/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): Qwen2ForCausalLM( + (model): Qwen2Model( + (embed_tokens): Embedding(151936, 896) + (layers): ModuleList( + (0-23): 24 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Qwen2DecoderLayer( + (self_attn): Qwen2FlashAttention2( + (q_proj): Linear(in_features=896, out_features=896, bias=True) + (k_proj): Linear(in_features=896, out_features=128, bias=True) + (v_proj): Linear(in_features=896, out_features=128, bias=True) + (o_proj): Linear(in_features=896, out_features=896, bias=False) + (rotary_emb): Qwen2RotaryEmbedding() + ) + (mlp): Qwen2MLP( + (gate_proj): Linear(in_features=896, out_features=4864, bias=False) + (up_proj): Linear(in_features=896, out_features=4864, bias=False) + (down_proj): Linear(in_features=4864, out_features=896, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): Qwen2RMSNorm() + (post_attention_layernorm): Qwen2RMSNorm() + ) + ) + ) + ) + (norm): Qwen2RMSNorm() + ) + (lm_head): Linear(in_features=896, out_features=151936, bias=False) + ) +) +model config: Qwen2Config { + "_name_or_path": "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "label_smoothing": 0.0, + "max_position_embeddings": 2048, + "max_window_layers": 24, + "model_type": "qwen2", + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.43.3", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +------------------------------------------------------------------ +iteration: 1 , TFLOPS: 39.62667016552675, Tokens per sec: 11346.194241899266, Loss: 4.281282424926758 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 2 , TFLOPS: 40.77044688313634, Tokens per sec: 11673.688647892786, Loss: 4.296356201171875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 3 , TFLOPS: 40.91913365351358, Tokens per sec: 11716.261717263842, Loss: 4.264483451843262 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 4 , TFLOPS: 40.947573046001985, Tokens per sec: 11724.404689407278, Loss: 4.274806499481201 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 5 , TFLOPS: 40.680285007827315, Tokens per sec: 11647.872848932217, Loss: 4.263977527618408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 6 , TFLOPS: 40.960716510955116, Tokens per sec: 11728.168021167097, Loss: 4.264924049377441 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 7 , TFLOPS: 40.83653684880867, Tokens per sec: 11692.612003926144, Loss: 4.235414505004883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 8 , TFLOPS: 40.59130542719987, Tokens per sec: 11622.395573119122, Loss: 4.255239486694336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 9 , TFLOPS: 40.39803954268539, Tokens per sec: 11567.058289999595, Loss: 4.267778396606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 10 , TFLOPS: 40.67203366368554, Tokens per sec: 11645.51026451626, Loss: 4.24506139755249 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010 +Saving model state dict to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/model.pt +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_state_dict_utils.py:773: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned. + warnings.warn( +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_state_dict_utils.py:716: UserWarning: When using ``NO_SHARD`` for ``ShardingStrategy``, full_state_dict willbe returned. + warnings.warn( +Saved model state dict to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/model.pt +Saving optimizer state dict to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/optimizer.pt +[rank0]:[2024-08-23 15:55:39,749] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007558891999906336, 'preprocessing_with_comm': 0.0007106409998414165, 'state_converting': 0.9681659190000573, : 0.9778257900002245}) +Saved optimizer state dict to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/scheduler.pt +Saving RNG states to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/rng.pt +Saved RNG states to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010/rng.pt +Saved checkpoint to /work/llm_recipes/models/Qwen2-0.5b-0.2/iter_0000010, took 4.44s + eval ppl=49.751155853271484, eval loss=3.907033681869507 +------------------------------------------------------------------ +iteration: 11 , TFLOPS: 40.68505455620414, Tokens per sec: 11649.238500451851, Loss: 4.254261493682861 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 12 , TFLOPS: 40.534058151449834, Tokens per sec: 11606.004119894153, Loss: 4.242730140686035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13 , TFLOPS: 41.0401849932382, Tokens per sec: 11750.922010647577, Loss: 4.22725772857666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14 , TFLOPS: 41.13024406203507, Tokens per sec: 11776.708373305491, Loss: 4.2645134925842285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15 , TFLOPS: 41.11274857267346, Tokens per sec: 11771.6989385025, Loss: 4.256051540374756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16 , TFLOPS: 40.65324075099607, Tokens per sec: 11640.129342100747, Loss: 4.258178234100342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17 , TFLOPS: 40.74661170759878, Tokens per sec: 11666.863988381674, Loss: 4.23093318939209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18 , TFLOPS: 40.61688463125125, Tokens per sec: 11629.71959546336, Loss: 4.232746124267578 +------------------------------------------------------------------ +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 282, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 120, in train + total_loss += loss.item() +KeyboardInterrupt \ No newline at end of file diff --git a/wandb/run-20240823_154550-khhvuked/files/requirements.txt b/wandb/run-20240823_154550-khhvuked/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..34a2774e444bdc395654ccf8ce6da6833c7bc1ee --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/files/requirements.txt @@ -0,0 +1,375 @@ +absl-py==2.1.0 +accelerate==0.23.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +antlr4-python3-runtime==4.9.3 +anyio==4.4.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +astroid==3.2.4 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bert-score==0.3.13 +bleach==6.1.0 +blis==0.7.11 +build==1.2.1 +cachecontrol==0.14.0 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +cleo==2.1.0 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cramjam==2.8.3 +crashtest==0.4.1 +cryptography==43.0.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +dataclasses-json==0.6.7 +dataproperty==1.0.1 +datasets==2.20.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.8 +distlib==0.3.8 +distributed==2023.11.0 +distro==1.9.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +dulwich==0.21.7 +einops==0.7.0 +emoji==2.12.1 +entmax==1.3 +evaluate==0.4.2 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastparquet==2023.10.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +fugashi==1.3.2 +fuzzywuzzy==0.18.0 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +greenlet==3.0.3 +grpcio==1.60.1 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.24.5 +hydra-core==1.3.2 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +installer==0.7.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +isort==5.13.2 +jaraco.classes==3.4.0 +jedi==0.19.1 +jeepney==0.8.0 +jinja2==3.1.3 +jiter==0.5.0 +joblib==1.3.2 +json5==0.9.14 +jsonargparse==3.13.1 +jsonlines==4.0.0 +jsonnet==0.19.1 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +keyring==24.3.1 +kiwisolver==1.4.5 +langchain-community==0.2.12 +langchain-core==0.2.31 +langchain-huggingface==0.0.2 +langchain-openai==0.1.21 +langchain-text-splitters==0.2.2 +langchain==0.2.13 +langcodes==3.3.0 +langsmith==0.1.99 +lazy-loader==0.3 +levenshtein==0.25.1 +librosa==0.10.1 +lightning-utilities==0.11.6 +llm-jp-eval==1.4.0 +llvmlite==0.40.1 +lm-eval==0.3.0 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +marshmallow==3.21.3 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mbstrdecoder==1.1.3 +mccabe==0.7.0 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +mojimoji==0.0.13 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +multiprocess==0.70.16 +murmurhash==1.0.10 +mypy-extensions==1.0.0 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +neologdn==0.5.3 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numexpr==2.10.1 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +omegaconf==2.3.0 +onnx==1.15.0rc2 +openai==1.40.6 +opencv==4.7.0 +optree==0.10.0 +orjson==3.10.7 +packaging==23.2 +pandas==2.2.2 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +pathvalidate==3.2.0 +peft==0.5.0 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +pkginfo==1.11.1 +plac==1.4.3 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +poetry-core==1.9.0 +poetry-plugin-export==1.8.0 +poetry==1.8.3 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow-hotfix==0.6 +pyarrow==15.0.2 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycountry==24.6.1 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pylint==3.2.6 +pynvml==11.4.1 +pyparsing==3.1.1 +pyproject-hooks==1.1.0 +pytablewriter==1.2.0 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +python-levenshtein==0.25.1 +pytorch-lightning==2.4.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapidfuzz==3.9.6 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests-toolbelt==1.0.0 +requests==2.32.3 +rhoknp==1.7.0 +rich==13.7.0 +rmm==23.12.0 +rouge-score==0.1.2 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.2 +safetensors==0.4.3 +scikit-learn==1.5.1 +scipy==1.12.0 +secretstorage==3.3.3 +send2trash==1.8.2 +sentence-transformers==3.0.1 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +sqlalchemy==2.0.32 +sqlitedict==2.1.0 +srsly==2.4.8 +stack-data==0.6.3 +sumeval==0.2.2 +sympy==1.12 +tabledata==1.3.3 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tcolorpy==0.1.6 +tenacity==8.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +text-generation==0.7.0 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tiktoken==0.7.0 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +tomlkit==0.13.2 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchmetrics==0.10.3 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm-multiprocess==0.0.11 +tqdm==4.66.5 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +trove-classifiers==2024.7.2 +typepy==1.3.2 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.12.2 +typing-inspect==0.9.0 +tzdata==2024.1 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +unbabel-comet==2.2.2 +unidic-lite==1.0.8 +urllib3==1.26.18 +virtualenv==20.26.3 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +word2number==1.1 +xdoctest==1.0.2 +xgboost==1.7.6 +xmltodict==0.13.0 +xxhash==3.4.1 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 +zstandard==0.23.0 \ No newline at end of file diff --git a/wandb/run-20240823_154550-khhvuked/files/wandb-metadata.json b/wandb/run-20240823_154550-khhvuked/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3274dcee1b6a2d4a6c9e33c20c3e68bcbbe0c4e1 --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/files/wandb-metadata.json @@ -0,0 +1,220 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-23T06:45:51.078144", + "startedAt": "2024-08-23T06:45:50.516097", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "2048", + "--sliding-window-size", + "131072", + "--micro-batch-size", + "1", + "--valid_micro_batch_size", + "1", + "--global-batch-size", + "320", + "--train-iters", + "7500", + "--tokenizer-type", + "HFPreTrainedTokenizer", + "--tokenizer-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--train-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "28623823675", + "/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document", + "--valid-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "--test-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "7500", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "anyprecision", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "10", + "--eval-interval", + "10", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--save", + "/work/llm_recipes/models/Qwen2-0.5b-0.2", + "--load", + "/work/llm_recipes/models/Qwen2-0.5b-0.2", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--upload-all-checkpoints-to-hf", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/Qwen2-0.5b-0.2", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial-0.2", + "--wandb-name", + "Qwen2-0.5b-0.2_train_2024-08-23-15:45:37" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "887a2cc5d104c10264701f95cbbb0a6a116768d6" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.0389999999993, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.487831115722656 + } +} diff --git a/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json b/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..8fe0c6c6401136969544b9193b60d0a4357ae44f --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json @@ -0,0 +1 @@ +{"training/loss": 4.232746124267578, "training/perplexity": 68.90619757520494, "utils/batch_size": 1, "utils/global_batch_size": 320, "utils/seq_len": 2049, "utils/gradient_accumulation_steps": 320, "utils/iteration": 18, "optimizer/lr": 1.6839999999999999e-06, "optimizer/variance_l2": 0.022748586344438475, "optimizer/variance_sqrt_l2": 0.7771022172189591, "optimizer/momentum_l2": 0.8383688462308668, "optimizer/weight_l2": 825.0639369164065, "optimizer/variance_l1": 0.6041488647460938, "optimizer/variance_sqrt_l1": 3162.0, "optimizer/momentum_l1": 3053.5, "optimizer/weight_l1": 6886400.0, "optimizer/variance_abs_max": 0.01513671875, "optimizer/variance_sqrt_abs_max": 0.123046875, "optimizer/momentum_abs_max": 0.134765625, "optimizer/weight_abs_max": 175.0, "stats/1_iteration_time": 56.37969123999983, "stats/tokens_per_sec": 11629.71959546336, "stats/tokens_per_sec_per_gpu": 11629.71959546336, "stats/tflops": 40.61688463125125, "_timestamp": 1724396600.3768795, "_runtime": 1049.8482475280762, "_step": 18, "evaluation/val_loss": 3.907033681869507, "evaluation/val_ppl": 49.751155853271484, "_wandb": {"runtime": 1088}} \ No newline at end of file diff --git a/wandb/run-20240823_154550-khhvuked/logs/debug-internal.log b/wandb/run-20240823_154550-khhvuked/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..12eebd48925b809e88ef3501ef4b5f29b847a838 --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/logs/debug-internal.log @@ -0,0 +1,771 @@ +2024-08-23 15:45:50,530 INFO StreamThr :10268 [internal.py:wandb_internal():86] W&B internal server running at pid: 10268, started at: 2024-08-23 15:45:50.529910 +2024-08-23 15:45:50,532 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status +2024-08-23 15:45:50,535 INFO WriterThread:10268 [datastore.py:open_for_write():87] open: /project/wandb/run-20240823_154550-khhvuked/run-khhvuked.wandb +2024-08-23 15:45:50,536 DEBUG SenderThread:10268 [sender.py:send():382] send: header +2024-08-23 15:45:50,548 DEBUG SenderThread:10268 [sender.py:send():382] send: run +2024-08-23 15:45:50,986 INFO SenderThread:10268 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240823_154550-khhvuked/files +2024-08-23 15:45:50,986 INFO SenderThread:10268 [sender.py:_start_run_threads():1136] run started: khhvuked with start time 1724395550.528632 +2024-08-23 15:45:50,991 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: check_version +2024-08-23 15:45:50,991 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: check_version +2024-08-23 15:45:51,059 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: run_start +2024-08-23 15:45:51,066 DEBUG HandlerThread:10268 [system_info.py:__init__():27] System info init +2024-08-23 15:45:51,066 DEBUG HandlerThread:10268 [system_info.py:__init__():42] System info init done +2024-08-23 15:45:51,066 INFO HandlerThread:10268 [system_monitor.py:start():194] Starting system monitor +2024-08-23 15:45:51,066 INFO SystemMonitor:10268 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-23 15:45:51,066 INFO HandlerThread:10268 [system_monitor.py:probe():214] Collecting system info +2024-08-23 15:45:51,066 INFO SystemMonitor:10268 [interfaces.py:start():190] Started cpu monitoring +2024-08-23 15:45:51,067 INFO SystemMonitor:10268 [interfaces.py:start():190] Started disk monitoring +2024-08-23 15:45:51,067 INFO SystemMonitor:10268 [interfaces.py:start():190] Started gpu monitoring +2024-08-23 15:45:51,068 INFO SystemMonitor:10268 [interfaces.py:start():190] Started memory monitoring +2024-08-23 15:45:51,069 INFO SystemMonitor:10268 [interfaces.py:start():190] Started network monitoring +2024-08-23 15:45:51,078 DEBUG HandlerThread:10268 [system_info.py:probe():151] Probing system +2024-08-23 15:45:51,080 DEBUG HandlerThread:10268 [system_info.py:_probe_git():136] Probing git +2024-08-23 15:45:51,092 DEBUG HandlerThread:10268 [system_info.py:_probe_git():144] Probing git done +2024-08-23 15:45:51,092 DEBUG HandlerThread:10268 [system_info.py:probe():199] Probing system done +2024-08-23 15:45:51,092 DEBUG HandlerThread:10268 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-23T06:45:51.078144', 'startedAt': '2024-08-23T06:45:50.516097', 'docker': None, 'cuda': None, 'args': ('--seq-length', '2048', '--sliding-window-size', '131072', '--micro-batch-size', '1', '--valid_micro_batch_size', '1', '--global-batch-size', '320', '--train-iters', '7500', '--tokenizer-type', 'HFPreTrainedTokenizer', '--tokenizer-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--train-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document', '--valid-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '--test-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '7500', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'anyprecision', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '10', '--eval-interval', '10', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--save', '/work/llm_recipes/models/Qwen2-0.5b-0.2', '--load', '/work/llm_recipes/models/Qwen2-0.5b-0.2', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--upload-all-checkpoints-to-hf', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/Qwen2-0.5b-0.2', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial-0.2', '--wandb-name', 'Qwen2-0.5b-0.2_train_2024-08-23-15:45:37'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '887a2cc5d104c10264701f95cbbb0a6a116768d6'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.0389999999993, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}, {'current': 2400.039, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.487831115722656}} +2024-08-23 15:45:51,092 INFO HandlerThread:10268 [system_monitor.py:probe():224] Finished collecting system info +2024-08-23 15:45:51,092 INFO HandlerThread:10268 [system_monitor.py:probe():227] Publishing system info +2024-08-23 15:45:51,093 INFO HandlerThread:10268 [system_monitor.py:probe():229] Finished publishing system info +2024-08-23 15:45:51,098 DEBUG SenderThread:10268 [sender.py:send():382] send: files +2024-08-23 15:45:51,099 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-23 15:45:51,110 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: python_packages +2024-08-23 15:45:51,110 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:45:51,110 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:45:51,110 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: python_packages +2024-08-23 15:45:51,112 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:45:51,295 DEBUG SenderThread:10268 [sender.py:send():382] send: telemetry +2024-08-23 15:45:51,721 INFO wandb-upload_0:10268 [upload_job.py:push():131] Uploaded file /tmp/tmpb6n3o3pxwandb/7yig3p6l-wandb-metadata.json +2024-08-23 15:45:51,988 INFO Thread-12 :10268 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:51,988 INFO Thread-12 :10268 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154550-khhvuked/files/requirements.txt +2024-08-23 15:45:51,988 INFO Thread-12 :10268 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154550-khhvuked/files/wandb-metadata.json +2024-08-23 15:45:53,988 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:54,989 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:55,989 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:56,503 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:45:56,990 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:57,990 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:58,541 DEBUG SenderThread:10268 [sender.py:send():382] send: config +2024-08-23 15:45:58,541 DEBUG SenderThread:10268 [sender.py:send():382] send: config +2024-08-23 15:45:58,991 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:45:59,992 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:46:00,992 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:46:01,542 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:06,110 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:46:06,110 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:46:06,111 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:46:07,359 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:12,359 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:17,360 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:21,109 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:46:21,109 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:46:21,155 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:46:23,338 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:24,003 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/config.yaml +2024-08-23 15:46:28,530 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:33,531 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:36,109 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:46:36,109 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:46:36,150 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:46:39,296 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:44,297 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:49,297 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:51,069 DEBUG SystemMonitor:10268 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-23 15:46:51,070 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:46:51,109 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:46:51,109 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:46:51,151 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:46:54,364 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:46:59,365 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:04,366 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:06,538 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:47:06,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:47:06,579 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:47:09,778 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:11,460 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:47:14,029 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:47:15,504 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:20,505 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:21,071 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:47:21,538 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:47:21,538 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:47:21,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:47:25,725 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:30,726 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:35,727 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:36,538 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:47:36,538 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:47:36,579 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:47:40,762 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:45,763 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:50,763 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:47:51,073 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:47:51,538 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:47:51,538 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:47:51,579 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:47:55,769 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:00,769 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:05,770 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:06,538 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:48:06,538 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:48:06,579 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:48:07,629 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:48:07,632 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:48:07,632 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:48:07,634 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:48:08,055 INFO Thread-12 :10268 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:48:10,056 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:48:11,672 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:16,672 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:21,074 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:48:21,538 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:48:21,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:48:21,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:48:21,757 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:26,758 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:31,759 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:36,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:48:36,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:48:36,579 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:48:36,806 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:41,807 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:46,807 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:51,074 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:48:51,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:48:51,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:48:51,579 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:48:52,774 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:48:57,775 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:02,775 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:03,595 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:49:03,596 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:49:03,596 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:49:03,598 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:49:04,083 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:49:06,084 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:49:06,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:49:06,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:49:06,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:49:07,820 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:12,820 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:17,821 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:21,075 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:49:21,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:49:21,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:49:21,587 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:49:23,757 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:28,758 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:33,759 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:36,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:49:36,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:49:36,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:49:39,710 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:44,710 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:49,711 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:51,076 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:49:51,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:49:51,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:49:51,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:49:54,768 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:49:59,521 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:49:59,523 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:49:59,523 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:49:59,524 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:50:00,110 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:50:00,564 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:02,111 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:50:05,564 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:06,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:50:06,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:50:06,541 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:50:10,725 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:15,726 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:20,727 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:21,077 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:50:21,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:50:21,539 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:50:21,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:50:26,727 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:31,727 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:36,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:50:36,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:50:36,587 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:50:37,709 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:42,710 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:47,710 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:51,078 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:50:51,539 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:50:51,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:50:51,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:50:52,730 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:55,814 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:50:55,816 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:50:55,816 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:50:55,818 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:50:56,138 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:50:57,857 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:50:58,139 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:51:02,858 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:06,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:51:06,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:51:06,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:51:08,744 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:13,745 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:18,745 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:21,079 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:51:21,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:51:21,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:51:21,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:51:23,748 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:28,749 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:33,749 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:36,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:51:36,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:51:36,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:51:39,713 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:44,713 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:49,714 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:51,080 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:51:51,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:51:51,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:51:51,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:51:51,723 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:51:51,756 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:51:51,757 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:51:51,758 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:51:52,166 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:51:54,167 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:51:54,759 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:51:59,760 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:04,760 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:06,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:52:06,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:52:06,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:52:10,714 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:15,715 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:20,715 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:21,081 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:52:21,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:52:21,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:52:21,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:52:25,793 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:30,793 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:35,794 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:36,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:52:36,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:52:36,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:52:41,783 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:46,784 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:47,801 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:52:47,803 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:52:47,803 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:52:47,804 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:52:48,193 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:52:50,194 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:52:51,082 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:52:51,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:52:51,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:52:51,542 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:52:52,736 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:52:57,736 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:02,737 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:06,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:53:06,540 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:53:06,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:53:08,714 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:13,715 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:18,715 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:21,083 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:53:21,540 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:53:21,541 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:53:21,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:53:23,746 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:28,746 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:33,747 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:36,541 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:53:36,541 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:53:36,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:53:38,784 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:43,784 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:44,219 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:53:44,220 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:53:44,221 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:53:44,222 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:53:44,222 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:53:46,223 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:53:49,260 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:51,084 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:53:51,541 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:53:51,541 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:53:51,542 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:53:54,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:53:59,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:04,805 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:06,541 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:54:06,541 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:54:06,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:54:10,754 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:15,755 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:20,756 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:21,085 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:54:21,541 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:54:21,541 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:54:21,583 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:54:26,717 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:31,718 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:36,552 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:54:36,553 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:54:36,595 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:54:36,785 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:40,906 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:54:40,908 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:54:40,908 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:54:40,909 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:54:41,250 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:54:41,948 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:42,250 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:54:46,948 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:51,086 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:54:51,552 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:54:51,552 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:54:51,552 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:54:52,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:54:57,805 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:02,806 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:06,553 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:55:06,553 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:55:06,553 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:55:08,748 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:13,748 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:18,749 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:21,087 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:55:21,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:55:21,573 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:55:21,614 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:55:23,754 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:28,754 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:33,755 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:36,572 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:55:36,573 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:55:36,615 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:55:37,211 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:55:37,213 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:55:37,213 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:55:37,214 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:55:37,279 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:55:38,280 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:55:38,765 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:39,280 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:55:40,281 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:55:41,281 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:55:42,282 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:55:44,650 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:49,651 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:51,087 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:55:51,497 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:55:51,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:55:51,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:55:51,573 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:55:52,287 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:55:54,787 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:55:59,787 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:04,788 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:06,572 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:56:06,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:56:06,573 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:56:09,838 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:14,838 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:19,839 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:21,088 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:56:21,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:56:21,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:56:21,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:56:25,786 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:30,787 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:35,787 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:36,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:56:36,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:56:36,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:56:40,828 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:45,828 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:47,785 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:56:47,787 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:56:47,787 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:56:47,788 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:56:48,319 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:56:49,319 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:56:51,089 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:56:51,090 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:56:51,572 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:56:51,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:56:51,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:56:56,810 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:01,811 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:06,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:57:06,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:57:06,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:57:07,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:12,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:17,805 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:21,090 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:57:21,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:57:21,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:57:21,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:57:23,793 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:28,794 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:33,794 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:36,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:57:36,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:57:36,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:57:38,847 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:43,848 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:44,282 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:57:44,284 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:57:44,285 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:57:44,286 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:57:44,350 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:57:45,351 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:57:49,287 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:51,091 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:57:51,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:57:51,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:57:51,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:57:54,761 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:57:59,762 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:04,762 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:06,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:58:06,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:58:06,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:58:09,773 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:14,774 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:19,774 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:21,092 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:58:21,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:58:21,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:58:21,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:58:25,749 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:30,749 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:35,750 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:36,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:58:36,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:58:36,574 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:58:40,082 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:58:40,085 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:58:40,085 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:58:40,087 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:58:40,378 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:58:41,087 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:41,379 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:58:46,088 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:51,088 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:58:51,093 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:58:51,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:58:51,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:58:51,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:58:56,745 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:01,746 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:06,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:59:06,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:59:06,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:59:06,780 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:11,780 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:16,781 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:21,094 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:59:21,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:59:21,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:59:21,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:59:21,828 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:26,828 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:31,829 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:35,762 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 15:59:35,766 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 15:59:35,767 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 15:59:35,768 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 15:59:36,406 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 15:59:36,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:59:36,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:59:36,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:59:37,407 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 15:59:37,777 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:42,778 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:47,778 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:51,095 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 15:59:51,573 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 15:59:51,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 15:59:51,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 15:59:53,756 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 15:59:58,756 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:03,757 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:06,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:00:06,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:00:06,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:00:08,768 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:13,769 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:18,770 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:21,095 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:00:21,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:00:21,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:00:21,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:00:23,811 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:28,812 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:31,462 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 16:00:31,464 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 16:00:31,465 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 16:00:31,466 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:00:32,435 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 16:00:33,435 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:00:34,467 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:36,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:00:36,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:00:36,575 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:00:39,823 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:44,823 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:49,824 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:00:51,096 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:00:51,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:00:51,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:00:51,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:00:55,755 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:00,755 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:05,756 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:06,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:01:06,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:01:06,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:01:10,788 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:15,789 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:20,789 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:21,097 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:01:21,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:01:21,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:01:21,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:01:26,757 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:27,793 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 16:01:27,795 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 16:01:27,796 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 16:01:27,797 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:01:28,464 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 16:01:29,465 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:01:31,798 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:36,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:01:36,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:01:36,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:01:37,750 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:42,751 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:47,751 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:51,098 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:01:51,574 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:01:51,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:01:51,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:01:52,757 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:01:57,758 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:02,758 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:06,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:02:06,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:02:06,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:02:07,803 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:12,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:17,805 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:21,099 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:02:21,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:02:21,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:02:21,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:02:23,764 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:23,996 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 16:02:23,998 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 16:02:23,999 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 16:02:24,000 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:02:24,493 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 16:02:25,493 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:02:29,001 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:34,001 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:36,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:02:36,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:02:36,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:02:39,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:44,804 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:49,805 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:51,124 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:02:51,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:02:51,576 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:02:51,576 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:02:54,833 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:02:59,834 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:04,834 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:06,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:03:06,577 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:03:06,577 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:03:10,758 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:15,758 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:20,378 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: partial_history +2024-08-23 16:03:20,381 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 16:03:20,381 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 16:03:20,382 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:03:20,522 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 16:03:21,125 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:03:21,126 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:21,523 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:03:21,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:03:21,577 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:03:21,577 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:03:26,830 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:31,830 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:36,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:03:36,577 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:03:36,577 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:03:37,782 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:42,783 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:47,783 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:51,125 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:03:51,575 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:03:51,578 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: stop_status +2024-08-23 16:03:51,578 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: stop_status +2024-08-23 16:03:53,769 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:58,769 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:59,418 DEBUG SenderThread:10268 [sender.py:send():382] send: exit +2024-08-23 16:03:59,418 INFO SenderThread:10268 [sender.py:send_exit():589] handling exit code: 255 +2024-08-23 16:03:59,418 INFO SenderThread:10268 [sender.py:send_exit():591] handling runtime: 1088 +2024-08-23 16:03:59,420 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:03:59,420 INFO SenderThread:10268 [sender.py:send_exit():597] send defer +2024-08-23 16:03:59,420 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,421 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-23 16:03:59,421 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,421 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-23 16:03:59,421 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 1 +2024-08-23 16:03:59,421 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,421 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-23 16:03:59,421 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,421 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-23 16:03:59,421 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 2 +2024-08-23 16:03:59,421 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,421 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-23 16:03:59,421 INFO HandlerThread:10268 [system_monitor.py:finish():203] Stopping system monitor +2024-08-23 16:03:59,422 INFO HandlerThread:10268 [interfaces.py:finish():202] Joined cpu monitor +2024-08-23 16:03:59,422 DEBUG SystemMonitor:10268 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-23 16:03:59,422 INFO HandlerThread:10268 [interfaces.py:finish():202] Joined disk monitor +2024-08-23 16:03:59,422 DEBUG SystemMonitor:10268 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-23 16:03:59,455 INFO HandlerThread:10268 [interfaces.py:finish():202] Joined gpu monitor +2024-08-23 16:03:59,455 INFO HandlerThread:10268 [interfaces.py:finish():202] Joined memory monitor +2024-08-23 16:03:59,455 INFO HandlerThread:10268 [interfaces.py:finish():202] Joined network monitor +2024-08-23 16:03:59,456 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,456 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-23 16:03:59,456 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 3 +2024-08-23 16:03:59,456 DEBUG SenderThread:10268 [sender.py:send():382] send: stats +2024-08-23 16:03:59,456 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,456 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-23 16:03:59,458 DEBUG SenderThread:10268 [sender.py:send():382] send: history +2024-08-23 16:03:59,458 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: summary_record +2024-08-23 16:03:59,459 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:03:59,459 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,459 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-23 16:03:59,459 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 4 +2024-08-23 16:03:59,459 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,459 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-23 16:03:59,459 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,459 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-23 16:03:59,459 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 5 +2024-08-23 16:03:59,460 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,460 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-23 16:03:59,460 DEBUG SenderThread:10268 [sender.py:send():382] send: summary +2024-08-23 16:03:59,461 INFO SenderThread:10268 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-23 16:03:59,461 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,461 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-23 16:03:59,461 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 6 +2024-08-23 16:03:59,462 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,462 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-23 16:03:59,462 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,462 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-23 16:03:59,462 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 7 +2024-08-23 16:03:59,462 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:03:59,462 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:03:59,462 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-23 16:03:59,462 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:03:59,462 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-23 16:03:59,544 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 16:04:00,418 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 16:04:00,545 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:04:01,817 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 8 +2024-08-23 16:04:01,817 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 16:04:01,817 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:01,818 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-23 16:04:01,818 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:01,818 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-23 16:04:01,818 INFO SenderThread:10268 [job_builder.py:build():296] Attempting to build job artifact +2024-08-23 16:04:01,819 INFO SenderThread:10268 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-23 16:04:01,834 INFO SenderThread:10268 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-23 16:04:01,842 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 9 +2024-08-23 16:04:01,843 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:01,843 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-23 16:04:01,844 DEBUG SenderThread:10268 [sender.py:send():382] send: artifact +2024-08-23 16:04:02,419 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 16:04:02,546 INFO Thread-12 :10268 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:04:03,592 INFO wandb-upload_1:10268 [upload_job.py:push():86] Skipped uploading /singularity_home/.local/share/wandb/artifacts/staging/tmppgrsaqmr +2024-08-23 16:04:04,595 INFO wandb-upload_0:10268 [upload_job.py:push():89] Uploaded file /singularity_home/.local/share/wandb/artifacts/staging/tmp5zebfo2_ +2024-08-23 16:04:06,794 INFO SenderThread:10268 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTE2MTk4ODkxMQ==', 'state': 'PENDING', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjQxNjQ1ODQ1MA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTE2MTk3MTc1OA==', 'versionIndex': 0}}} +2024-08-23 16:04:06,794 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:06,794 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: status_report +2024-08-23 16:04:06,795 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-23 16:04:06,795 INFO SenderThread:10268 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-23 16:04:07,420 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: keepalive +2024-08-23 16:04:07,547 INFO SenderThread:10268 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240823_154550-khhvuked/files +2024-08-23 16:04:07,547 INFO SenderThread:10268 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154550-khhvuked/files/requirements.txt requirements.txt +2024-08-23 16:04:07,548 INFO SenderThread:10268 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154550-khhvuked/files/config.yaml config.yaml +2024-08-23 16:04:07,549 INFO SenderThread:10268 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154550-khhvuked/files/wandb-metadata.json wandb-metadata.json +2024-08-23 16:04:07,549 INFO SenderThread:10268 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json wandb-summary.json +2024-08-23 16:04:07,550 INFO SenderThread:10268 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240823_154550-khhvuked/files/output.log output.log +2024-08-23 16:04:07,552 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 10 +2024-08-23 16:04:07,552 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 16:04:07,553 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:07,554 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-23 16:04:07,554 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:07,554 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-23 16:04:07,554 INFO SenderThread:10268 [file_pusher.py:finish():172] shutting down file pusher +2024-08-23 16:04:07,947 INFO wandb-upload_1:10268 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154550-khhvuked/files/requirements.txt +2024-08-23 16:04:08,019 INFO wandb-upload_2:10268 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154550-khhvuked/files/wandb-summary.json +2024-08-23 16:04:08,023 INFO wandb-upload_3:10268 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154550-khhvuked/files/output.log +2024-08-23 16:04:08,033 INFO wandb-upload_0:10268 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240823_154550-khhvuked/files/config.yaml +2024-08-23 16:04:08,233 INFO Thread-11 (_thread_body):10268 [sender.py:transition_state():617] send defer: 11 +2024-08-23 16:04:08,234 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:08,234 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-23 16:04:08,234 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:08,234 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-23 16:04:08,234 INFO SenderThread:10268 [file_pusher.py:join():178] waiting for file pusher +2024-08-23 16:04:08,234 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 12 +2024-08-23 16:04:08,234 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:08,234 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-23 16:04:08,234 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:08,235 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-23 16:04:08,235 INFO SenderThread:10268 [file_stream.py:finish():595] file stream finish called +2024-08-23 16:04:08,420 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 16:04:08,766 INFO SenderThread:10268 [file_stream.py:finish():599] file stream finish is done +2024-08-23 16:04:08,766 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 13 +2024-08-23 16:04:08,766 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 16:04:08,767 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:08,767 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-23 16:04:08,767 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:08,767 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-23 16:04:08,767 INFO SenderThread:10268 [sender.py:transition_state():617] send defer: 14 +2024-08-23 16:04:08,767 DEBUG SenderThread:10268 [sender.py:send():382] send: final +2024-08-23 16:04:08,767 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: defer +2024-08-23 16:04:08,767 DEBUG SenderThread:10268 [sender.py:send():382] send: footer +2024-08-23 16:04:08,768 INFO HandlerThread:10268 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-23 16:04:08,768 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: defer +2024-08-23 16:04:08,768 INFO SenderThread:10268 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-23 16:04:08,768 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 16:04:08,768 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 16:04:08,768 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-23 16:04:08,769 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: server_info +2024-08-23 16:04:08,769 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: poll_exit +2024-08-23 16:04:08,769 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: server_info +2024-08-23 16:04:08,770 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: get_summary +2024-08-23 16:04:08,771 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-23 16:04:08,772 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-23 16:04:08,773 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: job_info +2024-08-23 16:04:08,932 DEBUG SenderThread:10268 [sender.py:send_request():409] send_request: job_info +2024-08-23 16:04:08,932 INFO MainThread:10268 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-23 16:04:08,933 INFO MainThread:10268 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-23 16:04:08,933 INFO MainThread:10268 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-23 16:04:08,933 DEBUG HandlerThread:10268 [handler.py:handle_request():146] handle_request: shutdown +2024-08-23 16:04:08,934 INFO HandlerThread:10268 [handler.py:finish():869] shutting down handler +2024-08-23 16:04:09,773 INFO WriterThread:10268 [datastore.py:close():296] close: /project/wandb/run-20240823_154550-khhvuked/run-khhvuked.wandb +2024-08-23 16:04:09,932 INFO SenderThread:10268 [sender.py:finish():1572] shutting down sender +2024-08-23 16:04:09,932 INFO SenderThread:10268 [file_pusher.py:finish():172] shutting down file pusher +2024-08-23 16:04:09,932 INFO SenderThread:10268 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240823_154550-khhvuked/logs/debug.log b/wandb/run-20240823_154550-khhvuked/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..9d4656e2609870e861fc30738198005f7e651fa8 --- /dev/null +++ b/wandb/run-20240823_154550-khhvuked/logs/debug.log @@ -0,0 +1,30 @@ +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Configure stats pid to 10197 +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train sample'} +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-23 15:45:50,522 INFO MainThread:10197 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240823_154550-khhvuked/logs/debug.log +2024-08-23 15:45:50,523 INFO MainThread:10197 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240823_154550-khhvuked/logs/debug-internal.log +2024-08-23 15:45:50,523 INFO MainThread:10197 [wandb_init.py:init():566] calling init triggers +2024-08-23 15:45:50,523 INFO MainThread:10197 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document'], 'valid_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document'], 'test_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 2048, 'num_workers': 2, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'Qwen2-0.5b-0.2_train_2024-08-23-15:45:37', 'wandb_project': 'llm_tutorial-0.2', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/Qwen2-0.5b-0.2', 'save': '/work/llm_recipes/models/Qwen2-0.5b-0.2', 'base_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 10, 'save_interval': 10, 'eval_iters': 10, 'optimizer': 'anyprecision', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 7500, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 7500, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 1, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 131072, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/Qwen2-0.5b-0.2', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': True, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'valid_micro_batch_size': 1, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 151680, 'gradient_accumulation_steps': 320} +2024-08-23 15:45:50,523 INFO MainThread:10197 [wandb_init.py:init():616] starting backend +2024-08-23 15:45:50,523 INFO MainThread:10197 [wandb_init.py:init():620] setting up manager +2024-08-23 15:45:50,527 INFO MainThread:10197 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-23 15:45:50,528 INFO MainThread:10197 [wandb_init.py:init():628] backend started and connected +2024-08-23 15:45:50,533 INFO MainThread:10197 [wandb_init.py:init():720] updated telemetry +2024-08-23 15:45:50,544 INFO MainThread:10197 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-23 15:45:50,990 INFO MainThread:10197 [wandb_run.py:_on_init():2262] communicating current version +2024-08-23 15:45:51,014 INFO MainThread:10197 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.7 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-23 15:45:51,014 INFO MainThread:10197 [wandb_init.py:init():804] starting run threads in backend +2024-08-23 15:45:51,108 INFO MainThread:10197 [wandb_run.py:_console_start():2241] atexit reg +2024-08-23 15:45:51,109 INFO MainThread:10197 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-23 15:45:51,109 INFO MainThread:10197 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-23 15:45:51,109 INFO MainThread:10197 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-23 15:45:51,110 INFO MainThread:10197 [wandb_init.py:init():847] run started, returning control to user process +2024-08-23 15:45:58,540 INFO MainThread:10197 [wandb_run.py:_config_callback():1343] config_cb None None {'model_architecture': 'Qwen2ForCausalLM', 'activation_function': 'silu', 'hidden_size': 896, 'model_type': 'qwen2', 'max_position_embeddings': 2048, 'num_attention_heads': 14, 'num_hidden_layers': 24} +2024-08-23 15:45:58,540 INFO MainThread:10197 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} +2024-08-23 16:04:09,934 WARNING MsgRouterThr:10197 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240823_154550-khhvuked/run-khhvuked.wandb b/wandb/run-20240823_154550-khhvuked/run-khhvuked.wandb new file mode 100644 index 0000000000000000000000000000000000000000..4ed1512cad26d2dccb10a1ecc1bed94de46cd017 Binary files /dev/null and b/wandb/run-20240823_154550-khhvuked/run-khhvuked.wandb differ diff --git a/wandb/run-20240824_201431-erlbpc25/files/config.yaml b/wandb/run-20240824_201431-erlbpc25/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..502ea693007c02aa83b1bb29d6f41bd76f3b26f5 --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/files/config.yaml @@ -0,0 +1,321 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: NO_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document + - '28623823675' + - /project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document +valid_data_path: + desc: null + value: + - '1205770' + - /work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document +test_data_path: + desc: null + value: + - '1205770' + - /work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 1024 +num_workers: + desc: null + value: 4 +tokenizer_type: + desc: null + value: HFPreTrainedTokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: yans-baseline-qwen2-0.5B_train_2024-08-24-20:14:16 +wandb_project: + desc: null + value: yans_experiment +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/yans-baseline-qwen2-0.5B +save: + desc: null + value: /work/llm_recipes/models/yans-baseline-qwen2-0.5B +base_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-0.5B +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: anyprecision +lr: + desc: null + value: 3.5e-06 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 23178 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 3.5e-07 +train_iters: + desc: null + value: 23178 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 1280 +micro_batch_size: + desc: null + value: 16 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 131072 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-08 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/yans-baseline-qwen2-0.5B +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: true +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +valid_micro_batch_size: + desc: null + value: 1 +rank: + desc: null + value: 0 +world_size: + desc: null + value: 8 +padded_vocab_size: + desc: null + value: 151680 +gradient_accumulation_steps: + desc: null + value: 10 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1724498071.869963 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 diff --git a/wandb/run-20240824_201431-erlbpc25/files/output.log b/wandb/run-20240824_201431-erlbpc25/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..29ec19129308866369d74ef906daf4374a816b20 --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/files/output.log @@ -0,0 +1,51 @@ +Created Hugging Face repository with ID koichi12/yans-baseline-qwen2-0.5B. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/yans-baseline-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-baseline-qwen2-0.5B/latest_iteration.txt +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +File not found: /work/llm_recipes/models/yans-baseline-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-baseline-qwen2-0.5B/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-baseline-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-baseline-qwen2-0.5B/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-baseline-qwen2-0.5B, skipping model loading +--> Model /share/pretrained_lm/Qwen/Qwen2-0.5B +--> /share/pretrained_lm/Qwen/Qwen2-0.5B has 494.032768 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +Let split = None +Unable to save the indexes because path_to_cache is None +Traceback (most recent call last): + File "/project/megatron_lm/megatron/core/datasets/blended_megatron_dataset_builder.py", line 270, in build_generic_dataset + dataset = cls(*args) + File "/project/megatron_lm/megatron/core/datasets/indexed_dataset.py", line 359, in __init__ + self.initialize(path_prefix, multimodal) + File "/project/megatron_lm/megatron/core/datasets/indexed_dataset.py", line 374, in initialize + self.index = _IndexReader(get_idx_path(self.path_prefix), self.multimodal) + File "/project/megatron_lm/megatron/core/datasets/indexed_dataset.py", line 233, in __init__ + with open(idx_path, "rb") as stream: +FileNotFoundError: [Errno 2] No such file or directory: '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document.idx' +The above exception was the direct cause of the following exception: +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 162, in main + train_dataset, validation_dataset, test_dataset = build_train_valid_test_datasets() + File "/project/src/llama_recipes/datasets/pretrain_dataset.py", line 76, in build_train_valid_test_datasets + return train_valid_test_datasets_provider(train_val_test_num_samples) + File "/project/src/llama_recipes/datasets/pretrain_dataset.py", line 46, in train_valid_test_datasets_provider + ).build() + File "/project/megatron_lm/megatron/core/datasets/blended_megatron_dataset_builder.py", line 56, in build + return self._build_blended_dataset_splits() + File "/project/megatron_lm/megatron/core/datasets/blended_megatron_dataset_builder.py", line 162, in _build_blended_dataset_splits + self._build_megatron_dataset_splits( + File "/project/megatron_lm/megatron/core/datasets/blended_megatron_dataset_builder.py", line 199, in _build_megatron_dataset_splits + indexed_dataset = self.build_generic_dataset( + File "/project/megatron_lm/megatron/core/datasets/blended_megatron_dataset_builder.py", line 278, in build_generic_dataset + raise Exception(log) from err +Exception: Failed to write dataset materials to the data cache directory. Please supply a directory to which you have write access via the path_to_cache attribute in BlendedMegatronDatasetConfig and retry. Refer to the preserved traceback above for more information. +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 29667840 + validation: 1484800 + test: 12800 +> building train, validation, and test datasets for GPT ... \ No newline at end of file diff --git a/wandb/run-20240824_201431-erlbpc25/files/requirements.txt b/wandb/run-20240824_201431-erlbpc25/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..34a2774e444bdc395654ccf8ce6da6833c7bc1ee --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/files/requirements.txt @@ -0,0 +1,375 @@ +absl-py==2.1.0 +accelerate==0.23.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +antlr4-python3-runtime==4.9.3 +anyio==4.4.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +astroid==3.2.4 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bert-score==0.3.13 +bleach==6.1.0 +blis==0.7.11 +build==1.2.1 +cachecontrol==0.14.0 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +cleo==2.1.0 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cramjam==2.8.3 +crashtest==0.4.1 +cryptography==43.0.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +dataclasses-json==0.6.7 +dataproperty==1.0.1 +datasets==2.20.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.8 +distlib==0.3.8 +distributed==2023.11.0 +distro==1.9.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +dulwich==0.21.7 +einops==0.7.0 +emoji==2.12.1 +entmax==1.3 +evaluate==0.4.2 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastparquet==2023.10.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +fugashi==1.3.2 +fuzzywuzzy==0.18.0 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +greenlet==3.0.3 +grpcio==1.60.1 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.24.5 +hydra-core==1.3.2 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +installer==0.7.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +isort==5.13.2 +jaraco.classes==3.4.0 +jedi==0.19.1 +jeepney==0.8.0 +jinja2==3.1.3 +jiter==0.5.0 +joblib==1.3.2 +json5==0.9.14 +jsonargparse==3.13.1 +jsonlines==4.0.0 +jsonnet==0.19.1 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +keyring==24.3.1 +kiwisolver==1.4.5 +langchain-community==0.2.12 +langchain-core==0.2.31 +langchain-huggingface==0.0.2 +langchain-openai==0.1.21 +langchain-text-splitters==0.2.2 +langchain==0.2.13 +langcodes==3.3.0 +langsmith==0.1.99 +lazy-loader==0.3 +levenshtein==0.25.1 +librosa==0.10.1 +lightning-utilities==0.11.6 +llm-jp-eval==1.4.0 +llvmlite==0.40.1 +lm-eval==0.3.0 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +marshmallow==3.21.3 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mbstrdecoder==1.1.3 +mccabe==0.7.0 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +mojimoji==0.0.13 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +multiprocess==0.70.16 +murmurhash==1.0.10 +mypy-extensions==1.0.0 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +neologdn==0.5.3 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numexpr==2.10.1 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +omegaconf==2.3.0 +onnx==1.15.0rc2 +openai==1.40.6 +opencv==4.7.0 +optree==0.10.0 +orjson==3.10.7 +packaging==23.2 +pandas==2.2.2 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +pathvalidate==3.2.0 +peft==0.5.0 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +pkginfo==1.11.1 +plac==1.4.3 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +poetry-core==1.9.0 +poetry-plugin-export==1.8.0 +poetry==1.8.3 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow-hotfix==0.6 +pyarrow==15.0.2 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycountry==24.6.1 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pylint==3.2.6 +pynvml==11.4.1 +pyparsing==3.1.1 +pyproject-hooks==1.1.0 +pytablewriter==1.2.0 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +python-levenshtein==0.25.1 +pytorch-lightning==2.4.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapidfuzz==3.9.6 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests-toolbelt==1.0.0 +requests==2.32.3 +rhoknp==1.7.0 +rich==13.7.0 +rmm==23.12.0 +rouge-score==0.1.2 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.2 +safetensors==0.4.3 +scikit-learn==1.5.1 +scipy==1.12.0 +secretstorage==3.3.3 +send2trash==1.8.2 +sentence-transformers==3.0.1 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +sqlalchemy==2.0.32 +sqlitedict==2.1.0 +srsly==2.4.8 +stack-data==0.6.3 +sumeval==0.2.2 +sympy==1.12 +tabledata==1.3.3 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tcolorpy==0.1.6 +tenacity==8.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +text-generation==0.7.0 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tiktoken==0.7.0 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +tomlkit==0.13.2 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchmetrics==0.10.3 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm-multiprocess==0.0.11 +tqdm==4.66.5 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +trove-classifiers==2024.7.2 +typepy==1.3.2 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.12.2 +typing-inspect==0.9.0 +tzdata==2024.1 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +unbabel-comet==2.2.2 +unidic-lite==1.0.8 +urllib3==1.26.18 +virtualenv==20.26.3 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +word2number==1.1 +xdoctest==1.0.2 +xgboost==1.7.6 +xmltodict==0.13.0 +xxhash==3.4.1 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 +zstandard==0.23.0 \ No newline at end of file diff --git a/wandb/run-20240824_201431-erlbpc25/files/wandb-metadata.json b/wandb/run-20240824_201431-erlbpc25/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..be52c70db54790b737d3286cd85b1238e5662975 --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/files/wandb-metadata.json @@ -0,0 +1,880 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-24T11:14:34.753727", + "startedAt": "2024-08-24T11:14:31.856158", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "1024", + "--sliding-window-size", + "131072", + "--micro-batch-size", + "16", + "--valid_micro_batch_size", + "1", + "--global-batch-size", + "1280", + "--train-iters", + "23178", + "--tokenizer-type", + "HFPreTrainedTokenizer", + "--tokenizer-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--train-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "28623823675", + "/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document", + "--valid-data-path", + "1205770", + "/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document", + "--test-data-path", + "1205770", + "/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document", + "--lr", + "3.5e-6", + "--min-lr", + "3.5e-7", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "23178", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "anyprecision", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-8", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "--save", + "/work/llm_recipes/models/yans-baseline-qwen2-0.5B", + "--load", + "/work/llm_recipes/models/yans-baseline-qwen2-0.5B", + "--num-workers", + "4", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "NO_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--upload-all-checkpoints-to-hf", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/yans-baseline-qwen2-0.5B", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "yans_experiment", + "--wandb-name", + "yans-baseline-qwen2-0.5B_train_2024-08-24-20:14:16" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "887a2cc5d104c10264701f95cbbb0a6a116768d6" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 144, + "cpu_count_logical": 144, + "cpu_freq": { + "current": 2400.0340000000015, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 8, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 453.4449462890625 + } +} diff --git a/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json b/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..e682bae6b5eaeba8295fd0fffdc51474a259249e --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 5}} \ No newline at end of file diff --git a/wandb/run-20240824_201431-erlbpc25/logs/debug-internal.log b/wandb/run-20240824_201431-erlbpc25/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..c17baa20b5c4d48fd0be042ac9b86f6095fefb9c --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/logs/debug-internal.log @@ -0,0 +1,190 @@ +2024-08-24 20:14:31,872 INFO StreamThr :17554 [internal.py:wandb_internal():86] W&B internal server running at pid: 17554, started at: 2024-08-24 20:14:31.871295 +2024-08-24 20:14:31,873 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: status +2024-08-24 20:14:31,877 INFO WriterThread:17554 [datastore.py:open_for_write():87] open: /project/wandb/run-20240824_201431-erlbpc25/run-erlbpc25.wandb +2024-08-24 20:14:31,877 DEBUG SenderThread:17554 [sender.py:send():382] send: header +2024-08-24 20:14:31,893 DEBUG SenderThread:17554 [sender.py:send():382] send: run +2024-08-24 20:14:34,608 INFO SenderThread:17554 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240824_201431-erlbpc25/files +2024-08-24 20:14:34,608 INFO SenderThread:17554 [sender.py:_start_run_threads():1136] run started: erlbpc25 with start time 1724498071.869963 +2024-08-24 20:14:34,613 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: check_version +2024-08-24 20:14:34,613 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: check_version +2024-08-24 20:14:34,683 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: run_start +2024-08-24 20:14:34,690 DEBUG HandlerThread:17554 [system_info.py:__init__():27] System info init +2024-08-24 20:14:34,690 DEBUG HandlerThread:17554 [system_info.py:__init__():42] System info init done +2024-08-24 20:14:34,690 INFO HandlerThread:17554 [system_monitor.py:start():194] Starting system monitor +2024-08-24 20:14:34,691 INFO SystemMonitor:17554 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-24 20:14:34,691 INFO HandlerThread:17554 [system_monitor.py:probe():214] Collecting system info +2024-08-24 20:14:34,691 INFO SystemMonitor:17554 [interfaces.py:start():190] Started cpu monitoring +2024-08-24 20:14:34,691 INFO SystemMonitor:17554 [interfaces.py:start():190] Started disk monitoring +2024-08-24 20:14:34,692 INFO SystemMonitor:17554 [interfaces.py:start():190] Started gpu monitoring +2024-08-24 20:14:34,694 INFO SystemMonitor:17554 [interfaces.py:start():190] Started memory monitoring +2024-08-24 20:14:34,695 INFO SystemMonitor:17554 [interfaces.py:start():190] Started network monitoring +2024-08-24 20:14:34,753 DEBUG HandlerThread:17554 [system_info.py:probe():151] Probing system +2024-08-24 20:14:34,755 DEBUG HandlerThread:17554 [system_info.py:_probe_git():136] Probing git +2024-08-24 20:14:34,769 DEBUG HandlerThread:17554 [system_info.py:_probe_git():144] Probing git done +2024-08-24 20:14:34,769 DEBUG HandlerThread:17554 [system_info.py:probe():199] Probing system done +2024-08-24 20:14:34,769 DEBUG HandlerThread:17554 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-24T11:14:34.753727', 'startedAt': '2024-08-24T11:14:31.856158', 'docker': None, 'cuda': None, 'args': ('--seq-length', '1024', '--sliding-window-size', '131072', '--micro-batch-size', '16', '--valid_micro_batch_size', '1', '--global-batch-size', '1280', '--train-iters', '23178', '--tokenizer-type', 'HFPreTrainedTokenizer', '--tokenizer-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--train-data-path', '1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document', '--valid-data-path', '1205770', '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document', '--test-data-path', '1205770', '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document', '--lr', '3.5e-6', '--min-lr', '3.5e-7', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '23178', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'anyprecision', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-8', '--save-interval', '200', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/Qwen/Qwen2-0.5B', '--save', '/work/llm_recipes/models/yans-baseline-qwen2-0.5B', '--load', '/work/llm_recipes/models/yans-baseline-qwen2-0.5B', '--num-workers', '4', '--fsdp-activation-checkpointing', '--sharding-strategy', 'NO_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--upload-all-checkpoints-to-hf', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/yans-baseline-qwen2-0.5B', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'yans_experiment', '--wandb-name', 'yans-baseline-qwen2-0.5B_train_2024-08-24-20:14:16'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '887a2cc5d104c10264701f95cbbb0a6a116768d6'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 144, 'cpu_count_logical': 144, 'cpu_freq': {'current': 2400.0340000000015, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 8, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 453.4449462890625}} +2024-08-24 20:14:34,769 INFO HandlerThread:17554 [system_monitor.py:probe():224] Finished collecting system info +2024-08-24 20:14:34,769 INFO HandlerThread:17554 [system_monitor.py:probe():227] Publishing system info +2024-08-24 20:14:34,771 INFO HandlerThread:17554 [system_monitor.py:probe():229] Finished publishing system info +2024-08-24 20:14:34,777 DEBUG SenderThread:17554 [sender.py:send():382] send: files +2024-08-24 20:14:34,777 INFO SenderThread:17554 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-24 20:14:34,789 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: python_packages +2024-08-24 20:14:34,789 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: stop_status +2024-08-24 20:14:34,789 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: python_packages +2024-08-24 20:14:34,790 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-24 20:14:34,791 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: stop_status +2024-08-24 20:14:34,965 DEBUG SenderThread:17554 [sender.py:send():382] send: telemetry +2024-08-24 20:14:35,443 INFO wandb-upload_0:17554 [upload_job.py:push():131] Uploaded file /tmp/tmpsf0_afskwandb/zwhg2kol-wandb-metadata.json +2024-08-24 20:14:35,610 INFO Thread-12 :17554 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240824_201431-erlbpc25/files/requirements.txt +2024-08-24 20:14:35,610 INFO Thread-12 :17554 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240824_201431-erlbpc25/files/wandb-metadata.json +2024-08-24 20:14:35,610 INFO Thread-12 :17554 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240824_201431-erlbpc25/files/output.log +2024-08-24 20:14:36,972 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: status_report +2024-08-24 20:14:37,610 INFO Thread-12 :17554 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240824_201431-erlbpc25/files/output.log +2024-08-24 20:14:39,612 INFO Thread-12 :17554 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240824_201431-erlbpc25/files/output.log +2024-08-24 20:14:40,471 DEBUG SenderThread:17554 [sender.py:send():382] send: exit +2024-08-24 20:14:40,471 INFO SenderThread:17554 [sender.py:send_exit():589] handling exit code: 1 +2024-08-24 20:14:40,471 INFO SenderThread:17554 [sender.py:send_exit():591] handling runtime: 5 +2024-08-24 20:14:40,472 INFO SenderThread:17554 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-24 20:14:40,473 INFO SenderThread:17554 [sender.py:send_exit():597] send defer +2024-08-24 20:14:40,473 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:40,473 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-24 20:14:40,473 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:40,473 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-24 20:14:40,473 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 1 +2024-08-24 20:14:40,473 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:40,473 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-24 20:14:40,473 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:40,473 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-24 20:14:40,473 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 2 +2024-08-24 20:14:40,474 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:40,474 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-24 20:14:40,474 INFO HandlerThread:17554 [system_monitor.py:finish():203] Stopping system monitor +2024-08-24 20:14:40,474 DEBUG SystemMonitor:17554 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-24 20:14:40,474 INFO HandlerThread:17554 [interfaces.py:finish():202] Joined cpu monitor +2024-08-24 20:14:40,474 DEBUG SystemMonitor:17554 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-24 20:14:40,475 INFO HandlerThread:17554 [interfaces.py:finish():202] Joined disk monitor +2024-08-24 20:14:40,475 DEBUG SystemMonitor:17554 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-24 20:14:40,613 INFO Thread-12 :17554 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json +2024-08-24 20:14:41,611 INFO HandlerThread:17554 [interfaces.py:finish():202] Joined gpu monitor +2024-08-24 20:14:41,611 INFO HandlerThread:17554 [interfaces.py:finish():202] Joined memory monitor +2024-08-24 20:14:41,611 INFO HandlerThread:17554 [interfaces.py:finish():202] Joined network monitor +2024-08-24 20:14:41,611 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-24 20:14:41,612 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:41,613 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-24 20:14:41,613 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 3 +2024-08-24 20:14:41,613 DEBUG SenderThread:17554 [sender.py:send():382] send: stats +2024-08-24 20:14:41,613 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:41,613 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: poll_exit +2024-08-24 20:14:41,614 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-24 20:14:41,614 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:41,614 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-24 20:14:41,614 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 4 +2024-08-24 20:14:41,614 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:41,614 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-24 20:14:41,615 INFO Thread-12 :17554 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240824_201431-erlbpc25/files/output.log +2024-08-24 20:14:41,615 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:41,615 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-24 20:14:41,615 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 5 +2024-08-24 20:14:41,615 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:41,615 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-24 20:14:41,615 DEBUG SenderThread:17554 [sender.py:send():382] send: summary +2024-08-24 20:14:41,616 INFO SenderThread:17554 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-24 20:14:41,617 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:41,617 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-24 20:14:41,617 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 6 +2024-08-24 20:14:41,617 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:41,617 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-24 20:14:41,617 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:41,617 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-24 20:14:41,620 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: status_report +2024-08-24 20:14:41,817 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 7 +2024-08-24 20:14:41,817 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:41,817 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-24 20:14:41,817 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:41,817 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-24 20:14:42,471 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-24 20:14:42,616 INFO Thread-12 :17554 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json +2024-08-24 20:14:42,616 INFO Thread-12 :17554 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240824_201431-erlbpc25/files/config.yaml +2024-08-24 20:14:45,000 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 8 +2024-08-24 20:14:45,000 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: poll_exit +2024-08-24 20:14:45,000 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:45,000 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-24 20:14:45,000 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:45,000 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-24 20:14:45,000 INFO SenderThread:17554 [job_builder.py:build():296] Attempting to build job artifact +2024-08-24 20:14:45,001 INFO SenderThread:17554 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-24 20:14:45,016 INFO SenderThread:17554 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-24 20:14:45,025 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 9 +2024-08-24 20:14:45,026 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:45,026 DEBUG SenderThread:17554 [sender.py:send():382] send: artifact +2024-08-24 20:14:45,026 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-24 20:14:45,472 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-24 20:14:45,618 INFO Thread-12 :17554 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240824_201431-erlbpc25/files/output.log +2024-08-24 20:14:45,884 INFO SenderThread:17554 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTE2MzU1Mzg0Mw==', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjQxNjgwMzg3NA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTE2MzU1Mzg0Mw==', 'versionIndex': 0}}} +2024-08-24 20:14:45,884 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:45,884 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-24 20:14:45,884 INFO SenderThread:17554 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-24 20:14:46,619 INFO SenderThread:17554 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240824_201431-erlbpc25/files +2024-08-24 20:14:46,619 INFO SenderThread:17554 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240824_201431-erlbpc25/files/requirements.txt requirements.txt +2024-08-24 20:14:46,619 INFO SenderThread:17554 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240824_201431-erlbpc25/files/config.yaml config.yaml +2024-08-24 20:14:46,621 INFO SenderThread:17554 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240824_201431-erlbpc25/files/wandb-metadata.json wandb-metadata.json +2024-08-24 20:14:46,621 INFO SenderThread:17554 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json wandb-summary.json +2024-08-24 20:14:46,622 INFO SenderThread:17554 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240824_201431-erlbpc25/files/output.log output.log +2024-08-24 20:14:46,624 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 10 +2024-08-24 20:14:46,624 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: poll_exit +2024-08-24 20:14:46,624 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:46,624 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-24 20:14:46,626 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:46,626 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-24 20:14:46,626 INFO SenderThread:17554 [file_pusher.py:finish():172] shutting down file pusher +2024-08-24 20:14:47,027 INFO wandb-upload_0:17554 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240824_201431-erlbpc25/files/requirements.txt +2024-08-24 20:14:47,072 INFO wandb-upload_1:17554 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240824_201431-erlbpc25/files/config.yaml +2024-08-24 20:14:47,084 INFO wandb-upload_3:17554 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240824_201431-erlbpc25/files/output.log +2024-08-24 20:14:47,107 INFO wandb-upload_2:17554 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240824_201431-erlbpc25/files/wandb-summary.json +2024-08-24 20:14:47,307 INFO Thread-11 (_thread_body):17554 [sender.py:transition_state():617] send defer: 11 +2024-08-24 20:14:47,307 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:47,307 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-24 20:14:47,308 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:47,308 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-24 20:14:47,308 INFO SenderThread:17554 [file_pusher.py:join():178] waiting for file pusher +2024-08-24 20:14:47,308 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 12 +2024-08-24 20:14:47,308 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:47,308 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-24 20:14:47,308 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:47,308 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-24 20:14:47,308 INFO SenderThread:17554 [file_stream.py:finish():595] file stream finish called +2024-08-24 20:14:47,472 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-24 20:14:47,478 INFO SenderThread:17554 [file_stream.py:finish():599] file stream finish is done +2024-08-24 20:14:47,478 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 13 +2024-08-24 20:14:47,478 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: poll_exit +2024-08-24 20:14:47,478 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:47,478 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-24 20:14:47,479 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:47,479 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-24 20:14:47,479 INFO SenderThread:17554 [sender.py:transition_state():617] send defer: 14 +2024-08-24 20:14:47,479 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: defer +2024-08-24 20:14:47,479 DEBUG SenderThread:17554 [sender.py:send():382] send: final +2024-08-24 20:14:47,479 INFO HandlerThread:17554 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-24 20:14:47,479 DEBUG SenderThread:17554 [sender.py:send():382] send: footer +2024-08-24 20:14:47,480 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: defer +2024-08-24 20:14:47,480 INFO SenderThread:17554 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-24 20:14:47,480 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-24 20:14:47,480 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-24 20:14:47,480 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: poll_exit +2024-08-24 20:14:47,481 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: poll_exit +2024-08-24 20:14:47,481 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: server_info +2024-08-24 20:14:47,481 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: server_info +2024-08-24 20:14:47,482 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: get_summary +2024-08-24 20:14:47,483 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-24 20:14:47,483 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-24 20:14:47,483 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: job_info +2024-08-24 20:14:47,638 DEBUG SenderThread:17554 [sender.py:send_request():409] send_request: job_info +2024-08-24 20:14:47,638 INFO MainThread:17554 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-24 20:14:47,638 INFO MainThread:17554 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-24 20:14:47,638 INFO MainThread:17554 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-24 20:14:47,638 DEBUG HandlerThread:17554 [handler.py:handle_request():146] handle_request: shutdown +2024-08-24 20:14:47,638 INFO HandlerThread:17554 [handler.py:finish():869] shutting down handler +2024-08-24 20:14:48,483 INFO WriterThread:17554 [datastore.py:close():296] close: /project/wandb/run-20240824_201431-erlbpc25/run-erlbpc25.wandb +2024-08-24 20:14:48,638 INFO SenderThread:17554 [sender.py:finish():1572] shutting down sender +2024-08-24 20:14:48,638 INFO SenderThread:17554 [file_pusher.py:finish():172] shutting down file pusher +2024-08-24 20:14:48,638 INFO SenderThread:17554 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240824_201431-erlbpc25/logs/debug.log b/wandb/run-20240824_201431-erlbpc25/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..c4176ee124c7135f6d658451457d5879288b1ec1 --- /dev/null +++ b/wandb/run-20240824_201431-erlbpc25/logs/debug.log @@ -0,0 +1,28 @@ +2024-08-24 20:14:31,862 INFO MainThread:16926 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_setup.py:_flush():76] Configure stats pid to 16926 +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train baseline'} +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240824_201431-erlbpc25/logs/debug.log +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240824_201431-erlbpc25/logs/debug-internal.log +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_init.py:init():566] calling init triggers +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'NO_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document'], 'valid_data_path': ['1205770', '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document'], 'test_data_path': ['1205770', '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 1024, 'num_workers': 4, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'yans-baseline-qwen2-0.5B_train_2024-08-24-20:14:16', 'wandb_project': 'yans_experiment', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/yans-baseline-qwen2-0.5B', 'save': '/work/llm_recipes/models/yans-baseline-qwen2-0.5B', 'base_model': '/share/pretrained_lm/Qwen/Qwen2-0.5B', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 'eval_iters': 10, 'optimizer': 'anyprecision', 'lr': 3.5e-06, 'lr_decay_style': 'cosine', 'lr_decay_iters': 23178, 'lr_warmup_iters': 500, 'min_lr': 3.5e-07, 'train_iters': 23178, 'train_samples': None, 'global_batch_size': 1280, 'micro_batch_size': 16, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 131072, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-08, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/yans-baseline-qwen2-0.5B', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': True, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'valid_micro_batch_size': 1, 'rank': 0, 'world_size': 8, 'padded_vocab_size': 151680, 'gradient_accumulation_steps': 10} +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_init.py:init():616] starting backend +2024-08-24 20:14:31,863 INFO MainThread:16926 [wandb_init.py:init():620] setting up manager +2024-08-24 20:14:31,868 INFO MainThread:16926 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-24 20:14:31,869 INFO MainThread:16926 [wandb_init.py:init():628] backend started and connected +2024-08-24 20:14:31,876 INFO MainThread:16926 [wandb_init.py:init():720] updated telemetry +2024-08-24 20:14:31,888 INFO MainThread:16926 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-24 20:14:34,613 INFO MainThread:16926 [wandb_run.py:_on_init():2262] communicating current version +2024-08-24 20:14:34,634 INFO MainThread:16926 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.7 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-24 20:14:34,635 INFO MainThread:16926 [wandb_init.py:init():804] starting run threads in backend +2024-08-24 20:14:34,787 INFO MainThread:16926 [wandb_run.py:_console_start():2241] atexit reg +2024-08-24 20:14:34,787 INFO MainThread:16926 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-24 20:14:34,787 INFO MainThread:16926 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-24 20:14:34,788 INFO MainThread:16926 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-24 20:14:34,789 INFO MainThread:16926 [wandb_init.py:init():847] run started, returning control to user process +2024-08-24 20:14:48,639 WARNING MsgRouterThr:16926 [router.py:message_loop():77] message_loop has been closed diff --git a/wandb/run-20240824_201431-erlbpc25/run-erlbpc25.wandb b/wandb/run-20240824_201431-erlbpc25/run-erlbpc25.wandb new file mode 100644 index 0000000000000000000000000000000000000000..d0a0ba1c4b8dc0fe1752c16881d51dc72759e882 Binary files /dev/null and b/wandb/run-20240824_201431-erlbpc25/run-erlbpc25.wandb differ diff --git a/wandb/run-20240829_195743-wevlcym0/files/config.yaml b/wandb/run-20240829_195743-wevlcym0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2c4811755c5fc51ba174834fbb4c2eebcfbff53a --- /dev/null +++ b/wandb/run-20240829_195743-wevlcym0/files/config.yaml @@ -0,0 +1,342 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '1754785366' + - /project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document + - '28623823675' + - /project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document +valid_data_path: + desc: null + value: + - '1205770' + - /work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document +test_data_path: + desc: null + value: + - '1205770' + - /work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 1024 +num_workers: + desc: null + value: 4 +tokenizer_type: + desc: null + value: HFPreTrainedTokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-1.5B +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: yans-baseline-qwen2-1.5B-3.5e-5_train_2024-08-29-19:57:17 +wandb_project: + desc: null + value: yans_experiment +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5 +save: + desc: null + value: /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5 +base_model: + desc: null + value: /share/pretrained_lm/Qwen/Qwen2-1.5B +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: anyprecision +lr: + desc: null + value: 3.5e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 23178 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 3.5e-06 +train_iters: + desc: null + value: 23178 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 1280 +micro_batch_size: + desc: null + value: 16 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 131072 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-08 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/yans-baseline-qwen2-1.5B-3.5e-5 +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: true +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +valid_micro_batch_size: + desc: null + value: 1 +rank: + desc: null + value: 0 +world_size: + desc: null + value: 8 +padded_vocab_size: + desc: null + value: 151680 +gradient_accumulation_steps: + desc: null + value: 10 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1724929063.797075 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + - 105 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +model_architecture: + desc: null + value: Qwen2ForCausalLM +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 1536 +model_type: + desc: null + value: qwen2 +max_position_embeddings: + desc: null + value: 1024 +num_attention_heads: + desc: null + value: 12 +num_hidden_layers: + desc: null + value: 28 diff --git a/wandb/run-20240829_195743-wevlcym0/files/output.log b/wandb/run-20240829_195743-wevlcym0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..4af59e6d2cbe4f36ede2850294bf89ed3e0588c9 --- /dev/null +++ b/wandb/run-20240829_195743-wevlcym0/files/output.log @@ -0,0 +1,23941 @@ +Created Hugging Face repository with ID koichi12/yans-baseline-qwen2-1.5B-3.5e-5. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +Loading model state dict from /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013800/model.pt +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +Loaded model state dict from /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013800/model.pt +--> Model /share/pretrained_lm/Qwen/Qwen2-1.5B +--> /share/pretrained_lm/Qwen/Qwen2-1.5B has 1543.714304 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 29667840 + validation: 1484800 + test: 12800 +> building train, validation, and test datasets for GPT ... +Let split = None +> finished creating GPT datasets ... +Loading optimizer state dict from /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013800/optimizer.pt +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Loaded optimizer state dict from /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013800/optimizer.pt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): Qwen2ForCausalLM( + (model): Qwen2Model( + (embed_tokens): Embedding(151936, 1536) + (layers): ModuleList( + (0-27): 28 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Qwen2DecoderLayer( + (self_attn): Qwen2FlashAttention2( + (q_proj): Linear(in_features=1536, out_features=1536, bias=True) + (k_proj): Linear(in_features=1536, out_features=256, bias=True) + (v_proj): Linear(in_features=1536, out_features=256, bias=True) + (o_proj): Linear(in_features=1536, out_features=1536, bias=False) + (rotary_emb): Qwen2RotaryEmbedding() + ) + (mlp): Qwen2MLP( + (gate_proj): Linear(in_features=1536, out_features=8960, bias=False) + (up_proj): Linear(in_features=1536, out_features=8960, bias=False) + (down_proj): Linear(in_features=8960, out_features=1536, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): Qwen2RMSNorm() + (post_attention_layernorm): Qwen2RMSNorm() + ) + ) + ) + ) + (norm): Qwen2RMSNorm() + ) + (lm_head): Linear(in_features=1536, out_features=151936, bias=False) + ) +) +model config: Qwen2Config { + "_name_or_path": "/share/pretrained_lm/Qwen/Qwen2-1.5B", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 1536, + "initializer_range": 0.02, + "intermediate_size": 8960, + "label_smoothing": 0.0, + "max_position_embeddings": 1024, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 12, + "num_hidden_layers": 28, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.43.3", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} +[rank0]:[2024-08-29 19:58:45,342] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _flatten_optim_state_dict() profiling: defaultdict(, {}) +------------------------------------------------------------------ +iteration: 13801 , TFLOPS: 79.59332552517235, Tokens per sec: 65037.75520109389, Loss: 2.2596147060394287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13802 , TFLOPS: 97.941791685012, Tokens per sec: 80030.75923183761, Loss: 2.269324541091919 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13803 , TFLOPS: 95.49472213572162, Tokens per sec: 78031.1957099382, Loss: 2.260453701019287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13804 , TFLOPS: 96.33040944739113, Tokens per sec: 78714.05732480859, Loss: 2.264991283416748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13805 , TFLOPS: 97.50511571442541, Tokens per sec: 79673.9400552318, Loss: 2.250718116760254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13806 , TFLOPS: 96.82513165705544, Tokens per sec: 79118.30757760792, Loss: 2.2358238697052 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13807 , TFLOPS: 96.85753319094671, Tokens per sec: 79144.78370504017, Loss: 2.2572388648986816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13808 , TFLOPS: 98.36843597553093, Tokens per sec: 80379.38126442173, Loss: 2.2752459049224854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13809 , TFLOPS: 97.72739045646698, Tokens per sec: 79855.56646881485, Loss: 2.255483627319336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13810 , TFLOPS: 98.32708102085047, Tokens per sec: 80345.58906638109, Loss: 2.2517361640930176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13811 , TFLOPS: 97.70808854363544, Tokens per sec: 79839.79437896477, Loss: 2.266526937484741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13812 , TFLOPS: 97.68228112453824, Tokens per sec: 79818.70647247836, Loss: 2.264561414718628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13813 , TFLOPS: 98.24697928936921, Tokens per sec: 80280.13587958574, Loss: 2.250159740447998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13814 , TFLOPS: 97.23444210378479, Tokens per sec: 79452.76568021873, Loss: 2.254728078842163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13815 , TFLOPS: 98.33290442460837, Tokens per sec: 80350.34751949941, Loss: 2.229076385498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13816 , TFLOPS: 97.60985782976867, Tokens per sec: 79759.52753398068, Loss: 2.249537229537964 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13817 , TFLOPS: 98.3849281842594, Tokens per sec: 80392.85747272098, Loss: 2.2592833042144775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13818 , TFLOPS: 97.31606570879683, Tokens per sec: 79519.4624290526, Loss: 2.2392935752868652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13819 , TFLOPS: 98.2396201239358, Tokens per sec: 80274.12251606831, Loss: 2.2692103385925293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13820 , TFLOPS: 98.37206223004254, Tokens per sec: 80382.34437033127, Loss: 2.250643730163574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13821 , TFLOPS: 96.95883048985388, Tokens per sec: 79227.55633560166, Loss: 2.2567367553710938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13822 , TFLOPS: 97.57570454723623, Tokens per sec: 79731.62000763972, Loss: 2.256143808364868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13823 , TFLOPS: 98.38763596585858, Tokens per sec: 80395.07007077016, Loss: 2.2563376426696777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13824 , TFLOPS: 97.81360928672521, Tokens per sec: 79926.018094489, Loss: 2.2654035091400146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13825 , TFLOPS: 97.65062970144291, Tokens per sec: 79792.84327988706, Loss: 2.2704715728759766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13826 , TFLOPS: 97.68918845416395, Tokens per sec: 79824.35062932603, Loss: 2.259049415588379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13827 , TFLOPS: 97.77281624153616, Tokens per sec: 79892.68504715861, Loss: 2.265547037124634 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13828 , TFLOPS: 98.2634094090476, Tokens per sec: 80293.56135332385, Loss: 2.280200481414795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13829 , TFLOPS: 96.51393787864617, Tokens per sec: 78864.0231304292, Loss: 2.2866740226745605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13830 , TFLOPS: 98.42048368384164, Tokens per sec: 80421.91078670962, Loss: 2.2458879947662354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13831 , TFLOPS: 97.69109077540271, Tokens per sec: 79825.90506498032, Loss: 2.2461605072021484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13832 , TFLOPS: 97.52366419170029, Tokens per sec: 79689.09649349337, Loss: 2.23152756690979 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13833 , TFLOPS: 97.73879727844268, Tokens per sec: 79864.88727668891, Loss: 2.2631328105926514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13834 , TFLOPS: 97.82189944524553, Tokens per sec: 79932.79219642362, Loss: 2.2438712120056152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13835 , TFLOPS: 96.99952658329848, Tokens per sec: 79260.8101611659, Loss: 2.2589728832244873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13836 , TFLOPS: 97.83721018771632, Tokens per sec: 79945.30299823008, Loss: 2.259610414505005 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13837 , TFLOPS: 96.59155894751294, Tokens per sec: 78927.44930394429, Loss: 2.2744977474212646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13838 , TFLOPS: 98.38474231859831, Tokens per sec: 80392.7055971047, Loss: 2.2461998462677 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13839 , TFLOPS: 97.60946789498297, Tokens per sec: 79759.20890822948, Loss: 2.2496867179870605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13840 , TFLOPS: 97.71099890638816, Tokens per sec: 79842.1725112894, Loss: 2.2384121417999268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13841 , TFLOPS: 97.64670721738666, Tokens per sec: 79789.63811719099, Loss: 2.253124713897705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13842 , TFLOPS: 96.8898326369491, Tokens per sec: 79171.17641383043, Loss: 2.271409273147583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13843 , TFLOPS: 97.80997351273966, Tokens per sec: 79923.04720997225, Loss: 2.2556257247924805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13844 , TFLOPS: 97.80025351147306, Tokens per sec: 79915.10474672221, Loss: 2.246366262435913 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13845 , TFLOPS: 96.53674923940069, Tokens per sec: 78882.66288051837, Loss: 2.273291826248169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13846 , TFLOPS: 97.70378300702494, Tokens per sec: 79836.27621416595, Loss: 2.2376413345336914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13847 , TFLOPS: 97.7703737547908, Tokens per sec: 79890.68922836377, Loss: 2.255610942840576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13848 , TFLOPS: 98.40703098196076, Tokens per sec: 80410.91823769938, Loss: 2.244906425476074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13849 , TFLOPS: 98.37915408534603, Tokens per sec: 80388.13930786034, Loss: 2.260293483734131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13850 , TFLOPS: 97.85442519177825, Tokens per sec: 79959.36982120271, Loss: 2.2376415729522705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13851 , TFLOPS: 98.36052436384522, Tokens per sec: 80372.91648285031, Loss: 2.246540069580078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13852 , TFLOPS: 97.75534857869934, Tokens per sec: 79878.41177019701, Loss: 2.2830302715301514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13853 , TFLOPS: 97.75895456783071, Tokens per sec: 79881.35831674267, Loss: 2.2631869316101074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13854 , TFLOPS: 97.57413456288701, Tokens per sec: 79730.33713301299, Loss: 2.265070676803589 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13855 , TFLOPS: 98.3643544733857, Tokens per sec: 80376.04616395164, Loss: 2.2628393173217773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13856 , TFLOPS: 96.49875846696841, Tokens per sec: 78851.61964239448, Loss: 2.263749122619629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13857 , TFLOPS: 98.36257030969404, Tokens per sec: 80374.58827787073, Loss: 2.247046709060669 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13858 , TFLOPS: 98.40856905302141, Tokens per sec: 80412.17503515651, Loss: 2.2603559494018555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13859 , TFLOPS: 97.24391427425817, Tokens per sec: 79460.5056345479, Loss: 2.273606061935425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13860 , TFLOPS: 97.71728236237625, Tokens per sec: 79847.30688492782, Loss: 2.2445271015167236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13861 , TFLOPS: 97.7879851843329, Tokens per sec: 79905.07998080115, Loss: 2.2566092014312744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13862 , TFLOPS: 97.04453606269138, Tokens per sec: 79297.5885654249, Loss: 2.273345708847046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13863 , TFLOPS: 97.75981804843389, Tokens per sec: 79882.06388897149, Loss: 2.264882802963257 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13864 , TFLOPS: 97.69793321675438, Tokens per sec: 79831.49619994887, Loss: 2.2787718772888184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13865 , TFLOPS: 97.20123269825834, Tokens per sec: 79425.62942007692, Loss: 2.2645459175109863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13866 , TFLOPS: 97.820811697652, Tokens per sec: 79931.90336986384, Loss: 2.2529261112213135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13867 , TFLOPS: 96.48808366313, Tokens per sec: 78842.8969853842, Loss: 2.2495808601379395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13868 , TFLOPS: 98.00003010801015, Tokens per sec: 80078.34734645978, Loss: 2.2596545219421387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13869 , TFLOPS: 97.32652400490504, Tokens per sec: 79528.0081720231, Loss: 2.271104574203491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13870 , TFLOPS: 97.63135424853344, Tokens per sec: 79777.09281112043, Loss: 2.2564175128936768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13871 , TFLOPS: 97.17659054038897, Tokens per sec: 79405.49367853628, Loss: 2.2669522762298584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13872 , TFLOPS: 97.80973463972909, Tokens per sec: 79922.85202068617, Loss: 2.260845184326172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13873 , TFLOPS: 97.60374921339776, Tokens per sec: 79754.53602629426, Loss: 2.2397799491882324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13874 , TFLOPS: 97.11475627037271, Tokens per sec: 79354.96730475029, Loss: 2.2710928916931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13875 , TFLOPS: 97.20394810812455, Tokens per sec: 79427.84825137947, Loss: 2.264061450958252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13876 , TFLOPS: 98.38045402280548, Tokens per sec: 80389.20152022182, Loss: 2.249868869781494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13877 , TFLOPS: 96.28385651879704, Tokens per sec: 78676.01772847534, Loss: 2.2568585872650146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13878 , TFLOPS: 98.2555841164596, Tokens per sec: 80287.16710530924, Loss: 2.238173484802246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13879 , TFLOPS: 96.99782772418492, Tokens per sec: 79259.42197965167, Loss: 2.25247859954834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13880 , TFLOPS: 97.61128286612838, Tokens per sec: 79760.69196787389, Loss: 2.2810826301574707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13881 , TFLOPS: 97.75118944528775, Tokens per sec: 79875.01323522077, Loss: 2.2404632568359375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13882 , TFLOPS: 97.22630485910376, Tokens per sec: 79446.11652811855, Loss: 2.247237205505371 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13883 , TFLOPS: 97.00739925740827, Tokens per sec: 79267.24312584165, Loss: 2.265458822250366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13884 , TFLOPS: 97.76234967602194, Tokens per sec: 79884.13254704357, Loss: 2.2562835216522217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13885 , TFLOPS: 97.0636900037689, Tokens per sec: 79313.23974374565, Loss: 2.273463010787964 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13886 , TFLOPS: 98.3325546704199, Tokens per sec: 80350.06172634853, Loss: 2.2346878051757812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13887 , TFLOPS: 97.7613545973368, Tokens per sec: 79883.31944263434, Loss: 2.255250930786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13888 , TFLOPS: 96.5546636104496, Tokens per sec: 78897.30117425938, Loss: 2.2364022731781006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13889 , TFLOPS: 98.39815143738255, Tokens per sec: 80403.66253324473, Loss: 2.2528152465820312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13890 , TFLOPS: 98.40740761087527, Tokens per sec: 80411.22599088073, Loss: 2.249401330947876 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13891 , TFLOPS: 97.24268716688306, Tokens per sec: 79459.50293352315, Loss: 2.2727508544921875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13892 , TFLOPS: 97.67535729063768, Tokens per sec: 79813.04882956287, Loss: 2.230760097503662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13893 , TFLOPS: 97.84322383125257, Tokens per sec: 79950.21690116849, Loss: 2.25217866897583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13894 , TFLOPS: 97.75240216896466, Tokens per sec: 79876.00418295547, Loss: 2.261176586151123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13895 , TFLOPS: 97.78170795432824, Tokens per sec: 79899.95069457407, Loss: 2.286738395690918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13896 , TFLOPS: 98.36170378307668, Tokens per sec: 80373.88021666529, Loss: 2.2534193992614746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13897 , TFLOPS: 96.43723064500007, Tokens per sec: 78801.3436751971, Loss: 2.222801923751831 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13898 , TFLOPS: 97.10538318743251, Tokens per sec: 79347.30831739522, Loss: 2.251890182495117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13899 , TFLOPS: 97.70041251105388, Tokens per sec: 79833.52209514371, Loss: 2.2709529399871826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13900 , TFLOPS: 97.76685228361946, Tokens per sec: 79887.81174361893, Loss: 2.267094612121582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13901 , TFLOPS: 97.37346519062321, Tokens per sec: 79566.36502324704, Loss: 2.2364814281463623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13902 , TFLOPS: 97.04608683910756, Tokens per sec: 79298.8557447551, Loss: 2.2662901878356934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13903 , TFLOPS: 97.80003031730597, Tokens per sec: 79914.92236902275, Loss: 2.2433509826660156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13904 , TFLOPS: 97.96908635699982, Tokens per sec: 80053.06241094622, Loss: 2.2703354358673096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13905 , TFLOPS: 97.03273305747078, Tokens per sec: 79287.94402602277, Loss: 2.2572543621063232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13906 , TFLOPS: 97.88115194793878, Tokens per sec: 79981.20894166936, Loss: 2.22434663772583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13907 , TFLOPS: 97.81794575920975, Tokens per sec: 79929.56153778684, Loss: 2.262289524078369 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13908 , TFLOPS: 97.65446886850849, Tokens per sec: 79795.9803621253, Loss: 2.2562508583068848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13909 , TFLOPS: 97.67280148640106, Tokens per sec: 79810.96041612889, Loss: 2.2383852005004883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13910 , TFLOPS: 97.754561618531, Tokens per sec: 79877.7687247853, Loss: 2.275786876678467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13911 , TFLOPS: 96.99326423105221, Tokens per sec: 79255.69303193818, Loss: 2.2589004039764404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13912 , TFLOPS: 98.32266710811332, Tokens per sec: 80341.98234465932, Loss: 2.2710578441619873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13913 , TFLOPS: 97.69196926662832, Tokens per sec: 79826.62290277514, Loss: 2.2493643760681152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13914 , TFLOPS: 97.746999270396, Tokens per sec: 79871.58933545204, Loss: 2.224362850189209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13915 , TFLOPS: 97.08632149230965, Tokens per sec: 79331.73251561864, Loss: 2.2721338272094727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13916 , TFLOPS: 97.09270329876837, Tokens per sec: 79336.94725395838, Loss: 2.2659835815429688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13917 , TFLOPS: 97.66680318852227, Tokens per sec: 79806.05905252219, Loss: 2.245145559310913 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13918 , TFLOPS: 96.81920375276361, Tokens per sec: 79113.46373441324, Loss: 2.259817123413086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13919 , TFLOPS: 98.3815864123243, Tokens per sec: 80390.12682484795, Loss: 2.254650115966797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13920 , TFLOPS: 96.05210345969321, Tokens per sec: 78486.64633802715, Loss: 2.254591941833496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13921 , TFLOPS: 97.75651838009843, Tokens per sec: 79879.36764503358, Loss: 2.2492940425872803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13922 , TFLOPS: 97.78079315014098, Tokens per sec: 79899.20318452352, Loss: 2.2600722312927246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13923 , TFLOPS: 96.66608356288003, Tokens per sec: 78988.34528559502, Loss: 2.2545533180236816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13924 , TFLOPS: 98.41296110123425, Tokens per sec: 80415.7638908126, Loss: 2.263108015060425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13925 , TFLOPS: 98.37988289193986, Tokens per sec: 80388.7348344893, Loss: 2.2515783309936523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13926 , TFLOPS: 97.24279566107693, Tokens per sec: 79459.59158692183, Loss: 2.2656097412109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13927 , TFLOPS: 98.29986793474373, Tokens per sec: 80323.3525531961, Loss: 2.2570478916168213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13928 , TFLOPS: 98.32059296533218, Tokens per sec: 80340.28750919968, Loss: 2.247905969619751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13929 , TFLOPS: 97.23392407701483, Tokens per sec: 79452.3423872098, Loss: 2.2602345943450928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13930 , TFLOPS: 97.65938069507702, Tokens per sec: 79799.9939420561, Loss: 2.2551751136779785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13931 , TFLOPS: 97.35558359158198, Tokens per sec: 79551.75350835714, Loss: 2.259614944458008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13932 , TFLOPS: 97.65810944297664, Tokens per sec: 79798.95516923994, Loss: 2.252659320831299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13933 , TFLOPS: 97.18721676450365, Tokens per sec: 79414.17663980373, Loss: 2.2551121711730957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13934 , TFLOPS: 98.3296849350813, Tokens per sec: 80347.7167917298, Loss: 2.240861654281616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13935 , TFLOPS: 97.28066163514657, Tokens per sec: 79490.53284909103, Loss: 2.256812572479248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13936 , TFLOPS: 96.55432085243837, Tokens per sec: 78897.02109786501, Loss: 2.257736921310425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13937 , TFLOPS: 98.42116089349491, Tokens per sec: 80422.46415215022, Loss: 2.2665534019470215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13938 , TFLOPS: 96.68324769114979, Tokens per sec: 79002.37053664822, Loss: 2.2519373893737793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13939 , TFLOPS: 97.32421718960522, Tokens per sec: 79526.12321385891, Loss: 2.2735211849212646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13940 , TFLOPS: 97.66768951284864, Tokens per sec: 79806.7832909453, Loss: 2.2747318744659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13941 , TFLOPS: 98.3929984459028, Tokens per sec: 80399.45189125683, Loss: 2.2603566646575928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13942 , TFLOPS: 98.40162537924016, Tokens per sec: 80406.50118056379, Loss: 2.2503349781036377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13943 , TFLOPS: 97.11043557333913, Tokens per sec: 79351.4367519795, Loss: 2.2298834323883057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13944 , TFLOPS: 98.40619700590901, Tokens per sec: 80410.23677439905, Loss: 2.270961046218872 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13945 , TFLOPS: 98.47148583312995, Tokens per sec: 80463.5859558052, Loss: 2.2781405448913574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13946 , TFLOPS: 97.67377984950083, Tokens per sec: 79811.759861803, Loss: 2.265284538269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13947 , TFLOPS: 97.13787976862567, Tokens per sec: 79373.86211042457, Loss: 2.259486675262451 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13948 , TFLOPS: 97.68744900756029, Tokens per sec: 79822.92928272883, Loss: 2.2513022422790527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13949 , TFLOPS: 97.55546214836015, Tokens per sec: 79715.07942243238, Loss: 2.262249708175659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13950 , TFLOPS: 97.6567540176349, Tokens per sec: 79797.84761630146, Loss: 2.2329695224761963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13951 , TFLOPS: 97.67148661345134, Tokens per sec: 79809.88599959383, Loss: 2.2641890048980713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13952 , TFLOPS: 97.27015263332568, Tokens per sec: 79481.9456731774, Loss: 2.2585127353668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13953 , TFLOPS: 97.01609116824207, Tokens per sec: 79274.34550993366, Loss: 2.276017665863037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13954 , TFLOPS: 98.4109187223479, Tokens per sec: 80414.09501044906, Loss: 2.2741920948028564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13955 , TFLOPS: 95.90297367115238, Tokens per sec: 78364.78854886815, Loss: 2.2500500679016113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13956 , TFLOPS: 97.6947177639106, Tokens per sec: 79828.8687706571, Loss: 2.2714531421661377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13957 , TFLOPS: 98.30092697907463, Tokens per sec: 80324.2179255811, Loss: 2.268861770629883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13958 , TFLOPS: 95.943099536192, Tokens per sec: 78397.57642612366, Loss: 2.2550177574157715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13959 , TFLOPS: 97.7354195475096, Tokens per sec: 79862.12724579296, Loss: 2.2638027667999268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13960 , TFLOPS: 97.67421292447774, Tokens per sec: 79812.11373851499, Loss: 2.2539727687835693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13961 , TFLOPS: 96.49543635802596, Tokens per sec: 78848.9050616589, Loss: 2.2558035850524902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13962 , TFLOPS: 98.42898243754102, Tokens per sec: 80428.85533713475, Loss: 2.2494864463806152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13963 , TFLOPS: 96.65346901614662, Tokens per sec: 78978.03761473182, Loss: 2.2586233615875244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13964 , TFLOPS: 98.46860952299373, Tokens per sec: 80461.2356487501, Loss: 2.2570457458496094 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13965 , TFLOPS: 96.7606385928317, Tokens per sec: 79065.60863463153, Loss: 2.221125841140747 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13966 , TFLOPS: 98.36071196376233, Tokens per sec: 80373.06977557184, Loss: 2.266270399093628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13967 , TFLOPS: 96.47658485972644, Tokens per sec: 78833.50101711764, Loss: 2.244919776916504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13968 , TFLOPS: 96.54845339882131, Tokens per sec: 78892.22665047283, Loss: 2.2767961025238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13969 , TFLOPS: 98.4144357123565, Tokens per sec: 80416.96883352047, Loss: 2.2633261680603027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13970 , TFLOPS: 97.38150154552596, Tokens per sec: 79572.93173571226, Loss: 2.2547695636749268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13971 , TFLOPS: 97.26613209904923, Tokens per sec: 79478.66039112235, Loss: 2.270465612411499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13972 , TFLOPS: 97.64746557616655, Tokens per sec: 79790.25779167192, Loss: 2.250779151916504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13973 , TFLOPS: 97.74340746181834, Tokens per sec: 79868.65437620184, Loss: 2.2632060050964355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13974 , TFLOPS: 97.64325893345952, Tokens per sec: 79786.82043562738, Loss: 2.25986909866333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13975 , TFLOPS: 97.68127001967457, Tokens per sec: 79817.88027266611, Loss: 2.238424777984619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13976 , TFLOPS: 98.38983642698287, Tokens per sec: 80396.86812419974, Loss: 2.257504940032959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13977 , TFLOPS: 98.42843860140809, Tokens per sec: 80428.41095463095, Loss: 2.2576940059661865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13978 , TFLOPS: 96.32023456258291, Tokens per sec: 78705.74316450694, Loss: 2.237407684326172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13979 , TFLOPS: 98.37703223135955, Tokens per sec: 80386.4054863466, Loss: 2.2717559337615967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13980 , TFLOPS: 97.75803666576756, Tokens per sec: 79880.60827533796, Loss: 2.2343361377716064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13981 , TFLOPS: 97.05496578458896, Tokens per sec: 79306.11095967234, Loss: 2.2489452362060547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13982 , TFLOPS: 97.66885468037658, Tokens per sec: 79807.73537932646, Loss: 2.25634765625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13983 , TFLOPS: 97.74332147587559, Tokens per sec: 79868.58411487455, Loss: 2.26004958152771 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13984 , TFLOPS: 97.14731361373391, Tokens per sec: 79381.57074811129, Loss: 2.24473237991333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13985 , TFLOPS: 96.99719545546353, Tokens per sec: 79258.90533660338, Loss: 2.2664263248443604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13986 , TFLOPS: 98.35276518169974, Tokens per sec: 80366.57625538013, Loss: 2.2451953887939453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13987 , TFLOPS: 96.4208821098566, Tokens per sec: 78787.98486628264, Loss: 2.249216079711914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13988 , TFLOPS: 97.02188674444739, Tokens per sec: 79279.08122444242, Loss: 2.229457378387451 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13989 , TFLOPS: 97.66657740688547, Tokens per sec: 79805.87456053463, Loss: 2.231452465057373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13990 , TFLOPS: 96.55676267848905, Tokens per sec: 78899.01637678922, Loss: 2.266808271408081 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13991 , TFLOPS: 97.01864630421267, Tokens per sec: 79276.43337731028, Loss: 2.2742252349853516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13992 , TFLOPS: 98.44660949786078, Tokens per sec: 80443.25886188293, Loss: 2.263580322265625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13993 , TFLOPS: 96.4527458627144, Tokens per sec: 78814.02155898871, Loss: 2.235729932785034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13994 , TFLOPS: 97.66537966143233, Tokens per sec: 79804.89585189216, Loss: 2.233151435852051 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13995 , TFLOPS: 97.02488135303706, Tokens per sec: 79281.52819620912, Loss: 2.249980926513672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13996 , TFLOPS: 97.2266871287066, Tokens per sec: 79446.42889045153, Loss: 2.2573626041412354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13997 , TFLOPS: 97.14682987695913, Tokens per sec: 79381.17547434013, Loss: 2.2551581859588623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13998 , TFLOPS: 97.75150865878635, Tokens per sec: 79875.27407278775, Loss: 2.239215850830078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 13999 , TFLOPS: 97.74728382964855, Tokens per sec: 79871.8218561425, Loss: 2.233224391937256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14000 , TFLOPS: 96.56203163648966, Tokens per sec: 78903.32177800656, Loss: 2.2409512996673584 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/optimizer.pt +[rank0]:[2024-08-29 20:54:08,307] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007945060002384707, 'preprocessing_with_comm': 0.0016361080051865429, 'state_converting': 2.6492018519929843, : 2.660455354998703}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000, took 16.61s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0011800 +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0012000 + eval ppl=7.421717643737793, eval loss=2.0044105052948 +------------------------------------------------------------------ +iteration: 14001 , TFLOPS: 98.11009806914952, Tokens per sec: 80168.28671090814, Loss: 2.2296581268310547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14002 , TFLOPS: 96.39532424677235, Tokens per sec: 78767.10087843856, Loss: 2.260441303253174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14003 , TFLOPS: 97.45102033940172, Tokens per sec: 79629.73733176109, Loss: 2.23488187789917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14004 , TFLOPS: 96.1874614249468, Tokens per sec: 78597.25081585985, Loss: 2.246175765991211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14005 , TFLOPS: 98.32442580008413, Tokens per sec: 80343.41941714144, Loss: 2.282729387283325 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14006 , TFLOPS: 96.45229202641508, Tokens per sec: 78813.65071766575, Loss: 2.2303953170776367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14007 , TFLOPS: 96.52530587415475, Tokens per sec: 78873.31221219749, Loss: 2.2516517639160156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14008 , TFLOPS: 97.81334870619746, Tokens per sec: 79925.80516743183, Loss: 2.2802648544311523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14009 , TFLOPS: 97.17084365425981, Tokens per sec: 79400.79774994214, Loss: 2.258230209350586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14010 , TFLOPS: 96.58612880691184, Tokens per sec: 78923.01219627488, Loss: 2.268176794052124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14011 , TFLOPS: 98.36425570704098, Tokens per sec: 80375.9654594289, Loss: 2.249727964401245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14012 , TFLOPS: 97.1172899137901, Tokens per sec: 79357.03761000842, Loss: 2.2552237510681152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14013 , TFLOPS: 98.35540706384498, Tokens per sec: 80368.73500528911, Loss: 2.2467331886291504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14014 , TFLOPS: 96.71047244212257, Tokens per sec: 79024.6165814955, Loss: 2.237793445587158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14015 , TFLOPS: 97.68661155854441, Tokens per sec: 79822.2449815801, Loss: 2.256727695465088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14016 , TFLOPS: 97.73106796241461, Tokens per sec: 79858.57145359209, Loss: 2.2466683387756348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14017 , TFLOPS: 96.6342145237009, Tokens per sec: 78962.3042732997, Loss: 2.248655080795288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14018 , TFLOPS: 97.5959433948184, Tokens per sec: 79748.15769099341, Loss: 2.255056142807007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14019 , TFLOPS: 97.65906818329329, Tokens per sec: 79799.73858063285, Loss: 2.2673113346099854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14020 , TFLOPS: 97.77970530435928, Tokens per sec: 79898.31427773164, Loss: 2.2339611053466797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14021 , TFLOPS: 97.13513542906222, Tokens per sec: 79371.61963992169, Loss: 2.241530418395996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14022 , TFLOPS: 98.41779778984302, Tokens per sec: 80419.71607358228, Loss: 2.257817506790161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14023 , TFLOPS: 97.77138091230779, Tokens per sec: 79891.51220269744, Loss: 2.2437496185302734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14024 , TFLOPS: 97.14560758148842, Tokens per sec: 79380.17670524634, Loss: 2.2630605697631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14025 , TFLOPS: 97.715038587673, Tokens per sec: 79845.47343885791, Loss: 2.2540435791015625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14026 , TFLOPS: 98.39919819405272, Tokens per sec: 80404.51786506583, Loss: 2.2495627403259277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14027 , TFLOPS: 97.2125715537701, Tokens per sec: 79434.89469080369, Loss: 2.2756357192993164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14028 , TFLOPS: 98.42027550098067, Tokens per sec: 80421.7406751348, Loss: 2.2483413219451904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14029 , TFLOPS: 97.82657935148492, Tokens per sec: 79936.61626828332, Loss: 2.2557454109191895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14030 , TFLOPS: 96.97116947017783, Tokens per sec: 79237.63883405797, Loss: 2.2515246868133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14031 , TFLOPS: 97.1505601625606, Tokens per sec: 79384.22358673118, Loss: 2.250917911529541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14032 , TFLOPS: 97.14984176146835, Tokens per sec: 79383.63656270542, Loss: 2.2660655975341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14033 , TFLOPS: 97.81968951667095, Tokens per sec: 79930.98640689669, Loss: 2.264944553375244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14034 , TFLOPS: 97.7710046829605, Tokens per sec: 79891.20477601278, Loss: 2.240943670272827 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14035 , TFLOPS: 97.10087578922398, Tokens per sec: 79343.62520628828, Loss: 2.244555711746216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14036 , TFLOPS: 97.0934832258846, Tokens per sec: 79337.58455248216, Loss: 2.254629135131836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14037 , TFLOPS: 94.64268046845258, Tokens per sec: 77334.97053010891, Loss: 2.244093894958496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14038 , TFLOPS: 96.401801375601, Tokens per sec: 78772.39350714059, Loss: 2.258683204650879 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14039 , TFLOPS: 97.89451422727139, Tokens per sec: 79992.12760408773, Loss: 2.2625553607940674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14040 , TFLOPS: 96.61709345559971, Tokens per sec: 78948.31420781871, Loss: 2.2745723724365234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14041 , TFLOPS: 96.0802676674565, Tokens per sec: 78509.66003720173, Loss: 2.268893241882324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14042 , TFLOPS: 97.16444524515869, Tokens per sec: 79395.5694451559, Loss: 2.251591682434082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14043 , TFLOPS: 98.41631703956352, Tokens per sec: 80418.506114411, Loss: 2.2571048736572266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14044 , TFLOPS: 96.50421722584957, Tokens per sec: 78856.08013479723, Loss: 2.2562782764434814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14045 , TFLOPS: 96.67414266046558, Tokens per sec: 78994.93058169179, Loss: 2.2403042316436768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14046 , TFLOPS: 97.82057374308751, Tokens per sec: 79931.70893106368, Loss: 2.255037546157837 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14047 , TFLOPS: 97.28964371705644, Tokens per sec: 79497.87233943894, Loss: 2.2474334239959717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14048 , TFLOPS: 95.91104073403827, Tokens per sec: 78371.38035361687, Loss: 2.2538058757781982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14049 , TFLOPS: 97.80409277598152, Tokens per sec: 79918.24190858354, Loss: 2.256967067718506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14050 , TFLOPS: 97.73382502565502, Tokens per sec: 79860.82432093895, Loss: 2.255920171737671 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14051 , TFLOPS: 97.75541092213238, Tokens per sec: 79878.46271262101, Loss: 2.278820514678955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14052 , TFLOPS: 97.82844456450037, Tokens per sec: 79938.1403818531, Loss: 2.2379283905029297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14053 , TFLOPS: 97.37810043260252, Tokens per sec: 79570.15259879008, Loss: 2.2832555770874023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14054 , TFLOPS: 97.74779980732929, Tokens per sec: 79872.24347478787, Loss: 2.2644870281219482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14055 , TFLOPS: 96.42479234279274, Tokens per sec: 78791.18001827311, Loss: 2.2522754669189453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14056 , TFLOPS: 97.19611338799982, Tokens per sec: 79421.4462998819, Loss: 2.2594823837280273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14057 , TFLOPS: 97.69108543786874, Tokens per sec: 79825.9007035439, Loss: 2.264373779296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14058 , TFLOPS: 97.79054787184644, Tokens per sec: 79907.17401873792, Loss: 2.2567925453186035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14059 , TFLOPS: 96.64506592509055, Tokens per sec: 78971.1712327151, Loss: 2.2583789825439453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14060 , TFLOPS: 97.75181686464653, Tokens per sec: 79875.52591573005, Loss: 2.270443916320801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14061 , TFLOPS: 97.8335529276406, Tokens per sec: 79942.31455687595, Loss: 2.2412331104278564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14062 , TFLOPS: 95.68110190865339, Tokens per sec: 78183.49142024295, Loss: 2.2375471591949463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14063 , TFLOPS: 97.12997932789283, Tokens per sec: 79367.40645692633, Loss: 2.240083932876587 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14064 , TFLOPS: 97.81811492495648, Tokens per sec: 79929.6997674732, Loss: 2.254379987716675 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14065 , TFLOPS: 97.19701984992055, Tokens per sec: 79422.18699325176, Loss: 2.273496627807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14066 , TFLOPS: 97.87476327727077, Tokens per sec: 79975.98859440752, Loss: 2.2712247371673584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14067 , TFLOPS: 97.75389302746261, Tokens per sec: 79877.22240181225, Loss: 2.2547457218170166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14068 , TFLOPS: 96.53667029112233, Tokens per sec: 78882.59836984781, Loss: 2.2512850761413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14069 , TFLOPS: 97.26407443485122, Tokens per sec: 79476.97902073718, Loss: 2.239288806915283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14070 , TFLOPS: 96.69227691681185, Tokens per sec: 79009.74854936961, Loss: 2.269456624984741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14071 , TFLOPS: 97.96524472593163, Tokens per sec: 80049.92331530801, Loss: 2.255535840988159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14072 , TFLOPS: 96.08634920019804, Tokens per sec: 78514.62941415742, Loss: 2.2571935653686523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14073 , TFLOPS: 97.14461162336826, Tokens per sec: 79379.3628822281, Loss: 2.2620556354522705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14074 , TFLOPS: 97.25530193491242, Tokens per sec: 79469.81078521295, Loss: 2.2516562938690186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14075 , TFLOPS: 97.7706551102105, Tokens per sec: 79890.91913111995, Loss: 2.270319700241089 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14076 , TFLOPS: 96.62878430248352, Tokens per sec: 78957.86709975665, Loss: 2.2352073192596436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14077 , TFLOPS: 97.39351512936224, Tokens per sec: 79582.74834432206, Loss: 2.2559893131256104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14078 , TFLOPS: 96.58628640649978, Tokens per sec: 78923.14097495515, Loss: 2.256356716156006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14079 , TFLOPS: 97.24154258100407, Tokens per sec: 79458.56766293714, Loss: 2.252819299697876 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14080 , TFLOPS: 97.09084858685645, Tokens per sec: 79335.43172111061, Loss: 2.26877760887146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14081 , TFLOPS: 98.43760400463538, Tokens per sec: 80435.90024154643, Loss: 2.274152994155884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14082 , TFLOPS: 97.09782552606865, Tokens per sec: 79341.1327577433, Loss: 2.2396867275238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14083 , TFLOPS: 97.1569094582993, Tokens per sec: 79389.41175972468, Loss: 2.248788595199585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14084 , TFLOPS: 97.80234963409028, Tokens per sec: 79916.81754247175, Loss: 2.2499914169311523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14085 , TFLOPS: 97.24386052052186, Tokens per sec: 79460.46171098622, Loss: 2.262418508529663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14086 , TFLOPS: 97.20390153678244, Tokens per sec: 79427.81019673697, Loss: 2.269366502761841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14087 , TFLOPS: 96.4688369830297, Tokens per sec: 78827.17002762064, Loss: 2.260371685028076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14088 , TFLOPS: 96.84379111914967, Tokens per sec: 79133.55470442241, Loss: 2.261925458908081 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14089 , TFLOPS: 98.36454128037175, Tokens per sec: 80376.19880874878, Loss: 2.2463550567626953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14090 , TFLOPS: 97.07557422300917, Tokens per sec: 79322.9506452139, Loss: 2.2418529987335205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14091 , TFLOPS: 96.6035755729233, Tokens per sec: 78937.26839790256, Loss: 2.2569589614868164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14092 , TFLOPS: 98.36934871842834, Tokens per sec: 80380.12709013929, Loss: 2.271028995513916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14093 , TFLOPS: 96.51935078975657, Tokens per sec: 78868.44615943823, Loss: 2.254146099090576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14094 , TFLOPS: 97.1489094341904, Tokens per sec: 79382.87473408636, Loss: 2.2481677532196045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14095 , TFLOPS: 97.04289243689199, Tokens per sec: 79296.24551647353, Loss: 2.257378578186035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14096 , TFLOPS: 97.84462210892309, Tokens per sec: 79951.35946984794, Loss: 2.253431558609009 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14097 , TFLOPS: 96.62854772712073, Tokens per sec: 78957.67378793772, Loss: 2.277533769607544 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14098 , TFLOPS: 97.91377800159813, Tokens per sec: 80007.86852998377, Loss: 2.2568907737731934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14099 , TFLOPS: 97.71344272084957, Tokens per sec: 79844.16941499521, Loss: 2.256182909011841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14100 , TFLOPS: 95.59259397010622, Tokens per sec: 78111.16930525903, Loss: 2.2527644634246826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14101 , TFLOPS: 96.71273170645239, Tokens per sec: 79026.46268453804, Loss: 2.2373979091644287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14102 , TFLOPS: 97.77857584913568, Tokens per sec: 79897.39137079367, Loss: 2.258146047592163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14103 , TFLOPS: 97.2707720010859, Tokens per sec: 79482.45177451822, Loss: 2.2222869396209717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14104 , TFLOPS: 96.88008106258953, Tokens per sec: 79163.20815139337, Loss: 2.250718832015991 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14105 , TFLOPS: 97.81622994036118, Tokens per sec: 79928.15949803658, Loss: 2.2531723976135254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14106 , TFLOPS: 97.15108647152957, Tokens per sec: 79384.65364733813, Loss: 2.253286361694336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14107 , TFLOPS: 97.81427948147417, Tokens per sec: 79926.56572787072, Loss: 2.265833854675293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14108 , TFLOPS: 96.69528962868631, Tokens per sec: 79012.21031379634, Loss: 2.2734477519989014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14109 , TFLOPS: 97.91317334613252, Tokens per sec: 80007.37445043761, Loss: 2.2578697204589844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14110 , TFLOPS: 97.74223577848504, Tokens per sec: 79867.6969635884, Loss: 2.2488327026367188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14111 , TFLOPS: 96.54654454990147, Tokens per sec: 78890.66688088229, Loss: 2.2434895038604736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14112 , TFLOPS: 97.86234744949671, Tokens per sec: 79965.84330192152, Loss: 2.258098602294922 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14113 , TFLOPS: 96.55116460010875, Tokens per sec: 78894.44204283733, Loss: 2.250835418701172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14114 , TFLOPS: 96.99565585075302, Tokens per sec: 79257.64728596149, Loss: 2.249755620956421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14115 , TFLOPS: 97.87221851019338, Tokens per sec: 79973.90919972045, Loss: 2.27154278755188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14116 , TFLOPS: 97.24714771068136, Tokens per sec: 79463.14775868534, Loss: 2.265993356704712 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14117 , TFLOPS: 97.75089555572471, Tokens per sec: 79874.77309049343, Loss: 2.265528917312622 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14118 , TFLOPS: 97.19411280742946, Tokens per sec: 79419.81157399835, Loss: 2.243411064147949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14119 , TFLOPS: 97.77860549638336, Tokens per sec: 79897.41559632294, Loss: 2.246440887451172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14120 , TFLOPS: 96.45008687193236, Tokens per sec: 78811.84882917219, Loss: 2.270195960998535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14121 , TFLOPS: 97.81410855920521, Tokens per sec: 79926.42606288481, Loss: 2.226814031600952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14122 , TFLOPS: 97.81702036084967, Tokens per sec: 79928.80537096494, Loss: 2.264181137084961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14123 , TFLOPS: 97.2861112949801, Tokens per sec: 79494.98590642774, Loss: 2.2328803539276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14124 , TFLOPS: 96.56542589824339, Tokens per sec: 78906.09531666161, Loss: 2.2490663528442383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14125 , TFLOPS: 97.20089414910828, Tokens per sec: 79425.35278285138, Loss: 2.2806050777435303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14126 , TFLOPS: 97.31497697236252, Tokens per sec: 79518.57279448555, Loss: 2.2604100704193115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14127 , TFLOPS: 98.3973143435316, Tokens per sec: 80402.9785223104, Loss: 2.247205972671509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14128 , TFLOPS: 96.60237237859755, Tokens per sec: 78936.28523684602, Loss: 2.2645256519317627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14129 , TFLOPS: 96.75109981506405, Tokens per sec: 79057.8142537676, Loss: 2.2569124698638916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14130 , TFLOPS: 98.38275652876138, Tokens per sec: 80391.08295711015, Loss: 2.239245653152466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14131 , TFLOPS: 96.52800587546528, Tokens per sec: 78875.51845277254, Loss: 2.2712039947509766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14132 , TFLOPS: 96.53747674581989, Tokens per sec: 78883.25734474117, Loss: 2.23983097076416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14133 , TFLOPS: 97.79512662193056, Tokens per sec: 79910.91543329916, Loss: 2.2509164810180664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14134 , TFLOPS: 97.13713729057135, Tokens per sec: 79373.2554124933, Loss: 2.2494282722473145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14135 , TFLOPS: 97.83660028356586, Tokens per sec: 79944.8046298484, Loss: 2.248063564300537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14136 , TFLOPS: 96.7309270995146, Tokens per sec: 79041.33060860026, Loss: 2.2761714458465576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14137 , TFLOPS: 97.81546582352082, Tokens per sec: 79927.53511849619, Loss: 2.2723324298858643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14138 , TFLOPS: 95.97734240893276, Tokens per sec: 78425.55715893011, Loss: 2.2466659545898438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14139 , TFLOPS: 96.64528662454725, Tokens per sec: 78971.35157192248, Loss: 2.2735788822174072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14140 , TFLOPS: 97.18235760536442, Tokens per sec: 79410.20609578518, Loss: 2.2538199424743652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14141 , TFLOPS: 96.80718976838106, Tokens per sec: 79103.64679850679, Loss: 2.2627224922180176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14142 , TFLOPS: 97.2911492622281, Tokens per sec: 79499.10255915488, Loss: 2.2564635276794434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14143 , TFLOPS: 96.01974454118262, Tokens per sec: 78460.20503271946, Loss: 2.266191244125366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14144 , TFLOPS: 96.77042204395195, Tokens per sec: 79073.60294439034, Loss: 2.2715506553649902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14145 , TFLOPS: 97.665983381161, Tokens per sec: 79805.38916682359, Loss: 2.253209114074707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14146 , TFLOPS: 96.67575013951082, Tokens per sec: 78996.24409419985, Loss: 2.261749267578125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14147 , TFLOPS: 97.85185060385488, Tokens per sec: 79957.26605913474, Loss: 2.2449963092803955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14148 , TFLOPS: 97.7400235873622, Tokens per sec: 79865.88932527506, Loss: 2.2472457885742188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14149 , TFLOPS: 97.22950042609038, Tokens per sec: 79448.72770816453, Loss: 2.257896661758423 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14150 , TFLOPS: 97.78982998635625, Tokens per sec: 79906.58741602386, Loss: 2.244964599609375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14151 , TFLOPS: 97.1183376107564, Tokens per sec: 79357.8937101697, Loss: 2.266228437423706 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14152 , TFLOPS: 97.76566517979767, Tokens per sec: 79886.84173052739, Loss: 2.238168239593506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14153 , TFLOPS: 97.0298395453053, Tokens per sec: 79285.57966274672, Loss: 2.2223925590515137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14154 , TFLOPS: 97.8598837037879, Tokens per sec: 79963.8301118804, Loss: 2.2618329524993896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14155 , TFLOPS: 97.07381453199773, Tokens per sec: 79321.51275638999, Loss: 2.2476682662963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14156 , TFLOPS: 96.16821851834885, Tokens per sec: 78581.52694152218, Loss: 2.244014024734497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14157 , TFLOPS: 97.66036683822243, Tokens per sec: 79800.7997450058, Loss: 2.261610984802246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14158 , TFLOPS: 97.28140148161631, Tokens per sec: 79491.1373966868, Loss: 2.2583870887756348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14159 , TFLOPS: 97.80567747934879, Tokens per sec: 79919.53681049883, Loss: 2.2606537342071533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14160 , TFLOPS: 98.3511020467563, Tokens per sec: 80365.21726500493, Loss: 2.2463157176971436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14161 , TFLOPS: 96.11582501764902, Tokens per sec: 78538.71486337164, Loss: 2.2430739402770996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14162 , TFLOPS: 97.84156777234446, Tokens per sec: 79948.86369280393, Loss: 2.242689371109009 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14163 , TFLOPS: 97.82839845978523, Tokens per sec: 79938.10270850352, Loss: 2.2639858722686768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14164 , TFLOPS: 97.1823336477508, Tokens per sec: 79410.18651940236, Loss: 2.2620067596435547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14165 , TFLOPS: 97.8212667514476, Tokens per sec: 79932.27520603433, Loss: 2.257791042327881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14166 , TFLOPS: 97.10509903514267, Tokens per sec: 79347.07612924447, Loss: 2.2638206481933594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14167 , TFLOPS: 97.16000285589553, Tokens per sec: 79391.9394545317, Loss: 2.247779369354248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14168 , TFLOPS: 97.74475338092773, Tokens per sec: 79869.75416135316, Loss: 2.2480483055114746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14169 , TFLOPS: 97.80370787084385, Tokens per sec: 79917.92739268721, Loss: 2.2448291778564453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14170 , TFLOPS: 96.49476798090754, Tokens per sec: 78848.35891350987, Loss: 2.224740505218506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14171 , TFLOPS: 97.18184525658434, Tokens per sec: 79409.78744240795, Loss: 2.2611613273620605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14172 , TFLOPS: 96.54723590838739, Tokens per sec: 78891.23180769828, Loss: 2.267063617706299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14173 , TFLOPS: 97.70365225692211, Tokens per sec: 79836.1693748911, Loss: 2.2458789348602295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14174 , TFLOPS: 96.73282491219825, Tokens per sec: 79042.88136019811, Loss: 2.275439977645874 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14175 , TFLOPS: 97.72989454647596, Tokens per sec: 79857.6126252222, Loss: 2.260885238647461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14176 , TFLOPS: 95.24863850822615, Tokens per sec: 77830.11444315553, Loss: 2.234208583831787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14177 , TFLOPS: 96.55859812128546, Tokens per sec: 78900.51616434666, Loss: 2.2626912593841553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14178 , TFLOPS: 97.19459173170125, Tokens per sec: 79420.20291534944, Loss: 2.2659926414489746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14179 , TFLOPS: 96.2945644778099, Tokens per sec: 78684.76747743215, Loss: 2.263812780380249 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14180 , TFLOPS: 97.15663909027126, Tokens per sec: 79389.19083504908, Loss: 2.2457480430603027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14181 , TFLOPS: 96.07700276753941, Tokens per sec: 78506.99220343346, Loss: 2.249652862548828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14182 , TFLOPS: 97.2748918787528, Tokens per sec: 79485.81823261493, Loss: 2.2604634761810303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14183 , TFLOPS: 97.69325225431713, Tokens per sec: 79827.6712650427, Loss: 2.2711679935455322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14184 , TFLOPS: 97.11410325719349, Tokens per sec: 79354.43371087148, Loss: 2.2457337379455566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14185 , TFLOPS: 97.72217567281257, Tokens per sec: 79851.30533485036, Loss: 2.25833797454834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14186 , TFLOPS: 97.19812859015993, Tokens per sec: 79423.09297344285, Loss: 2.252694606781006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14187 , TFLOPS: 97.12965743889961, Tokens per sec: 79367.1434331438, Loss: 2.2508275508880615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14188 , TFLOPS: 97.78835708366896, Tokens per sec: 79905.38386932219, Loss: 2.2508528232574463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14189 , TFLOPS: 97.2375466532948, Tokens per sec: 79455.30248754159, Loss: 2.246185302734375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14190 , TFLOPS: 97.75035818125623, Tokens per sec: 79874.33398798198, Loss: 2.255101203918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14191 , TFLOPS: 97.72944169872441, Tokens per sec: 79857.24259166708, Loss: 2.2548635005950928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14192 , TFLOPS: 97.83250345289692, Tokens per sec: 79941.457004047, Loss: 2.264509916305542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14193 , TFLOPS: 97.31822805425612, Tokens per sec: 79521.22933719111, Loss: 2.2426562309265137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14194 , TFLOPS: 97.75697128853177, Tokens per sec: 79879.73772817336, Loss: 2.237962484359741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14195 , TFLOPS: 98.37439006734819, Tokens per sec: 80384.24650611756, Loss: 2.2589833736419678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14196 , TFLOPS: 95.28928596374895, Tokens per sec: 77863.3285254218, Loss: 2.2613978385925293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14197 , TFLOPS: 97.20481812463697, Tokens per sec: 79428.559164268, Loss: 2.258993148803711 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14198 , TFLOPS: 98.4680922783151, Tokens per sec: 80460.81299480813, Loss: 2.232125759124756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14199 , TFLOPS: 97.30405276609527, Tokens per sec: 79509.6463443305, Loss: 2.2602062225341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14200 , TFLOPS: 96.63543416816654, Tokens per sec: 78963.30087618902, Loss: 2.2697315216064453 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/optimizer.pt +[rank0]:[2024-08-29 21:49:34,045] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007797578000463545, 'preprocessing_with_comm': 0.0018581949989311397, 'state_converting': 2.662169488001382, : 2.6734733290068107}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200, took 16.59s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0012200 + eval ppl=7.799999713897705, eval loss=2.054123640060425 +------------------------------------------------------------------ +iteration: 14201 , TFLOPS: 96.1792501900349, Tokens per sec: 78590.54120443735, Loss: 2.250190258026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14202 , TFLOPS: 96.63287729827886, Tokens per sec: 78961.2115919841, Loss: 2.2603421211242676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14203 , TFLOPS: 97.02803215387837, Tokens per sec: 79284.10279668539, Loss: 2.2381973266601562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14204 , TFLOPS: 96.5295223711635, Tokens per sec: 78876.75762044563, Loss: 2.2384848594665527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14205 , TFLOPS: 97.74719728944322, Tokens per sec: 79871.75114191299, Loss: 2.263848304748535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14206 , TFLOPS: 96.6187761901275, Tokens per sec: 78949.68921351882, Loss: 2.2309556007385254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14207 , TFLOPS: 97.87270723090899, Tokens per sec: 79974.30854599798, Loss: 2.252664804458618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14208 , TFLOPS: 97.75789920547348, Tokens per sec: 79880.49595299312, Loss: 2.2483410835266113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14209 , TFLOPS: 97.80575841569852, Tokens per sec: 79919.60294567369, Loss: 2.2727999687194824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14210 , TFLOPS: 98.3952789996001, Tokens per sec: 80401.31539038954, Loss: 2.268383502960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14211 , TFLOPS: 97.78765126645025, Tokens per sec: 79904.80712790343, Loss: 2.2332663536071777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14212 , TFLOPS: 96.44156914973871, Tokens per sec: 78804.88877910208, Loss: 2.254711627960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14213 , TFLOPS: 97.85297371959311, Tokens per sec: 79958.183785916, Loss: 2.2698774337768555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14214 , TFLOPS: 98.25995596482413, Tokens per sec: 80290.73945514929, Loss: 2.255699872970581 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14215 , TFLOPS: 97.82608412389365, Tokens per sec: 79936.2116050702, Loss: 2.256960391998291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14216 , TFLOPS: 97.80138816785, Tokens per sec: 79916.03190365648, Loss: 2.2356150150299072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14217 , TFLOPS: 96.34681286672071, Tokens per sec: 78727.46098100436, Loss: 2.2760071754455566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14218 , TFLOPS: 97.14935791141258, Tokens per sec: 79383.24119636945, Loss: 2.249368190765381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14219 , TFLOPS: 97.76617744044061, Tokens per sec: 79887.2603118855, Loss: 2.2656233310699463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14220 , TFLOPS: 98.29892532552313, Tokens per sec: 80322.58232293703, Loss: 2.257324695587158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14221 , TFLOPS: 97.79547342712459, Tokens per sec: 79911.19881675075, Loss: 2.251582622528076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14222 , TFLOPS: 96.53874140058299, Tokens per sec: 78884.29072670323, Loss: 2.2539377212524414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14223 , TFLOPS: 97.57476183420137, Tokens per sec: 79730.84969255135, Loss: 2.2413034439086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14224 , TFLOPS: 97.80271399935495, Tokens per sec: 79917.11527470911, Loss: 2.25584077835083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14225 , TFLOPS: 96.5246533307564, Tokens per sec: 78872.77900218873, Loss: 2.2460572719573975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14226 , TFLOPS: 97.84726377987481, Tokens per sec: 79953.51804718525, Loss: 2.258009195327759 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14227 , TFLOPS: 97.24998124729295, Tokens per sec: 79465.46311439239, Loss: 2.233571767807007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14228 , TFLOPS: 95.4555837804099, Tokens per sec: 77999.2147523021, Loss: 2.2475509643554688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14229 , TFLOPS: 97.80782619367238, Tokens per sec: 79921.29258028547, Loss: 2.2570641040802 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14230 , TFLOPS: 97.7477394017096, Tokens per sec: 79872.19411580104, Loss: 2.2469449043273926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14231 , TFLOPS: 94.86049874871492, Tokens per sec: 77512.95545405262, Loss: 2.260701894760132 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14232 , TFLOPS: 97.83911156518933, Tokens per sec: 79946.85666270906, Loss: 2.234604597091675 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14233 , TFLOPS: 96.77187518493082, Tokens per sec: 79074.79034329134, Loss: 2.255171298980713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14234 , TFLOPS: 96.64484477875286, Tokens per sec: 78970.99052834975, Loss: 2.2390964031219482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14235 , TFLOPS: 95.90142850466341, Tokens per sec: 78363.52595355388, Loss: 2.2374560832977295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14236 , TFLOPS: 97.85677439257644, Tokens per sec: 79961.2894136487, Loss: 2.2633461952209473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14237 , TFLOPS: 96.51207036455355, Tokens per sec: 78862.49713659003, Loss: 2.2654502391815186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14238 , TFLOPS: 97.17863982289997, Tokens per sec: 79407.16820002925, Loss: 2.2445476055145264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14239 , TFLOPS: 97.8021261980251, Tokens per sec: 79916.63496711112, Loss: 2.2574660778045654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14240 , TFLOPS: 96.02244123248663, Tokens per sec: 78462.40856860299, Loss: 2.2475295066833496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14241 , TFLOPS: 97.59034886734902, Tokens per sec: 79743.58625858222, Loss: 2.2717220783233643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14242 , TFLOPS: 96.50008389813537, Tokens per sec: 78852.70268632063, Loss: 2.260152816772461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14243 , TFLOPS: 97.8294740851881, Tokens per sec: 79938.98162970926, Loss: 2.2520060539245605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14244 , TFLOPS: 96.68951557129995, Tokens per sec: 79007.49218286849, Loss: 2.2553224563598633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14245 , TFLOPS: 97.84922611510338, Tokens per sec: 79955.12152181551, Loss: 2.2545158863067627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14246 , TFLOPS: 97.74068509881214, Tokens per sec: 79866.42986330962, Loss: 2.251329183578491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14247 , TFLOPS: 97.8400812436505, Tokens per sec: 79947.64901194144, Loss: 2.2818334102630615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14248 , TFLOPS: 97.7959489502745, Tokens per sec: 79911.58737895761, Loss: 2.2583210468292236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14249 , TFLOPS: 96.82265496292392, Tokens per sec: 79116.28380707729, Loss: 2.284168004989624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14250 , TFLOPS: 96.52440612404114, Tokens per sec: 78872.57700321803, Loss: 2.2667653560638428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14251 , TFLOPS: 98.39413140157843, Tokens per sec: 80400.37765850425, Loss: 2.2394089698791504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14252 , TFLOPS: 98.39996554839784, Tokens per sec: 80405.14489005468, Loss: 2.264493942260742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14253 , TFLOPS: 97.83969344988024, Tokens per sec: 79947.33213566906, Loss: 2.2716867923736572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14254 , TFLOPS: 97.83781716033747, Tokens per sec: 79945.79897118367, Loss: 2.252464771270752 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14255 , TFLOPS: 97.06984573939732, Tokens per sec: 79318.26975379037, Loss: 2.254155397415161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14256 , TFLOPS: 97.27692817089756, Tokens per sec: 79487.48213934524, Loss: 2.2606396675109863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14257 , TFLOPS: 97.79662508606422, Tokens per sec: 79912.13986691668, Loss: 2.2659833431243896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14258 , TFLOPS: 97.71897485990286, Tokens per sec: 79848.68986822557, Loss: 2.261563539505005 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14259 , TFLOPS: 97.70578498598664, Tokens per sec: 79837.91208271107, Loss: 2.2707486152648926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14260 , TFLOPS: 96.51369711988563, Tokens per sec: 78863.82640024828, Loss: 2.278876543045044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14261 , TFLOPS: 97.77432754345222, Tokens per sec: 79893.9199708586, Loss: 2.268498659133911 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14262 , TFLOPS: 97.84605700751996, Tokens per sec: 79952.53196242901, Loss: 2.2477779388427734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14263 , TFLOPS: 96.61274507422375, Tokens per sec: 78944.76103345912, Loss: 2.262766122817993 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14264 , TFLOPS: 97.03313420725208, Tokens per sec: 79288.27181583559, Loss: 2.2591781616210938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14265 , TFLOPS: 97.17133858016643, Tokens per sec: 79401.20216664096, Loss: 2.2518272399902344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14266 , TFLOPS: 96.61100068671264, Tokens per sec: 78943.33564951927, Loss: 2.2517733573913574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14267 , TFLOPS: 97.78456202709157, Tokens per sec: 79902.28283089936, Loss: 2.246506452560425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14268 , TFLOPS: 97.76132593076035, Tokens per sec: 79883.29601843678, Loss: 2.270632028579712 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14269 , TFLOPS: 94.69574256175089, Tokens per sec: 77378.32893248275, Loss: 2.257190704345703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14270 , TFLOPS: 97.81056878495886, Tokens per sec: 79923.53362222623, Loss: 2.252570390701294 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14271 , TFLOPS: 96.78753492776835, Tokens per sec: 79087.58632228152, Loss: 2.2526161670684814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14272 , TFLOPS: 96.75452999229881, Tokens per sec: 79060.61713988674, Loss: 2.2643373012542725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14273 , TFLOPS: 94.01555701022917, Tokens per sec: 76822.53180880057, Loss: 2.24257755279541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14274 , TFLOPS: 97.69694127991016, Tokens per sec: 79830.68566281872, Loss: 2.268589973449707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14275 , TFLOPS: 97.01470987639988, Tokens per sec: 79273.21682081178, Loss: 2.2386178970336914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14276 , TFLOPS: 96.48738851299716, Tokens per sec: 78842.3289603159, Loss: 2.2646453380584717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14277 , TFLOPS: 97.89979528199633, Tokens per sec: 79996.44288984993, Loss: 2.2581844329833984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14278 , TFLOPS: 96.63408853403462, Tokens per sec: 78962.20132390012, Loss: 2.2363059520721436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14279 , TFLOPS: 95.86871508823211, Tokens per sec: 78336.79497886961, Loss: 2.2622363567352295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14280 , TFLOPS: 97.32366896934562, Tokens per sec: 79525.67524897234, Loss: 2.2608840465545654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14281 , TFLOPS: 97.00684657117627, Tokens per sec: 79266.79151169409, Loss: 2.2720983028411865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14282 , TFLOPS: 97.12459613774637, Tokens per sec: 79363.00771368202, Loss: 2.2482175827026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14283 , TFLOPS: 97.83219864254254, Tokens per sec: 79941.20793565991, Loss: 2.259436845779419 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14284 , TFLOPS: 97.74000309038536, Tokens per sec: 79865.87257666764, Loss: 2.2434067726135254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14285 , TFLOPS: 97.84206084348344, Tokens per sec: 79949.26659392436, Loss: 2.2628331184387207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14286 , TFLOPS: 97.74172884994293, Tokens per sec: 79867.28273922713, Loss: 2.2529213428497314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14287 , TFLOPS: 97.11153411613085, Tokens per sec: 79352.33439957364, Loss: 2.279772996902466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14288 , TFLOPS: 96.45147710991574, Tokens per sec: 78812.98482841633, Loss: 2.245201349258423 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14289 , TFLOPS: 97.83895804818714, Tokens per sec: 79946.7312200147, Loss: 2.2504987716674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14290 , TFLOPS: 98.44494508888704, Tokens per sec: 80441.8988304648, Loss: 2.2506916522979736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14291 , TFLOPS: 97.11377593449517, Tokens per sec: 79354.16624706864, Loss: 2.249598979949951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14292 , TFLOPS: 97.84860133853444, Tokens per sec: 79954.61100079787, Loss: 2.272825002670288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14293 , TFLOPS: 97.07508853009023, Tokens per sec: 79322.55377302699, Loss: 2.2442150115966797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14294 , TFLOPS: 97.17170826193738, Tokens per sec: 79401.50424313247, Loss: 2.2592597007751465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14295 , TFLOPS: 97.79423467140967, Tokens per sec: 79910.18659756769, Loss: 2.2495737075805664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14296 , TFLOPS: 98.38348624465661, Tokens per sec: 80391.67922675272, Loss: 2.240835189819336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14297 , TFLOPS: 97.7459072433326, Tokens per sec: 79870.69701202725, Loss: 2.2555103302001953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14298 , TFLOPS: 96.64375541367258, Tokens per sec: 78970.10038009992, Loss: 2.237551689147949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14299 , TFLOPS: 97.7397547181581, Tokens per sec: 79865.66962532706, Loss: 2.2634854316711426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14300 , TFLOPS: 97.13216609956464, Tokens per sec: 79369.19332435189, Loss: 2.2538323402404785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14301 , TFLOPS: 96.57260867273182, Tokens per sec: 78911.96454659717, Loss: 2.251660108566284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14302 , TFLOPS: 97.83266128340924, Tokens per sec: 79941.58597142153, Loss: 2.2671091556549072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14303 , TFLOPS: 97.22751145294941, Tokens per sec: 79447.10246701032, Loss: 2.251373767852783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14304 , TFLOPS: 96.59667492415201, Tokens per sec: 78931.62970015305, Loss: 2.252980947494507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14305 , TFLOPS: 97.78607009506423, Tokens per sec: 79903.51511206095, Loss: 2.253105640411377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14306 , TFLOPS: 97.10690010190791, Tokens per sec: 79348.54782726192, Loss: 2.2662229537963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14307 , TFLOPS: 95.4833228484089, Tokens per sec: 78021.88105883123, Loss: 2.2525582313537598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14308 , TFLOPS: 97.83766247292746, Tokens per sec: 79945.67257211894, Loss: 2.2564051151275635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14309 , TFLOPS: 96.67652820217951, Tokens per sec: 78996.87986923555, Loss: 2.2540695667266846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14310 , TFLOPS: 96.70602816676886, Tokens per sec: 79020.98504969817, Loss: 2.236234188079834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14311 , TFLOPS: 96.11047593968664, Tokens per sec: 78534.34399407108, Loss: 2.273894786834717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14312 , TFLOPS: 97.74207471129557, Tokens per sec: 79867.56535144165, Loss: 2.2554938793182373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14313 , TFLOPS: 97.04410834244166, Tokens per sec: 79297.23906419837, Loss: 2.264068365097046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14314 , TFLOPS: 96.54836879415582, Tokens per sec: 78892.15751782274, Loss: 2.2613296508789062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14315 , TFLOPS: 97.82481424972184, Tokens per sec: 79935.17395819494, Loss: 2.2654080390930176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14316 , TFLOPS: 96.76761559620722, Tokens per sec: 79071.30972369372, Loss: 2.2441389560699463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14317 , TFLOPS: 97.74829041642741, Tokens per sec: 79872.64436411134, Loss: 2.255427837371826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14318 , TFLOPS: 96.55220266680305, Tokens per sec: 78895.29027385545, Loss: 2.2603249549865723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14319 , TFLOPS: 97.73985510805907, Tokens per sec: 79865.75165649943, Loss: 2.2672312259674072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14320 , TFLOPS: 96.23571696124067, Tokens per sec: 78636.68165677358, Loss: 2.255645275115967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14321 , TFLOPS: 97.8905421244745, Tokens per sec: 79988.88189664137, Loss: 2.251188278198242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14322 , TFLOPS: 98.4013620784834, Tokens per sec: 80406.28603073748, Loss: 2.2603609561920166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14323 , TFLOPS: 97.23394058780417, Tokens per sec: 79452.35587860079, Loss: 2.2562317848205566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14324 , TFLOPS: 98.41876974896881, Tokens per sec: 80420.51028640442, Loss: 2.239945888519287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14325 , TFLOPS: 97.10063121824957, Tokens per sec: 79343.42536104927, Loss: 2.2810330390930176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14326 , TFLOPS: 96.42318725166894, Tokens per sec: 78789.86845699712, Loss: 2.245605945587158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14327 , TFLOPS: 97.65479978083779, Tokens per sec: 79796.250759108, Loss: 2.275254249572754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14328 , TFLOPS: 98.36509537812252, Tokens per sec: 80376.65157628468, Loss: 2.2803616523742676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14329 , TFLOPS: 97.21059822226233, Tokens per sec: 79433.28223083062, Loss: 2.243701934814453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14330 , TFLOPS: 97.81566288057111, Tokens per sec: 79927.69613888468, Loss: 2.2453441619873047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14331 , TFLOPS: 96.48515083867512, Tokens per sec: 78840.5004990245, Loss: 2.2602765560150146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14332 , TFLOPS: 96.68370285349697, Tokens per sec: 79002.74246151901, Loss: 2.247711658477783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14333 , TFLOPS: 97.75692043016298, Tokens per sec: 79879.69617049099, Loss: 2.2259984016418457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14334 , TFLOPS: 98.42875419215889, Tokens per sec: 80428.66883195746, Loss: 2.2531466484069824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14335 , TFLOPS: 97.75850747407577, Tokens per sec: 79880.99298492628, Loss: 2.2640490531921387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14336 , TFLOPS: 96.51007788275408, Tokens per sec: 78860.86902842062, Loss: 2.273608446121216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14337 , TFLOPS: 97.37920506915134, Tokens per sec: 79571.05522575, Loss: 2.250265598297119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14338 , TFLOPS: 97.89374387327763, Tokens per sec: 79991.49812800874, Loss: 2.255401134490967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14339 , TFLOPS: 96.6172693477982, Tokens per sec: 78948.45793386197, Loss: 2.2606067657470703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14340 , TFLOPS: 97.83779842281224, Tokens per sec: 79945.78366026947, Loss: 2.241891384124756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14341 , TFLOPS: 96.34780901059236, Tokens per sec: 78728.2749558049, Loss: 2.242532968521118 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14342 , TFLOPS: 96.51880001284243, Tokens per sec: 78867.99610544342, Loss: 2.2770161628723145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14343 , TFLOPS: 97.76598530941224, Tokens per sec: 79887.10331667632, Loss: 2.2499804496765137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14344 , TFLOPS: 97.87525166535447, Tokens per sec: 79976.38766888295, Loss: 2.250849962234497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14345 , TFLOPS: 95.43959140331869, Tokens per sec: 77986.14696930046, Loss: 2.230046510696411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14346 , TFLOPS: 97.2304461767178, Tokens per sec: 79449.50050534798, Loss: 2.2495627403259277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14347 , TFLOPS: 96.80105401679081, Tokens per sec: 79098.63311793409, Loss: 2.2432117462158203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14348 , TFLOPS: 96.71791567604814, Tokens per sec: 79030.69863954197, Loss: 2.2441606521606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14349 , TFLOPS: 96.48405056442147, Tokens per sec: 78839.60143660828, Loss: 2.2432920932769775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14350 , TFLOPS: 97.16778579488987, Tokens per sec: 79398.29909433465, Loss: 2.2726752758026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14351 , TFLOPS: 97.23166744626958, Tokens per sec: 79450.49843613702, Loss: 2.2502896785736084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14352 , TFLOPS: 97.24799737908441, Tokens per sec: 79463.84204460988, Loss: 2.2450361251831055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14353 , TFLOPS: 97.10181543950331, Tokens per sec: 79344.39301872028, Loss: 2.286618232727051 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14354 , TFLOPS: 96.68815522547965, Tokens per sec: 79006.3806092803, Loss: 2.2567288875579834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14355 , TFLOPS: 97.79706795383805, Tokens per sec: 79912.50174557511, Loss: 2.2678167819976807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14356 , TFLOPS: 96.53376738510573, Tokens per sec: 78880.2263306142, Loss: 2.276350259780884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14357 , TFLOPS: 97.80126936280283, Tokens per sec: 79915.93482499411, Loss: 2.2467710971832275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14358 , TFLOPS: 96.66732946802952, Tokens per sec: 78989.36334676511, Loss: 2.2552847862243652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14359 , TFLOPS: 97.92255127999536, Tokens per sec: 80015.03740160643, Loss: 2.248483896255493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14360 , TFLOPS: 97.73295612663121, Tokens per sec: 79860.11432117914, Loss: 2.2792859077453613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14361 , TFLOPS: 97.89914030021008, Tokens per sec: 79995.90768737163, Loss: 2.237617015838623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14362 , TFLOPS: 97.70936534873483, Tokens per sec: 79840.83768927937, Loss: 2.2596750259399414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14363 , TFLOPS: 97.78895841768741, Tokens per sec: 79905.87523482837, Loss: 2.24271297454834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14364 , TFLOPS: 95.94419098715747, Tokens per sec: 78398.46827880401, Loss: 2.2534637451171875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14365 , TFLOPS: 98.43128324495352, Tokens per sec: 80430.73538609955, Loss: 2.258394956588745 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14366 , TFLOPS: 97.8384138624621, Tokens per sec: 79946.2865518502, Loss: 2.2561731338500977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14367 , TFLOPS: 97.31206741691831, Tokens per sec: 79516.19532183347, Loss: 2.2683534622192383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14368 , TFLOPS: 97.76033225375545, Tokens per sec: 79882.48405937655, Loss: 2.269038200378418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14369 , TFLOPS: 97.0681817207554, Tokens per sec: 79316.91004132241, Loss: 2.2554519176483154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14370 , TFLOPS: 97.23416545124512, Tokens per sec: 79452.5396203069, Loss: 2.256784439086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14371 , TFLOPS: 97.82400270617777, Tokens per sec: 79934.51082507409, Loss: 2.275883436203003 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14372 , TFLOPS: 98.42198039646489, Tokens per sec: 80423.13378912288, Loss: 2.248861074447632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14373 , TFLOPS: 97.79151858916796, Tokens per sec: 79907.96721684981, Loss: 2.2722485065460205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14374 , TFLOPS: 96.60387100857145, Tokens per sec: 78937.50980597585, Loss: 2.24572491645813 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14375 , TFLOPS: 97.76215054193563, Tokens per sec: 79883.96982945548, Loss: 2.2835001945495605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14376 , TFLOPS: 97.23731698676443, Tokens per sec: 79455.11482110746, Loss: 2.258246660232544 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14377 , TFLOPS: 96.58770592391521, Tokens per sec: 78924.30089917695, Loss: 2.2672181129455566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14378 , TFLOPS: 97.82891455508664, Tokens per sec: 79938.52442325978, Loss: 2.272784948348999 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14379 , TFLOPS: 97.25729007762672, Tokens per sec: 79471.43534780414, Loss: 2.244311571121216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14380 , TFLOPS: 96.53427754306476, Tokens per sec: 78880.64319381517, Loss: 2.233464241027832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14381 , TFLOPS: 97.85118769406945, Tokens per sec: 79956.72437848424, Loss: 2.244229316711426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14382 , TFLOPS: 97.72280474201429, Tokens per sec: 79851.81936348877, Loss: 2.2634592056274414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14383 , TFLOPS: 94.81280932277603, Tokens per sec: 77473.98719648292, Loss: 2.261124610900879 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14384 , TFLOPS: 97.85594656796613, Tokens per sec: 79960.61297684953, Loss: 2.261474609375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14385 , TFLOPS: 96.82263499486875, Tokens per sec: 79116.26749066537, Loss: 2.2718288898468018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14386 , TFLOPS: 96.63331765059887, Tokens per sec: 78961.57141520045, Loss: 2.235370635986328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14387 , TFLOPS: 95.97137543177782, Tokens per sec: 78420.68138829316, Loss: 2.2456374168395996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14388 , TFLOPS: 97.76507141849679, Tokens per sec: 79886.35655288366, Loss: 2.2534337043762207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14389 , TFLOPS: 97.21231437768402, Tokens per sec: 79434.68454560346, Loss: 2.2225584983825684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14390 , TFLOPS: 97.13321555723591, Tokens per sec: 79370.05086323053, Loss: 2.2563459873199463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14391 , TFLOPS: 97.20818207268368, Tokens per sec: 79431.30793281268, Loss: 2.261293649673462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14392 , TFLOPS: 96.78409717685848, Tokens per sec: 79084.7772475175, Loss: 2.2538259029388428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14393 , TFLOPS: 97.79439497548192, Tokens per sec: 79910.31758615172, Loss: 2.2556371688842773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14394 , TFLOPS: 96.5441601070965, Tokens per sec: 78888.71849128533, Loss: 2.248119831085205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14395 , TFLOPS: 97.84276570529899, Tokens per sec: 79949.84255465861, Loss: 2.2460100650787354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14396 , TFLOPS: 97.20170526107358, Tokens per sec: 79426.0155633181, Loss: 2.2593326568603516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14397 , TFLOPS: 97.21429926226268, Tokens per sec: 79436.30644588816, Loss: 2.2467093467712402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14398 , TFLOPS: 98.45337198430597, Tokens per sec: 80448.78466363919, Loss: 2.2796902656555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14399 , TFLOPS: 97.82991108115282, Tokens per sec: 79939.3387103613, Loss: 2.2760934829711914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14400 , TFLOPS: 97.76853741399675, Tokens per sec: 79889.18870702935, Loss: 2.271782875061035 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/optimizer.pt +[rank0]:[2024-08-29 22:44:56,985] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0077993279992369935, 'preprocessing_with_comm': 0.0016155180055648088, 'state_converting': 2.627334229997359, : 2.6385821040021256}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400, took 14.79s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0012400 + eval ppl=7.092208385467529, eval loss=1.9589967727661133 +------------------------------------------------------------------ +iteration: 14401 , TFLOPS: 96.56104489052078, Tokens per sec: 78902.5154824743, Loss: 2.229123115539551 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14402 , TFLOPS: 96.78038988143658, Tokens per sec: 79081.74792099398, Loss: 2.2691898345947266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14403 , TFLOPS: 97.88546685359916, Tokens per sec: 79984.73476216003, Loss: 2.259824752807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14404 , TFLOPS: 96.18738905691464, Tokens per sec: 78597.19168207786, Loss: 2.2370643615722656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14405 , TFLOPS: 97.26734470183231, Tokens per sec: 79479.65124007159, Loss: 2.2456178665161133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14406 , TFLOPS: 96.29694700894966, Tokens per sec: 78686.71430495825, Loss: 2.2604868412017822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14407 , TFLOPS: 97.92333510659287, Tokens per sec: 80015.67788649673, Loss: 2.2470602989196777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14408 , TFLOPS: 98.38698760705469, Tokens per sec: 80394.54028010127, Loss: 2.234720230102539 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14409 , TFLOPS: 97.81687877184785, Tokens per sec: 79928.68967494674, Loss: 2.264860153198242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14410 , TFLOPS: 98.39474780635244, Tokens per sec: 80400.88133871276, Loss: 2.2573108673095703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14411 , TFLOPS: 98.4037798657057, Tokens per sec: 80408.2616669162, Loss: 2.2502427101135254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14412 , TFLOPS: 97.38638801774546, Tokens per sec: 79576.92459795179, Loss: 2.2563610076904297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14413 , TFLOPS: 97.55529973421886, Tokens per sec: 79714.94670965661, Loss: 2.25931978225708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14414 , TFLOPS: 97.07895722928028, Tokens per sec: 79325.71498672434, Loss: 2.256479263305664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14415 , TFLOPS: 98.34725224090627, Tokens per sec: 80362.07148954207, Loss: 2.2388641834259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14416 , TFLOPS: 97.72925477996701, Tokens per sec: 79857.08985553868, Loss: 2.2537665367126465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14417 , TFLOPS: 97.77286457420678, Tokens per sec: 79892.72454102797, Loss: 2.2473807334899902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14418 , TFLOPS: 98.3259602905216, Tokens per sec: 80344.67328877919, Loss: 2.2321908473968506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14419 , TFLOPS: 97.68218049623687, Tokens per sec: 79818.6242465029, Loss: 2.2530324459075928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14420 , TFLOPS: 96.40147850126895, Tokens per sec: 78772.12967821235, Loss: 2.2725589275360107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14421 , TFLOPS: 98.29328086853297, Tokens per sec: 80317.97009182913, Loss: 2.2618651390075684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14422 , TFLOPS: 98.41321727993643, Tokens per sec: 80415.97322102479, Loss: 2.2668063640594482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14423 , TFLOPS: 96.59155844194197, Tokens per sec: 78927.44889082926, Loss: 2.2610535621643066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14424 , TFLOPS: 97.10043396052532, Tokens per sec: 79343.26417668491, Loss: 2.2500855922698975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14425 , TFLOPS: 98.32320454375635, Tokens per sec: 80342.42149715808, Loss: 2.2462880611419678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14426 , TFLOPS: 97.67465796413326, Tokens per sec: 79812.47739187386, Loss: 2.245051145553589 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14427 , TFLOPS: 96.10008801955554, Tokens per sec: 78525.8557571231, Loss: 2.2571449279785156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14428 , TFLOPS: 98.4057040223795, Tokens per sec: 80409.83394486655, Loss: 2.2309064865112305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14429 , TFLOPS: 97.83919751708736, Tokens per sec: 79946.92689621756, Loss: 2.2572805881500244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14430 , TFLOPS: 95.91593575236051, Tokens per sec: 78375.38019909697, Loss: 2.2589941024780273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14431 , TFLOPS: 97.78146867153248, Tokens per sec: 79899.75517044197, Loss: 2.2380452156066895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14432 , TFLOPS: 97.80960482131154, Tokens per sec: 79922.74594271538, Loss: 2.24314284324646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14433 , TFLOPS: 95.80951423681961, Tokens per sec: 78288.42043921497, Loss: 2.263568162918091 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14434 , TFLOPS: 98.44438698285725, Tokens per sec: 80441.44278766093, Loss: 2.2499191761016846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14435 , TFLOPS: 96.06205015330393, Tokens per sec: 78494.77403742712, Loss: 2.2567102909088135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14436 , TFLOPS: 97.18783194115449, Tokens per sec: 79414.6793164812, Loss: 2.252835512161255 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14437 , TFLOPS: 96.68706678775135, Tokens per sec: 79005.49121879363, Loss: 2.287567615509033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14438 , TFLOPS: 96.31063181870506, Tokens per sec: 78697.89651528774, Loss: 2.276362180709839 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14439 , TFLOPS: 97.07330533429588, Tokens per sec: 79321.09667783989, Loss: 2.251758098602295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14440 , TFLOPS: 96.76569448499637, Tokens per sec: 79069.73993426953, Loss: 2.2474899291992188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14441 , TFLOPS: 97.87061534558403, Tokens per sec: 79972.59921264903, Loss: 2.2670741081237793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14442 , TFLOPS: 96.28122673275948, Tokens per sec: 78673.86886260736, Loss: 2.247166156768799 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14443 , TFLOPS: 97.33201742128686, Tokens per sec: 79532.49698396181, Loss: 2.2516233921051025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14444 , TFLOPS: 97.38191435514496, Tokens per sec: 79573.26905307859, Loss: 2.2668635845184326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14445 , TFLOPS: 97.82398300838612, Tokens per sec: 79934.49472950147, Loss: 2.2503662109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14446 , TFLOPS: 98.42879543391584, Tokens per sec: 80428.70253165871, Loss: 2.2673375606536865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14447 , TFLOPS: 97.13913306977163, Tokens per sec: 79374.88621505379, Loss: 2.249760866165161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14448 , TFLOPS: 98.33034906876865, Tokens per sec: 80348.25947246207, Loss: 2.2684295177459717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14449 , TFLOPS: 98.47988731984582, Tokens per sec: 80470.45102687425, Loss: 2.244795799255371 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14450 , TFLOPS: 98.47205544942898, Tokens per sec: 80464.05140394637, Loss: 2.265960693359375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14451 , TFLOPS: 97.87065909781887, Tokens per sec: 79972.6349637264, Loss: 2.2782459259033203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14452 , TFLOPS: 97.13131166736092, Tokens per sec: 79368.49514580317, Loss: 2.246060609817505 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14453 , TFLOPS: 97.64965834063229, Tokens per sec: 79792.04955596365, Loss: 2.2627155780792236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14454 , TFLOPS: 98.36615797363945, Tokens per sec: 80377.5198504352, Loss: 2.249711513519287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14455 , TFLOPS: 97.75259781152583, Tokens per sec: 79876.16404752848, Loss: 2.254333257675171 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14456 , TFLOPS: 98.34584551195006, Tokens per sec: 80360.92201509952, Loss: 2.2638683319091797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14457 , TFLOPS: 97.80949810350756, Tokens per sec: 79922.65874085059, Loss: 2.2545695304870605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14458 , TFLOPS: 96.48595497165388, Tokens per sec: 78841.15757678168, Loss: 2.2515463829040527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14459 , TFLOPS: 98.39409151536593, Tokens per sec: 80400.34506645324, Loss: 2.2537035942077637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14460 , TFLOPS: 98.45398502043169, Tokens per sec: 80449.28559123847, Loss: 2.2463910579681396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14461 , TFLOPS: 97.33093927847908, Tokens per sec: 79531.6160057197, Loss: 2.265045642852783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14462 , TFLOPS: 96.55826489324562, Tokens per sec: 78900.24387513727, Loss: 2.272002696990967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14463 , TFLOPS: 98.48690128197244, Tokens per sec: 80476.18231588336, Loss: 2.26216721534729 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14464 , TFLOPS: 98.41402513537469, Tokens per sec: 80416.63334049944, Loss: 2.25344181060791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14465 , TFLOPS: 95.49720317444486, Tokens per sec: 78033.2230305466, Loss: 2.2533507347106934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14466 , TFLOPS: 98.47003847474731, Tokens per sec: 80462.40328201243, Loss: 2.234520196914673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14467 , TFLOPS: 98.29956711988252, Tokens per sec: 80323.10674962935, Loss: 2.2443230152130127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14468 , TFLOPS: 95.09437247734638, Tokens per sec: 77704.05969816276, Loss: 2.25748872756958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14469 , TFLOPS: 98.41952344267182, Tokens per sec: 80421.12614893094, Loss: 2.2628560066223145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14470 , TFLOPS: 97.0756378915685, Tokens per sec: 79323.0026704327, Loss: 2.267989158630371 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14471 , TFLOPS: 96.4424213842743, Tokens per sec: 78805.58516187959, Loss: 2.2468783855438232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14472 , TFLOPS: 97.7333577270094, Tokens per sec: 79860.44247918627, Loss: 2.2464990615844727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14473 , TFLOPS: 96.65541054779938, Tokens per sec: 78979.62409022477, Loss: 2.2402663230895996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14474 , TFLOPS: 97.05291646437212, Tokens per sec: 79304.43640736898, Loss: 2.2470037937164307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14475 , TFLOPS: 96.595678416084, Tokens per sec: 78930.81542775827, Loss: 2.26751446723938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14476 , TFLOPS: 96.61483480294211, Tokens per sec: 78946.4686045893, Loss: 2.2575063705444336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14477 , TFLOPS: 97.1085609670153, Tokens per sec: 79349.90496289564, Loss: 2.250622510910034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14478 , TFLOPS: 96.10771362196776, Tokens per sec: 78532.0868331546, Loss: 2.2632994651794434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14479 , TFLOPS: 97.14380453791068, Tokens per sec: 79378.70339192449, Loss: 2.26420259475708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14480 , TFLOPS: 95.76889844038215, Tokens per sec: 78255.2322264023, Loss: 2.258486747741699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14481 , TFLOPS: 97.1950323289667, Tokens per sec: 79420.56293871703, Loss: 2.245954990386963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14482 , TFLOPS: 97.19683255005339, Tokens per sec: 79422.03394570877, Loss: 2.265528440475464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14483 , TFLOPS: 97.87004887205842, Tokens per sec: 79972.13633254894, Loss: 2.2524988651275635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14484 , TFLOPS: 98.4426483022105, Tokens per sec: 80440.02206694684, Loss: 2.252950429916382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14485 , TFLOPS: 97.91770938420093, Tokens per sec: 80011.0809639113, Loss: 2.2427501678466797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14486 , TFLOPS: 98.45248070591809, Tokens per sec: 80448.0563771249, Loss: 2.287827968597412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14487 , TFLOPS: 98.47514521888114, Tokens per sec: 80466.57613410348, Loss: 2.259209394454956 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14488 , TFLOPS: 98.41595948308245, Tokens per sec: 80418.21394580598, Loss: 2.2521402835845947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14489 , TFLOPS: 97.14483746676312, Tokens per sec: 79379.54742467977, Loss: 2.240509510040283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14490 , TFLOPS: 97.18497773666945, Tokens per sec: 79412.34707252271, Loss: 2.2524733543395996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14491 , TFLOPS: 98.37593914917157, Tokens per sec: 80385.51230075237, Loss: 2.223289728164673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14492 , TFLOPS: 98.34185623918856, Tokens per sec: 80357.66227763309, Loss: 2.2654638290405273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14493 , TFLOPS: 97.79496034118804, Tokens per sec: 79910.77956102401, Loss: 2.2701663970947266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14494 , TFLOPS: 98.39743341323549, Tokens per sec: 80403.07581723057, Loss: 2.2645506858825684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14495 , TFLOPS: 97.04360002511669, Tokens per sec: 79296.82370502688, Loss: 2.2752535343170166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14496 , TFLOPS: 97.07537176133933, Tokens per sec: 79322.78520857164, Loss: 2.2584996223449707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14497 , TFLOPS: 98.43935008112157, Tokens per sec: 80437.32700559132, Loss: 2.25459623336792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14498 , TFLOPS: 98.42631154381394, Tokens per sec: 80426.67288111543, Loss: 2.266246795654297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14499 , TFLOPS: 97.144600561923, Tokens per sec: 79379.35384363643, Loss: 2.254066228866577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14500 , TFLOPS: 97.27378360824483, Tokens per sec: 79484.91263625344, Loss: 2.2501466274261475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14501 , TFLOPS: 98.38702177221334, Tokens per sec: 80394.5681973319, Loss: 2.235835075378418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14502 , TFLOPS: 98.34918107909114, Tokens per sec: 80363.64759287564, Loss: 2.2638659477233887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14503 , TFLOPS: 95.4661762538655, Tokens per sec: 78007.8701350372, Loss: 2.2433414459228516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14504 , TFLOPS: 98.39732495044484, Tokens per sec: 80402.98718949224, Loss: 2.2730600833892822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14505 , TFLOPS: 98.4399099166635, Tokens per sec: 80437.78446162408, Loss: 2.281510353088379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14506 , TFLOPS: 95.35203018788147, Tokens per sec: 77914.59844614052, Loss: 2.2499563694000244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14507 , TFLOPS: 98.37563067779969, Tokens per sec: 80385.26024085365, Loss: 2.2753329277038574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14508 , TFLOPS: 97.68941667033296, Tokens per sec: 79824.53711063253, Loss: 2.256741762161255 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14509 , TFLOPS: 93.84896142089508, Tokens per sec: 76686.40226420369, Loss: 2.2429890632629395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14510 , TFLOPS: 98.39652880800823, Tokens per sec: 80402.33664101276, Loss: 2.248007297515869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14511 , TFLOPS: 96.6016257139214, Tokens per sec: 78935.67511791826, Loss: 2.2493643760681152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14512 , TFLOPS: 96.94565883386674, Tokens per sec: 79216.79343642599, Loss: 2.266268491744995 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14513 , TFLOPS: 96.5504348205119, Tokens per sec: 78893.84572114254, Loss: 2.2468175888061523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14514 , TFLOPS: 96.54810461818289, Tokens per sec: 78891.94165283476, Loss: 2.2482941150665283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14515 , TFLOPS: 97.06672842258513, Tokens per sec: 79315.72251397624, Loss: 2.255526542663574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14516 , TFLOPS: 94.98752064446111, Tokens per sec: 77616.74831490133, Loss: 2.267472982406616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14517 , TFLOPS: 97.902995475254, Tokens per sec: 79999.05785014106, Loss: 2.2494735717773438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14518 , TFLOPS: 95.73631882530846, Tokens per sec: 78228.61058425168, Loss: 2.248288869857788 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14519 , TFLOPS: 96.64481755428996, Tokens per sec: 78970.9682825403, Loss: 2.2545342445373535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14520 , TFLOPS: 97.29987893160911, Tokens per sec: 79506.2357967278, Loss: 2.2676875591278076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14521 , TFLOPS: 97.29338967126814, Tokens per sec: 79500.93325505477, Loss: 2.2368199825286865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14522 , TFLOPS: 98.42636591877444, Tokens per sec: 80426.7173122954, Loss: 2.252495050430298 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14523 , TFLOPS: 97.82603401598891, Tokens per sec: 79936.17066061134, Loss: 2.2687532901763916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14524 , TFLOPS: 98.44517018013785, Tokens per sec: 80442.08275832019, Loss: 2.233128309249878 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14525 , TFLOPS: 98.37658683784966, Tokens per sec: 80386.04154384421, Loss: 2.268282413482666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14526 , TFLOPS: 98.487859213633, Tokens per sec: 80476.96506650257, Loss: 2.2643754482269287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14527 , TFLOPS: 97.7872909738795, Tokens per sec: 79904.51272356912, Loss: 2.2386460304260254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14528 , TFLOPS: 97.04380223111887, Tokens per sec: 79296.9889327565, Loss: 2.262474298477173 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14529 , TFLOPS: 98.37419371807071, Tokens per sec: 80384.08606406846, Loss: 2.2609758377075195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14530 , TFLOPS: 97.66265982546578, Tokens per sec: 79802.6734039091, Loss: 2.2659504413604736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14531 , TFLOPS: 97.25111486366262, Tokens per sec: 79466.38942150993, Loss: 2.2602174282073975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14532 , TFLOPS: 98.41334255283226, Tokens per sec: 80416.07558473278, Loss: 2.2540464401245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14533 , TFLOPS: 98.38086705657203, Tokens per sec: 80389.5390207449, Loss: 2.2532644271850586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14534 , TFLOPS: 97.07839088494421, Tokens per sec: 79325.25221218835, Loss: 2.2751007080078125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14535 , TFLOPS: 97.8555581474952, Tokens per sec: 79960.2955884839, Loss: 2.2693865299224854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14536 , TFLOPS: 97.68905359461617, Tokens per sec: 79824.24043211793, Loss: 2.278367757797241 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14537 , TFLOPS: 97.81900672707864, Tokens per sec: 79930.42848194425, Loss: 2.2596874237060547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14538 , TFLOPS: 96.643548367868, Tokens per sec: 78969.93119764315, Loss: 2.249556064605713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14539 , TFLOPS: 98.36959453934494, Tokens per sec: 80380.32795673824, Loss: 2.2435691356658936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14540 , TFLOPS: 97.901481398898, Tokens per sec: 79997.82065937477, Loss: 2.259572744369507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14541 , TFLOPS: 96.07704874828949, Tokens per sec: 78507.029775488, Loss: 2.2641799449920654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14542 , TFLOPS: 97.87891395906775, Tokens per sec: 79979.3802233519, Loss: 2.232185125350952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14543 , TFLOPS: 98.39262380246548, Tokens per sec: 80399.14576046, Loss: 2.268061637878418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14544 , TFLOPS: 95.92462018789861, Tokens per sec: 78382.47647493241, Loss: 2.2648277282714844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14545 , TFLOPS: 97.70256263102932, Tokens per sec: 79835.27901352466, Loss: 2.2553658485412598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14546 , TFLOPS: 97.87173624932672, Tokens per sec: 79973.51513195182, Loss: 2.251645803451538 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14547 , TFLOPS: 96.44661034311588, Tokens per sec: 78809.00806798243, Loss: 2.269707202911377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14548 , TFLOPS: 97.20515325848942, Tokens per sec: 79428.83301076594, Loss: 2.233006000518799 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14549 , TFLOPS: 96.80936057038099, Tokens per sec: 79105.42061670234, Loss: 2.2653310298919678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14550 , TFLOPS: 97.73212731415954, Tokens per sec: 79859.43707717302, Loss: 2.2500789165496826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14551 , TFLOPS: 95.9108286862186, Tokens per sec: 78371.20708388483, Loss: 2.260255813598633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14552 , TFLOPS: 97.85068914609772, Tokens per sec: 79956.31700210278, Loss: 2.271822690963745 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14553 , TFLOPS: 96.56099577999078, Tokens per sec: 78902.47535299601, Loss: 2.2650139331817627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14554 , TFLOPS: 97.08867366128753, Tokens per sec: 79333.65453344068, Loss: 2.2860467433929443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14555 , TFLOPS: 97.1333775444971, Tokens per sec: 79370.18322719155, Loss: 2.257601261138916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14556 , TFLOPS: 96.6546937023604, Tokens per sec: 78979.03833736334, Loss: 2.2440907955169678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14557 , TFLOPS: 97.16548152950843, Tokens per sec: 79396.4162197744, Loss: 2.2613775730133057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14558 , TFLOPS: 97.35879416805281, Tokens per sec: 79554.37695303904, Loss: 2.26373291015625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14559 , TFLOPS: 96.5974058271267, Tokens per sec: 78932.22693978879, Loss: 2.2664246559143066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14560 , TFLOPS: 98.44501502701306, Tokens per sec: 80441.95597870793, Loss: 2.252657413482666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14561 , TFLOPS: 97.38212529715082, Tokens per sec: 79573.44141922172, Loss: 2.265110969543457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14562 , TFLOPS: 97.79251979149775, Tokens per sec: 79908.78532504673, Loss: 2.235436201095581 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14563 , TFLOPS: 98.41020697169627, Tokens per sec: 80413.51342066952, Loss: 2.2554092407226562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14564 , TFLOPS: 98.46615806657294, Tokens per sec: 80459.23250060256, Loss: 2.274446964263916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14565 , TFLOPS: 97.2259643455887, Tokens per sec: 79445.8382857598, Loss: 2.239095687866211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14566 , TFLOPS: 97.18748903220721, Tokens per sec: 79414.39911675309, Loss: 2.2625303268432617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14567 , TFLOPS: 98.45919342762178, Tokens per sec: 80453.54151482982, Loss: 2.2671844959259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14568 , TFLOPS: 98.36027185175992, Tokens per sec: 80372.71014872518, Loss: 2.2529964447021484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14569 , TFLOPS: 97.20283639283754, Tokens per sec: 79426.93984020036, Loss: 2.23653244972229 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14570 , TFLOPS: 97.83138980193576, Tokens per sec: 79940.54701117871, Loss: 2.246431350708008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14571 , TFLOPS: 98.41863966627925, Tokens per sec: 80420.40399249014, Loss: 2.264248847961426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14572 , TFLOPS: 96.40329372515507, Tokens per sec: 78773.61294437772, Loss: 2.2328836917877197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14573 , TFLOPS: 97.3013686253192, Tokens per sec: 79507.45306380643, Loss: 2.2468936443328857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14574 , TFLOPS: 98.45479608695105, Tokens per sec: 80449.94833457014, Loss: 2.2492783069610596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14575 , TFLOPS: 97.75028094297188, Tokens per sec: 79874.27087459156, Loss: 2.2770869731903076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14576 , TFLOPS: 96.05543720508722, Tokens per sec: 78489.37042720681, Loss: 2.249372720718384 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14577 , TFLOPS: 98.40663185354585, Tokens per sec: 80410.59209959708, Loss: 2.247451066970825 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14578 , TFLOPS: 96.48002137409507, Tokens per sec: 78836.30908147199, Loss: 2.2566726207733154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14579 , TFLOPS: 96.7489548622491, Tokens per sec: 79056.06155760652, Loss: 2.271850824356079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14580 , TFLOPS: 98.00207294012185, Tokens per sec: 80080.01659716542, Loss: 2.2458295822143555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14581 , TFLOPS: 97.97432963705816, Tokens per sec: 80057.34683005592, Loss: 2.257181167602539 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14582 , TFLOPS: 94.96655047312652, Tokens per sec: 77599.6130480836, Loss: 2.2728264331817627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14583 , TFLOPS: 97.70176023085718, Tokens per sec: 79834.62335168838, Loss: 2.260366201400757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14584 , TFLOPS: 97.26267767096762, Tokens per sec: 79475.83768901198, Loss: 2.247561454772949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14585 , TFLOPS: 96.58105486364467, Tokens per sec: 78918.86614661636, Loss: 2.253446102142334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14586 , TFLOPS: 96.74731900086886, Tokens per sec: 79054.7248531619, Loss: 2.2613115310668945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14587 , TFLOPS: 96.87384067517738, Tokens per sec: 79158.10897019703, Loss: 2.2549328804016113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14588 , TFLOPS: 97.24445177246587, Tokens per sec: 79460.94483816983, Loss: 2.253516912460327 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14589 , TFLOPS: 95.6307091186529, Tokens per sec: 78142.31417430773, Loss: 2.2739388942718506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14590 , TFLOPS: 97.84574970367966, Tokens per sec: 79952.2808565504, Loss: 2.25199818611145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14591 , TFLOPS: 97.08125482744992, Tokens per sec: 79327.5924133342, Loss: 2.2472026348114014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14592 , TFLOPS: 96.53036169876994, Tokens per sec: 78877.44345663906, Loss: 2.2601394653320312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14593 , TFLOPS: 97.81390295762836, Tokens per sec: 79926.25806054373, Loss: 2.236220359802246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14594 , TFLOPS: 96.76447073392043, Tokens per sec: 79068.73997576322, Loss: 2.2394628524780273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14595 , TFLOPS: 97.23795217779293, Tokens per sec: 79455.63385204812, Loss: 2.23850417137146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14596 , TFLOPS: 97.31332775896293, Tokens per sec: 79517.22517976222, Loss: 2.2513678073883057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14597 , TFLOPS: 96.07078213097577, Tokens per sec: 78501.90916116389, Loss: 2.250296115875244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14598 , TFLOPS: 98.40614173644103, Tokens per sec: 80410.191612294, Loss: 2.2536652088165283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14599 , TFLOPS: 97.11561845864273, Tokens per sec: 79355.67182098042, Loss: 2.2615742683410645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14600 , TFLOPS: 98.4171759806933, Tokens per sec: 80419.20797731925, Loss: 2.2614760398864746 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/optimizer.pt +[rank0]:[2024-08-29 23:40:15,413] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007740252010989934, 'preprocessing_with_comm': 0.0015639170014765114, 'state_converting': 2.621240692009451, : 2.632198926003184}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600, took 14.79s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0012600 + eval ppl=7.4070844650268555, eval loss=2.002436876296997 +------------------------------------------------------------------ +iteration: 14601 , TFLOPS: 95.07422162900028, Tokens per sec: 77687.59392125017, Loss: 2.254549503326416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14602 , TFLOPS: 96.80172740818236, Tokens per sec: 79099.18336337476, Loss: 2.2442569732666016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14603 , TFLOPS: 97.68818544391527, Tokens per sec: 79823.53104383204, Loss: 2.2518808841705322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14604 , TFLOPS: 96.81535547147887, Tokens per sec: 79110.31920471191, Loss: 2.2557263374328613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14605 , TFLOPS: 96.38589176538842, Tokens per sec: 78759.3933550861, Loss: 2.2831201553344727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14606 , TFLOPS: 97.619619194982, Tokens per sec: 79767.5037967764, Loss: 2.2542901039123535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14607 , TFLOPS: 97.85092614417647, Tokens per sec: 79956.51065933378, Loss: 2.2796454429626465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14608 , TFLOPS: 96.5221619627927, Tokens per sec: 78870.74324129206, Loss: 2.246331214904785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14609 , TFLOPS: 98.38773277150564, Tokens per sec: 80395.14917315637, Loss: 2.2600297927856445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14610 , TFLOPS: 98.38587870137066, Tokens per sec: 80393.63416472106, Loss: 2.2686195373535156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14611 , TFLOPS: 97.65328540691928, Tokens per sec: 79795.01332519574, Loss: 2.2354369163513184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14612 , TFLOPS: 97.11410563023986, Tokens per sec: 79354.43564994876, Loss: 2.251720905303955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14613 , TFLOPS: 97.70686896388733, Tokens per sec: 79838.7978289578, Loss: 2.26060152053833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14614 , TFLOPS: 97.97243862710435, Tokens per sec: 80055.80163714374, Loss: 2.2707862854003906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14615 , TFLOPS: 97.5967590784446, Tokens per sec: 79748.82420708194, Loss: 2.2551727294921875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14616 , TFLOPS: 97.16932164248996, Tokens per sec: 79399.55407494493, Loss: 2.253084421157837 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14617 , TFLOPS: 97.04307205876576, Tokens per sec: 79296.39229013048, Loss: 2.259098529815674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14618 , TFLOPS: 97.09688460836246, Tokens per sec: 79340.3639096642, Loss: 2.2493221759796143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14619 , TFLOPS: 97.71450781567212, Tokens per sec: 79845.0397313927, Loss: 2.227540969848633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14620 , TFLOPS: 97.73827251313124, Tokens per sec: 79864.45847744426, Loss: 2.2455925941467285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14621 , TFLOPS: 97.76927325176145, Tokens per sec: 79889.78997900902, Loss: 2.2428672313690186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14622 , TFLOPS: 96.10334610198743, Tokens per sec: 78528.5180201487, Loss: 2.264486074447632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14623 , TFLOPS: 97.22090531198604, Tokens per sec: 79441.70441917195, Loss: 2.2448225021362305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14624 , TFLOPS: 98.35468507835338, Tokens per sec: 80368.14505235836, Loss: 2.2532904148101807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14625 , TFLOPS: 98.39288227510251, Tokens per sec: 80399.3569651054, Loss: 2.269270658493042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14626 , TFLOPS: 96.07673958378841, Tokens per sec: 78506.77714921559, Loss: 2.27742075920105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14627 , TFLOPS: 97.80304465420485, Tokens per sec: 79917.38546129878, Loss: 2.251237154006958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14628 , TFLOPS: 97.83297885340319, Tokens per sec: 79941.84546603858, Loss: 2.270620346069336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14629 , TFLOPS: 97.17917020638129, Tokens per sec: 79407.60159002511, Loss: 2.2585010528564453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14630 , TFLOPS: 97.20804894394237, Tokens per sec: 79431.19914989112, Loss: 2.2427122592926025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14631 , TFLOPS: 95.94710231736246, Tokens per sec: 78400.84720165882, Loss: 2.257094621658325 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14632 , TFLOPS: 98.46479040165045, Tokens per sec: 80458.11494638756, Loss: 2.2488508224487305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14633 , TFLOPS: 95.05576697379993, Tokens per sec: 77672.51414742088, Loss: 2.2575433254241943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14634 , TFLOPS: 98.36483981740321, Tokens per sec: 80376.44275104221, Loss: 2.253753185272217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14635 , TFLOPS: 96.08662560452774, Tokens per sec: 78514.85527125049, Loss: 2.2465035915374756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14636 , TFLOPS: 97.09444327145486, Tokens per sec: 79338.36903043141, Loss: 2.257333278656006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14637 , TFLOPS: 96.69943353449487, Tokens per sec: 79015.59640590615, Loss: 2.2592835426330566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14638 , TFLOPS: 96.56538044777952, Tokens per sec: 78906.05817791758, Loss: 2.272067070007324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14639 , TFLOPS: 96.18595954713868, Tokens per sec: 78596.02359284105, Loss: 2.274799346923828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14640 , TFLOPS: 96.94747668559596, Tokens per sec: 79218.2788498696, Loss: 2.2512481212615967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14641 , TFLOPS: 97.7427891052404, Tokens per sec: 79868.1491011241, Loss: 2.2360854148864746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14642 , TFLOPS: 96.02734007667189, Tokens per sec: 78466.41154029423, Loss: 2.2573349475860596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14643 , TFLOPS: 97.22772239775095, Tokens per sec: 79447.27483543787, Loss: 2.2528231143951416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14644 , TFLOPS: 97.79845133075145, Tokens per sec: 79913.63213846233, Loss: 2.2430238723754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14645 , TFLOPS: 97.92390024425386, Tokens per sec: 80016.13967502747, Loss: 2.2515594959259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14646 , TFLOPS: 96.72762135019667, Tokens per sec: 79038.62939573501, Loss: 2.2505176067352295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14647 , TFLOPS: 98.46410993459305, Tokens per sec: 80457.55891923819, Loss: 2.239104747772217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14648 , TFLOPS: 97.71041796281274, Tokens per sec: 79841.69780733911, Loss: 2.2539196014404297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14649 , TFLOPS: 97.69146104642894, Tokens per sec: 79826.20762296747, Loss: 2.258397340774536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14650 , TFLOPS: 97.21674073825683, Tokens per sec: 79438.30143877215, Loss: 2.2612340450286865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14651 , TFLOPS: 98.4146553135789, Tokens per sec: 80417.14827533234, Loss: 2.281385660171509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14652 , TFLOPS: 97.7255176865821, Tokens per sec: 79854.03618033254, Loss: 2.2288753986358643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14653 , TFLOPS: 98.3003298337691, Tokens per sec: 80323.72998278006, Loss: 2.2264161109924316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14654 , TFLOPS: 96.46004870221178, Tokens per sec: 78819.98889713384, Loss: 2.245464324951172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14655 , TFLOPS: 97.7365042724081, Tokens per sec: 79863.01360243083, Loss: 2.2476701736450195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14656 , TFLOPS: 97.11466449097713, Tokens per sec: 79354.89230944353, Loss: 2.2544193267822266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14657 , TFLOPS: 97.63612901277119, Tokens per sec: 79780.99439389247, Loss: 2.2681736946105957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14658 , TFLOPS: 97.20013360205691, Tokens per sec: 79424.73132027748, Loss: 2.252652645111084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14659 , TFLOPS: 97.76103906349495, Tokens per sec: 79883.06161180961, Loss: 2.2536652088165283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14660 , TFLOPS: 96.67977258957858, Tokens per sec: 78999.53094169729, Loss: 2.254403829574585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14661 , TFLOPS: 96.63439666633886, Tokens per sec: 78962.45310673794, Loss: 2.2574801445007324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14662 , TFLOPS: 97.78791705221673, Tokens per sec: 79905.02430829515, Loss: 2.251732349395752 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14663 , TFLOPS: 98.41956927383146, Tokens per sec: 80421.16359875127, Loss: 2.252913236618042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14664 , TFLOPS: 95.20826437971537, Tokens per sec: 77797.12370342686, Loss: 2.2420568466186523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14665 , TFLOPS: 97.81247035451092, Tokens per sec: 79925.08744365798, Loss: 2.2446236610412598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14666 , TFLOPS: 97.75562973462553, Tokens per sec: 79878.6415099419, Loss: 2.2547054290771484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14667 , TFLOPS: 96.45846718665116, Tokens per sec: 78818.69660005742, Loss: 2.2462222576141357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14668 , TFLOPS: 97.81803297305711, Tokens per sec: 79929.6328024666, Loss: 2.229034423828125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14669 , TFLOPS: 96.03874452276169, Tokens per sec: 78475.73040677076, Loss: 2.260481834411621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14670 , TFLOPS: 98.46021674673497, Tokens per sec: 80454.37769521978, Loss: 2.2531349658966064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14671 , TFLOPS: 95.59586017378732, Tokens per sec: 78113.8382043664, Loss: 2.2470216751098633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14672 , TFLOPS: 98.4205649530836, Tokens per sec: 80421.97719389928, Loss: 2.236961603164673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14673 , TFLOPS: 95.43097340279485, Tokens per sec: 77979.10497922538, Loss: 2.270003080368042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14674 , TFLOPS: 97.17246338315913, Tokens per sec: 79402.1212721213, Loss: 2.248883008956909 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14675 , TFLOPS: 96.67420586757805, Tokens per sec: 78994.98222985049, Loss: 2.231656551361084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14676 , TFLOPS: 96.80922803565714, Tokens per sec: 79105.31231916774, Loss: 2.2608063220977783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14677 , TFLOPS: 96.24833665792421, Tokens per sec: 78646.9935357936, Loss: 2.261390209197998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14678 , TFLOPS: 96.2419948060097, Tokens per sec: 78641.81144534046, Loss: 2.2571399211883545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14679 , TFLOPS: 98.40732258166666, Tokens per sec: 80411.15651132553, Loss: 2.2589786052703857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14680 , TFLOPS: 96.81068367897427, Tokens per sec: 79106.50176279376, Loss: 2.281327962875366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14681 , TFLOPS: 97.11956270368121, Tokens per sec: 79358.89476513525, Loss: 2.2382917404174805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14682 , TFLOPS: 98.36616353585318, Tokens per sec: 80377.52439546323, Loss: 2.266831636428833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14683 , TFLOPS: 97.7803555270552, Tokens per sec: 79898.8455914347, Loss: 2.2511284351348877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14684 , TFLOPS: 97.16547763105403, Tokens per sec: 79396.41303424696, Loss: 2.2566816806793213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14685 , TFLOPS: 97.8120303682263, Tokens per sec: 79924.72791953855, Loss: 2.2654662132263184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14686 , TFLOPS: 97.77956762827617, Tokens per sec: 79898.20177906004, Loss: 2.2476348876953125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14687 , TFLOPS: 97.71192042069191, Tokens per sec: 79842.92550434888, Loss: 2.2466025352478027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14688 , TFLOPS: 97.1705758139066, Tokens per sec: 79400.5788906947, Loss: 2.2496368885040283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14689 , TFLOPS: 97.85605213491417, Tokens per sec: 79960.6992383203, Loss: 2.2472472190856934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14690 , TFLOPS: 97.72954084207052, Tokens per sec: 79857.32360424734, Loss: 2.2411468029022217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14691 , TFLOPS: 98.35411301160846, Tokens per sec: 80367.67760189479, Loss: 2.2596302032470703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14692 , TFLOPS: 97.12105499415516, Tokens per sec: 79360.11415409649, Loss: 2.274137496948242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14693 , TFLOPS: 97.22323973462491, Tokens per sec: 79443.61193600357, Loss: 2.2581348419189453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14694 , TFLOPS: 96.44339843134163, Tokens per sec: 78806.38353218973, Loss: 2.245276927947998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14695 , TFLOPS: 97.85445136649597, Tokens per sec: 79959.39120923835, Loss: 2.24330997467041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14696 , TFLOPS: 97.12130826852652, Tokens per sec: 79360.32111110518, Loss: 2.2557244300842285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14697 , TFLOPS: 97.77582591802475, Tokens per sec: 79895.1443312934, Loss: 2.2487998008728027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14698 , TFLOPS: 97.2488046123834, Tokens per sec: 79464.50165571852, Loss: 2.2599058151245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14699 , TFLOPS: 96.00142592772363, Tokens per sec: 78445.23642209906, Loss: 2.260791063308716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14700 , TFLOPS: 98.36306480754956, Tokens per sec: 80374.99234479798, Loss: 2.2576489448547363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14701 , TFLOPS: 97.86327726072035, Tokens per sec: 79966.60307460782, Loss: 2.237640380859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14702 , TFLOPS: 96.76347492453561, Tokens per sec: 79067.92627428047, Loss: 2.240825653076172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14703 , TFLOPS: 97.17741635034609, Tokens per sec: 79406.16846911049, Loss: 2.2227306365966797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14704 , TFLOPS: 97.79727104836466, Tokens per sec: 79912.6676993409, Loss: 2.2377963066101074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14705 , TFLOPS: 96.5953023126363, Tokens per sec: 78930.5081039494, Loss: 2.2652666568756104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14706 , TFLOPS: 97.28552050313876, Tokens per sec: 79494.50315520584, Loss: 2.288776397705078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14707 , TFLOPS: 96.6767347081046, Tokens per sec: 78997.04861054289, Loss: 2.2458958625793457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14708 , TFLOPS: 97.19945039030276, Tokens per sec: 79424.17305036573, Loss: 2.2332777976989746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14709 , TFLOPS: 95.68425615804279, Tokens per sec: 78186.06883861663, Loss: 2.23994517326355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14710 , TFLOPS: 98.34987747696238, Tokens per sec: 80364.2166375031, Loss: 2.2759788036346436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14711 , TFLOPS: 96.69288566307607, Tokens per sec: 79010.24597161263, Loss: 2.2326037883758545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14712 , TFLOPS: 97.22692215558638, Tokens per sec: 79446.62093696512, Loss: 2.2566778659820557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14713 , TFLOPS: 97.15082739876173, Tokens per sec: 79384.4419523104, Loss: 2.2448644638061523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14714 , TFLOPS: 96.20491728467478, Tokens per sec: 78611.5144481974, Loss: 2.244192123413086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14715 , TFLOPS: 96.8432600268576, Tokens per sec: 79133.12073523903, Loss: 2.2548766136169434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14716 , TFLOPS: 96.83951674579713, Tokens per sec: 79130.06200392389, Loss: 2.258246421813965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14717 , TFLOPS: 97.81322411002132, Tokens per sec: 79925.70335668891, Loss: 2.247624158859253 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14718 , TFLOPS: 96.24808246985877, Tokens per sec: 78646.78583218194, Loss: 2.241415500640869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14719 , TFLOPS: 97.18190314729769, Tokens per sec: 79409.83474640007, Loss: 2.2573904991149902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14720 , TFLOPS: 98.39963565130331, Tokens per sec: 80404.87532264645, Loss: 2.223928451538086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14721 , TFLOPS: 97.83397131342272, Tokens per sec: 79942.65643066872, Loss: 2.251012086868286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14722 , TFLOPS: 97.28787887403328, Tokens per sec: 79496.43024077362, Loss: 2.287914276123047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14723 , TFLOPS: 97.1506188212754, Tokens per sec: 79384.27151827706, Loss: 2.2541117668151855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14724 , TFLOPS: 97.8174749503791, Tokens per sec: 79929.17682777162, Loss: 2.2512121200561523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14725 , TFLOPS: 97.80662556520808, Tokens per sec: 79920.31151586036, Loss: 2.2474303245544434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14726 , TFLOPS: 96.5767660668316, Tokens per sec: 78915.36166033694, Loss: 2.2618589401245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14727 , TFLOPS: 98.40240693145468, Tokens per sec: 80407.13980699722, Loss: 2.261185884475708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14728 , TFLOPS: 96.54507569061673, Tokens per sec: 78889.46663814892, Loss: 2.2603111267089844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14729 , TFLOPS: 97.76483893181229, Tokens per sec: 79886.16658202902, Loss: 2.248843193054199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14730 , TFLOPS: 97.7600557432783, Tokens per sec: 79882.25811554767, Loss: 2.235576629638672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14731 , TFLOPS: 95.37301757614927, Tokens per sec: 77931.74778135755, Loss: 2.2377848625183105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14732 , TFLOPS: 97.17187748168907, Tokens per sec: 79401.64251694767, Loss: 2.242112874984741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14733 , TFLOPS: 96.67843240905707, Tokens per sec: 78998.43584569385, Loss: 2.289689064025879 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14734 , TFLOPS: 97.1379621127575, Tokens per sec: 79373.92939593442, Loss: 2.254014730453491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14735 , TFLOPS: 96.58068866305842, Tokens per sec: 78918.5669146905, Loss: 2.276597499847412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14736 , TFLOPS: 97.32495080833006, Tokens per sec: 79526.722672604, Loss: 2.2484335899353027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14737 , TFLOPS: 95.47496067086983, Tokens per sec: 78015.04810830245, Loss: 2.260721445083618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14738 , TFLOPS: 97.26460506205235, Tokens per sec: 79477.41260988278, Loss: 2.2618470191955566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14739 , TFLOPS: 96.71333772779676, Tokens per sec: 79026.95788017889, Loss: 2.2839441299438477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14740 , TFLOPS: 96.19243619670269, Tokens per sec: 78601.31582992361, Loss: 2.254286050796509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14741 , TFLOPS: 95.49541525010183, Tokens per sec: 78031.76207154062, Loss: 2.2611899375915527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14742 , TFLOPS: 97.82428625376576, Tokens per sec: 79934.74251910741, Loss: 2.269718647003174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14743 , TFLOPS: 96.6638607423642, Tokens per sec: 78986.52896173103, Loss: 2.268733024597168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14744 , TFLOPS: 95.9947691368752, Tokens per sec: 78439.79698693578, Loss: 2.2756781578063965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14745 , TFLOPS: 97.0899604369112, Tokens per sec: 79334.70599092734, Loss: 2.2651751041412354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14746 , TFLOPS: 96.92616968080281, Tokens per sec: 79200.86834776202, Loss: 2.241480827331543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14747 , TFLOPS: 95.62555809885725, Tokens per sec: 78138.10514343367, Loss: 2.242832899093628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14748 , TFLOPS: 97.78313314766272, Tokens per sec: 79901.11525673541, Loss: 2.251880407333374 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14749 , TFLOPS: 94.02207940846321, Tokens per sec: 76827.86142829907, Loss: 2.256807804107666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14750 , TFLOPS: 97.21188995624239, Tokens per sec: 79434.33773991792, Loss: 2.2536797523498535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14751 , TFLOPS: 96.63137664580458, Tokens per sec: 78959.98537021714, Loss: 2.254485845565796 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14752 , TFLOPS: 97.79988647795042, Tokens per sec: 79914.80483418259, Loss: 2.2470009326934814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14753 , TFLOPS: 96.80995509009672, Tokens per sec: 79105.90641406637, Loss: 2.24684476852417 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14754 , TFLOPS: 94.14453948172682, Tokens per sec: 76927.9267065655, Loss: 2.2456889152526855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14755 , TFLOPS: 98.34781988460902, Tokens per sec: 80362.53532582404, Loss: 2.248809814453125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14756 , TFLOPS: 95.60329206626278, Tokens per sec: 78119.91099502204, Loss: 2.234020948410034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14757 , TFLOPS: 97.32485553996345, Tokens per sec: 79526.64482636921, Loss: 2.2730889320373535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14758 , TFLOPS: 97.78008279169548, Tokens per sec: 79898.62273235145, Loss: 2.26302433013916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14759 , TFLOPS: 97.96076116267025, Tokens per sec: 80046.25968034996, Loss: 2.255866765975952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14760 , TFLOPS: 97.22976991614804, Tokens per sec: 79448.94791542794, Loss: 2.2457878589630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14761 , TFLOPS: 97.09280877528928, Tokens per sec: 79337.03344153882, Loss: 2.240551710128784 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14762 , TFLOPS: 96.55214704509761, Tokens per sec: 78895.24482392811, Loss: 2.2553648948669434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14763 , TFLOPS: 98.41536786453418, Tokens per sec: 80417.73051906058, Loss: 2.2564032077789307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14764 , TFLOPS: 96.53426082791134, Tokens per sec: 78880.629535433, Loss: 2.266725778579712 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14765 , TFLOPS: 97.82891583664144, Tokens per sec: 79938.52547045119, Loss: 2.2514727115631104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14766 , TFLOPS: 96.59551423072675, Tokens per sec: 78930.68126767635, Loss: 2.25209379196167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14767 , TFLOPS: 97.18152434718458, Tokens per sec: 79409.52521907646, Loss: 2.2725329399108887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14768 , TFLOPS: 97.23728697780372, Tokens per sec: 79455.09030001318, Loss: 2.255640983581543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14769 , TFLOPS: 96.67922344989663, Tokens per sec: 78999.08222552706, Loss: 2.255392074584961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14770 , TFLOPS: 95.38268917260346, Tokens per sec: 77939.65069178933, Loss: 2.260286808013916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14771 , TFLOPS: 98.43494039153185, Tokens per sec: 80433.72373471204, Loss: 2.2650246620178223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14772 , TFLOPS: 96.08204497020454, Tokens per sec: 78511.112317029, Loss: 2.2353408336639404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14773 , TFLOPS: 97.13932882166169, Tokens per sec: 79375.04616896226, Loss: 2.2410621643066406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14774 , TFLOPS: 96.78721371184153, Tokens per sec: 79087.32384847889, Loss: 2.244696617126465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14775 , TFLOPS: 95.41121362505491, Tokens per sec: 77962.95875617213, Loss: 2.248960494995117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14776 , TFLOPS: 98.41167314028708, Tokens per sec: 80414.7114647676, Loss: 2.228959321975708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14777 , TFLOPS: 97.20729542937734, Tokens per sec: 79430.58343374285, Loss: 2.2392702102661133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14778 , TFLOPS: 96.8908450401873, Tokens per sec: 79172.00367457794, Loss: 2.2349202632904053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14779 , TFLOPS: 96.62263853081475, Tokens per sec: 78952.84523151985, Loss: 2.256049633026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14780 , TFLOPS: 97.24353490812568, Tokens per sec: 79460.19564471528, Loss: 2.2500715255737305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14781 , TFLOPS: 95.97198895955675, Tokens per sec: 78421.18271763492, Loss: 2.259702205657959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14782 , TFLOPS: 96.26334680670982, Tokens per sec: 78659.25871476192, Loss: 2.2662529945373535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14783 , TFLOPS: 96.74272254213531, Tokens per sec: 79050.96896840734, Loss: 2.26365327835083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14784 , TFLOPS: 96.75242853186535, Tokens per sec: 79058.89998247016, Loss: 2.2529070377349854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14785 , TFLOPS: 95.14621231332056, Tokens per sec: 77746.41936261336, Loss: 2.240950107574463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14786 , TFLOPS: 97.92229526069703, Tokens per sec: 80014.82820164722, Loss: 2.2491984367370605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14787 , TFLOPS: 96.53373096607127, Tokens per sec: 78880.19657168364, Loss: 2.2492787837982178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14788 , TFLOPS: 97.82717296017923, Tokens per sec: 79937.10132122829, Loss: 2.276425838470459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14789 , TFLOPS: 97.3292506969932, Tokens per sec: 79530.23622231964, Loss: 2.2668557167053223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14790 , TFLOPS: 97.78672715590757, Tokens per sec: 79904.05201339029, Loss: 2.2569241523742676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14791 , TFLOPS: 96.64524606099525, Tokens per sec: 78971.31842639994, Loss: 2.2476136684417725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14792 , TFLOPS: 96.34788501383424, Tokens per sec: 78728.33706001035, Loss: 2.254920721054077 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14793 , TFLOPS: 98.40520363167886, Tokens per sec: 80409.42506274386, Loss: 2.265066385269165 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14794 , TFLOPS: 95.48997058372082, Tokens per sec: 78027.31309448252, Loss: 2.292945384979248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14795 , TFLOPS: 96.61409981662649, Tokens per sec: 78945.86802835057, Loss: 2.2680625915527344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14796 , TFLOPS: 98.34878398606692, Tokens per sec: 80363.32311794347, Loss: 2.2670695781707764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14797 , TFLOPS: 97.93771936242254, Tokens per sec: 80027.43163221885, Loss: 2.2846920490264893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14798 , TFLOPS: 95.99294016250965, Tokens per sec: 78438.3024848997, Loss: 2.2546331882476807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14799 , TFLOPS: 97.78584807923244, Tokens per sec: 79903.33369720959, Loss: 2.2558133602142334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14800 , TFLOPS: 97.74438409815261, Tokens per sec: 79869.45241089143, Loss: 2.265146017074585 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/model.pt +[rank0]:[2024-08-30 00:35:43,719] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007652834989130497, 'preprocessing_with_comm': 0.001591485008248128, 'state_converting': 2.5912869090097956, : 2.6021233029896393}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800, took 14.99s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0012800 + eval ppl=7.722627639770508, eval loss=2.044154644012451 +------------------------------------------------------------------ +iteration: 14801 , TFLOPS: 95.47317743391216, Tokens per sec: 78013.59097947973, Loss: 2.2596607208251953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14802 , TFLOPS: 97.12875701565049, Tokens per sec: 79366.40767412802, Loss: 2.247744083404541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14803 , TFLOPS: 96.63721157878707, Tokens per sec: 78964.75324416166, Loss: 2.2655882835388184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14804 , TFLOPS: 97.29089429366036, Tokens per sec: 79498.89421777471, Loss: 2.2661967277526855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14805 , TFLOPS: 97.06699449372653, Tokens per sec: 79315.93992755517, Loss: 2.252485752105713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14806 , TFLOPS: 97.15363364480251, Tokens per sec: 79386.73500818951, Loss: 2.266408920288086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14807 , TFLOPS: 97.9480031933828, Tokens per sec: 80035.83481522584, Loss: 2.2491343021392822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14808 , TFLOPS: 97.86744352206935, Tokens per sec: 79970.0074340052, Loss: 2.2473397254943848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14809 , TFLOPS: 98.37887388835988, Tokens per sec: 80387.91035169004, Loss: 2.2604873180389404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14810 , TFLOPS: 97.66412168470667, Tokens per sec: 79803.86792672642, Loss: 2.260302782058716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14811 , TFLOPS: 97.04970187927078, Tokens per sec: 79301.80968713188, Loss: 2.259371280670166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14812 , TFLOPS: 98.34770548899606, Tokens per sec: 80362.44185022388, Loss: 2.2477996349334717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14813 , TFLOPS: 97.19332622653633, Tokens per sec: 79419.16883850218, Loss: 2.2516844272613525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14814 , TFLOPS: 97.1367850194481, Tokens per sec: 79372.96756269023, Loss: 2.2414064407348633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14815 , TFLOPS: 98.31889034042962, Tokens per sec: 80338.89625056233, Loss: 2.2617783546447754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14816 , TFLOPS: 97.6717612922534, Tokens per sec: 79810.11044671385, Loss: 2.2642104625701904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14817 , TFLOPS: 96.52183771887307, Tokens per sec: 78870.47829323857, Loss: 2.2622218132019043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14818 , TFLOPS: 97.57898453045412, Tokens per sec: 79734.30016636135, Loss: 2.2471771240234375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14819 , TFLOPS: 97.8234438107211, Tokens per sec: 79934.0541372092, Loss: 2.253392457962036 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14820 , TFLOPS: 97.66281741134414, Tokens per sec: 79802.80217138692, Loss: 2.2400805950164795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14821 , TFLOPS: 97.15662963699357, Tokens per sec: 79389.18311053254, Loss: 2.2512850761413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14822 , TFLOPS: 97.16364559147223, Tokens per sec: 79394.91602754379, Loss: 2.251570701599121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14823 , TFLOPS: 97.83448463581246, Tokens per sec: 79943.07587960747, Loss: 2.262464761734009 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14824 , TFLOPS: 96.25888125747342, Tokens per sec: 78655.60979952739, Loss: 2.2507543563842773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14825 , TFLOPS: 96.55924848145682, Tokens per sec: 78901.04759038445, Loss: 2.2354815006256104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14826 , TFLOPS: 98.3982350878894, Tokens per sec: 80403.73088622723, Loss: 2.2648305892944336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14827 , TFLOPS: 95.5181004255008, Tokens per sec: 78050.2987123275, Loss: 2.264206886291504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14828 , TFLOPS: 97.65047834909859, Tokens per sec: 79792.71960599037, Loss: 2.2579264640808105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14829 , TFLOPS: 97.9331316588827, Tokens per sec: 80023.68290155845, Loss: 2.254934549331665 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14830 , TFLOPS: 94.7725089314957, Tokens per sec: 77441.05670934338, Loss: 2.2534830570220947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14831 , TFLOPS: 98.41113692052609, Tokens per sec: 80414.27330579738, Loss: 2.250120162963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14832 , TFLOPS: 96.554488387347, Tokens per sec: 78897.1579949516, Loss: 2.278672933578491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14833 , TFLOPS: 97.13164127055481, Tokens per sec: 79368.76447305764, Loss: 2.268282413482666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14834 , TFLOPS: 96.47936133248571, Tokens per sec: 78835.769744482, Loss: 2.2251901626586914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14835 , TFLOPS: 97.61211108408428, Tokens per sec: 79761.3687260859, Loss: 2.256326913833618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14836 , TFLOPS: 96.02979225633561, Tokens per sec: 78468.41527942217, Loss: 2.2417538166046143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14837 , TFLOPS: 97.2508374151687, Tokens per sec: 79466.16271120339, Loss: 2.247096061706543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14838 , TFLOPS: 96.11455684522079, Tokens per sec: 78537.67860703489, Loss: 2.247541666030884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14839 , TFLOPS: 96.06953780020221, Tokens per sec: 78500.8923864569, Loss: 2.2714321613311768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14840 , TFLOPS: 97.34060361423887, Tokens per sec: 79539.51298325116, Loss: 2.2509665489196777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14841 , TFLOPS: 97.3017481420789, Tokens per sec: 79507.76317672043, Loss: 2.252699375152588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14842 , TFLOPS: 97.26908263704229, Tokens per sec: 79481.07135167009, Loss: 2.25911021232605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14843 , TFLOPS: 97.83573917197026, Tokens per sec: 79944.10099339666, Loss: 2.2663536071777344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14844 , TFLOPS: 97.18368424710391, Tokens per sec: 79411.29012890131, Loss: 2.2718591690063477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14845 , TFLOPS: 98.0374536521785, Tokens per sec: 80108.92708776746, Loss: 2.2594478130340576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14846 , TFLOPS: 97.84477217947398, Tokens per sec: 79951.48209635825, Loss: 2.2657594680786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14847 , TFLOPS: 98.45171685467018, Tokens per sec: 80447.43221460695, Loss: 2.2705140113830566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14848 , TFLOPS: 97.17366477124658, Tokens per sec: 79403.10295725404, Loss: 2.2500951290130615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14849 , TFLOPS: 97.11133200956608, Tokens per sec: 79352.16925309687, Loss: 2.2417359352111816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14850 , TFLOPS: 98.4314014072671, Tokens per sec: 80430.83193956775, Loss: 2.2436447143554688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14851 , TFLOPS: 97.62718630777718, Tokens per sec: 79773.68707943616, Loss: 2.2723469734191895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14852 , TFLOPS: 97.66459231353184, Tokens per sec: 79804.25248965454, Loss: 2.2688841819763184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14853 , TFLOPS: 98.4501010964184, Tokens per sec: 80446.11193694611, Loss: 2.2590878009796143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14854 , TFLOPS: 97.81942503615258, Tokens per sec: 79930.7702930568, Loss: 2.2559916973114014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14855 , TFLOPS: 95.86075209957026, Tokens per sec: 78330.28821584846, Loss: 2.247706890106201 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14856 , TFLOPS: 97.75524345513575, Tokens per sec: 79878.32587102707, Loss: 2.2392375469207764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14857 , TFLOPS: 97.75612164238717, Tokens per sec: 79879.04346043676, Loss: 2.266169309616089 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14858 , TFLOPS: 98.39866136249938, Tokens per sec: 80404.07920618432, Loss: 2.278855323791504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14859 , TFLOPS: 95.9361654711721, Tokens per sec: 78391.9104230972, Loss: 2.268373489379883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14860 , TFLOPS: 97.8948134037075, Tokens per sec: 79992.37206885515, Loss: 2.2455341815948486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14861 , TFLOPS: 97.79115724962656, Tokens per sec: 79907.67195700885, Loss: 2.261871814727783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14862 , TFLOPS: 96.49726473763357, Tokens per sec: 78850.39907770297, Loss: 2.2415695190429688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14863 , TFLOPS: 96.64090363181143, Tokens per sec: 78967.77011572977, Loss: 2.2779579162597656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14864 , TFLOPS: 98.40209965703473, Tokens per sec: 80406.88872515873, Loss: 2.2450640201568604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14865 , TFLOPS: 95.53267253882325, Tokens per sec: 78062.20596124284, Loss: 2.243675708770752 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14866 , TFLOPS: 97.63597735503696, Tokens per sec: 79780.8704704538, Loss: 2.254765748977661 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14867 , TFLOPS: 97.18471878887607, Tokens per sec: 79412.13547961482, Loss: 2.2510218620300293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14868 , TFLOPS: 96.09643973778155, Tokens per sec: 78522.87465217045, Loss: 2.262683391571045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14869 , TFLOPS: 97.02473600319867, Tokens per sec: 79281.40942711453, Loss: 2.261622905731201 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14870 , TFLOPS: 97.39936828376423, Tokens per sec: 79587.53110745751, Loss: 2.2753677368164062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14871 , TFLOPS: 97.24665952185224, Tokens per sec: 79462.74884702597, Loss: 2.2572810649871826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14872 , TFLOPS: 95.53489051186364, Tokens per sec: 78064.01832410984, Loss: 2.2397048473358154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14873 , TFLOPS: 98.34401448807257, Tokens per sec: 80359.4258383545, Loss: 2.2583980560302734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14874 , TFLOPS: 95.37593268351715, Tokens per sec: 77934.12979062939, Loss: 2.2683637142181396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14875 , TFLOPS: 97.18593264504011, Tokens per sec: 79413.12735273389, Loss: 2.2663092613220215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14876 , TFLOPS: 95.97915140833717, Tokens per sec: 78427.03533891124, Loss: 2.26078462600708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14877 , TFLOPS: 97.06808762928202, Tokens per sec: 79316.83315675738, Loss: 2.266922950744629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14878 , TFLOPS: 96.59661395115108, Tokens per sec: 78931.5798775444, Loss: 2.246872663497925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14879 , TFLOPS: 97.12805061153142, Tokens per sec: 79365.83045313785, Loss: 2.2279367446899414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14880 , TFLOPS: 97.74058148819287, Tokens per sec: 79866.34520040544, Loss: 2.2547149658203125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14881 , TFLOPS: 97.25383637185152, Tokens per sec: 79468.61323590892, Loss: 2.259859085083008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14882 , TFLOPS: 97.70516133228931, Tokens per sec: 79837.4024792207, Loss: 2.2506911754608154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14883 , TFLOPS: 97.24292863175862, Tokens per sec: 79459.7002406888, Loss: 2.2674789428710938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14884 , TFLOPS: 97.95913953930341, Tokens per sec: 80044.93461015262, Loss: 2.257051944732666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14885 , TFLOPS: 98.40922604999483, Tokens per sec: 80412.7118842961, Loss: 2.252955913543701 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14886 , TFLOPS: 97.81129876675332, Tokens per sec: 79924.13010914189, Loss: 2.2413206100463867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14887 , TFLOPS: 96.56075915321817, Tokens per sec: 78902.2819991688, Loss: 2.2338569164276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14888 , TFLOPS: 98.39907860760574, Tokens per sec: 80404.42014790158, Loss: 2.276829242706299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14889 , TFLOPS: 97.67543668038601, Tokens per sec: 79813.11370096992, Loss: 2.2699620723724365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14890 , TFLOPS: 97.76962304680629, Tokens per sec: 79890.0758055447, Loss: 2.2677135467529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14891 , TFLOPS: 97.69305508979085, Tokens per sec: 79827.51015683281, Loss: 2.235752820968628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14892 , TFLOPS: 97.72840451510699, Tokens per sec: 79856.39508223378, Loss: 2.2312510013580322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14893 , TFLOPS: 96.54152479561714, Tokens per sec: 78886.56511043658, Loss: 2.2643990516662598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14894 , TFLOPS: 97.05545426828454, Tokens per sec: 79306.5101122747, Loss: 2.264132261276245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14895 , TFLOPS: 97.69532568645593, Tokens per sec: 79829.36551981821, Loss: 2.253335475921631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14896 , TFLOPS: 97.6918769992327, Tokens per sec: 79826.54750871101, Loss: 2.2450952529907227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14897 , TFLOPS: 97.28941762423977, Tokens per sec: 79497.6875931783, Loss: 2.251786231994629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14898 , TFLOPS: 97.13767280398916, Tokens per sec: 79373.69299429233, Loss: 2.2446069717407227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14899 , TFLOPS: 97.36753519605934, Tokens per sec: 79561.51947203727, Loss: 2.2529165744781494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14900 , TFLOPS: 97.22380049370582, Tokens per sec: 79444.0701466838, Loss: 2.259460926055908 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14901 , TFLOPS: 95.79233883102087, Tokens per sec: 78274.38597300254, Loss: 2.2493338584899902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14902 , TFLOPS: 97.7756819236309, Tokens per sec: 79895.02666976744, Loss: 2.2633657455444336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14903 , TFLOPS: 96.07687636974873, Tokens per sec: 78506.88892054494, Loss: 2.2582576274871826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14904 , TFLOPS: 97.00021027501556, Tokens per sec: 79261.36882326771, Loss: 2.2512660026550293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14905 , TFLOPS: 97.89707958261185, Tokens per sec: 79994.22382197453, Loss: 2.2203845977783203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14906 , TFLOPS: 94.9539921647497, Tokens per sec: 77589.35133102929, Loss: 2.2454333305358887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14907 , TFLOPS: 97.81866502301496, Tokens per sec: 79930.14926675758, Loss: 2.261455535888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14908 , TFLOPS: 97.18526296834068, Tokens per sec: 79412.58014266379, Loss: 2.2402515411376953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14909 , TFLOPS: 97.01918370102484, Tokens per sec: 79276.87249807933, Loss: 2.233820915222168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14910 , TFLOPS: 97.241906707795, Tokens per sec: 79458.86520031148, Loss: 2.226951837539673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14911 , TFLOPS: 97.77487112367791, Tokens per sec: 79894.36414425401, Loss: 2.2478129863739014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14912 , TFLOPS: 95.87333096761672, Tokens per sec: 78340.56673273808, Loss: 2.2495298385620117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14913 , TFLOPS: 97.81961530341994, Tokens per sec: 79930.92576533886, Loss: 2.259660482406616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14914 , TFLOPS: 96.22932224843848, Tokens per sec: 78631.45637232836, Loss: 2.2639169692993164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14915 , TFLOPS: 96.03956527297066, Tokens per sec: 78476.4010628945, Loss: 2.269575595855713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14916 , TFLOPS: 96.5171722499915, Tokens per sec: 78866.66601851559, Loss: 2.2411279678344727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14917 , TFLOPS: 97.92593394911337, Tokens per sec: 80017.8014676204, Loss: 2.2653048038482666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14918 , TFLOPS: 97.8333778307592, Tokens per sec: 79942.17148070676, Loss: 2.2508859634399414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14919 , TFLOPS: 96.84768459140255, Tokens per sec: 79136.73616082713, Loss: 2.2650153636932373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14920 , TFLOPS: 98.43786677511739, Tokens per sec: 80436.11495807159, Loss: 2.2590131759643555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14921 , TFLOPS: 97.28803066653155, Tokens per sec: 79496.55427433144, Loss: 2.2509541511535645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14922 , TFLOPS: 97.34908303905911, Tokens per sec: 79546.44173955159, Loss: 2.2256247997283936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14923 , TFLOPS: 97.85672884558105, Tokens per sec: 79961.25219602628, Loss: 2.2463483810424805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14924 , TFLOPS: 98.42915847204483, Tokens per sec: 80428.99917945935, Loss: 2.2751681804656982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14925 , TFLOPS: 95.85221788307781, Tokens per sec: 78323.31468786219, Loss: 2.2540576457977295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14926 , TFLOPS: 97.75990026599708, Tokens per sec: 79882.13107105884, Loss: 2.2520527839660645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14927 , TFLOPS: 97.74953125161731, Tokens per sec: 79873.65828248697, Loss: 2.2774720191955566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14928 , TFLOPS: 97.73629960962758, Tokens per sec: 79862.84636720439, Loss: 2.2401671409606934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14929 , TFLOPS: 97.86598598486817, Tokens per sec: 79968.81644283776, Loss: 2.272850275039673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14930 , TFLOPS: 96.63117365420767, Tokens per sec: 78959.81950055783, Loss: 2.231548547744751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14931 , TFLOPS: 96.4544434296427, Tokens per sec: 78815.40868462509, Loss: 2.264525890350342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14932 , TFLOPS: 96.70973700775635, Tokens per sec: 79024.0156391431, Loss: 2.2496204376220703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14933 , TFLOPS: 97.78224208280214, Tokens per sec: 79900.38714469978, Loss: 2.270219326019287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14934 , TFLOPS: 97.13350949928378, Tokens per sec: 79370.29105084458, Loss: 2.2453978061676025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14935 , TFLOPS: 96.1583733987886, Tokens per sec: 78573.48224089359, Loss: 2.2492382526397705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14936 , TFLOPS: 97.17697994147403, Tokens per sec: 79405.81186818695, Loss: 2.2520015239715576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14937 , TFLOPS: 96.2517406941522, Tokens per sec: 78649.77506142319, Loss: 2.250741958618164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14938 , TFLOPS: 97.29440763684441, Tokens per sec: 79501.7650609319, Loss: 2.270451545715332 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14939 , TFLOPS: 96.12120024601728, Tokens per sec: 78543.10710084205, Loss: 2.2597270011901855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14940 , TFLOPS: 97.75778404147373, Tokens per sec: 79880.40184952435, Loss: 2.2604634761810303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14941 , TFLOPS: 96.61988677778223, Tokens per sec: 78950.59670328055, Loss: 2.278306245803833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14942 , TFLOPS: 97.9398795099993, Tokens per sec: 80029.19674441098, Loss: 2.249302387237549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14943 , TFLOPS: 97.60972257193463, Tokens per sec: 79759.41701132264, Loss: 2.269575595855713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14944 , TFLOPS: 96.07752678224114, Tokens per sec: 78507.42038933559, Loss: 2.227052688598633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14945 , TFLOPS: 97.18761449832888, Tokens per sec: 79414.50163835092, Loss: 2.255791187286377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14946 , TFLOPS: 97.30733603036526, Tokens per sec: 79512.32918408424, Loss: 2.2534358501434326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14947 , TFLOPS: 96.52452948354532, Tokens per sec: 78872.67780344444, Loss: 2.2478482723236084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14948 , TFLOPS: 96.57930111708218, Tokens per sec: 78917.4331151547, Loss: 2.244168281555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14949 , TFLOPS: 98.37101532129688, Tokens per sec: 80381.48891424535, Loss: 2.2560105323791504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14950 , TFLOPS: 95.9726606637162, Tokens per sec: 78421.73158439482, Loss: 2.262273073196411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14951 , TFLOPS: 97.77189895435578, Tokens per sec: 79891.93550819045, Loss: 2.237614870071411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14952 , TFLOPS: 95.73586592343077, Tokens per sec: 78228.24050646869, Loss: 2.2496583461761475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14953 , TFLOPS: 96.20808677316342, Tokens per sec: 78614.10431883135, Loss: 2.2459471225738525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14954 , TFLOPS: 96.70214719391733, Tokens per sec: 79017.81380687605, Loss: 2.2770965099334717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14955 , TFLOPS: 97.9742078526654, Tokens per sec: 80057.2473168936, Loss: 2.276393413543701 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14956 , TFLOPS: 97.15627658371211, Tokens per sec: 79388.89462160786, Loss: 2.2443230152130127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14957 , TFLOPS: 95.61459951990965, Tokens per sec: 78129.15060647664, Loss: 2.2729544639587402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14958 , TFLOPS: 98.37244665533021, Tokens per sec: 80382.65849412979, Loss: 2.264277696609497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14959 , TFLOPS: 97.96555157327617, Tokens per sec: 80050.17404817221, Loss: 2.2427175045013428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14960 , TFLOPS: 97.32997397234405, Tokens per sec: 79530.82722922758, Loss: 2.2496509552001953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14961 , TFLOPS: 97.2096879757228, Tokens per sec: 79432.53844495134, Loss: 2.25374698638916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14962 , TFLOPS: 98.39159766133074, Tokens per sec: 80398.30727412358, Loss: 2.254366636276245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14963 , TFLOPS: 96.68200774443747, Tokens per sec: 79001.3573442705, Loss: 2.257887363433838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14964 , TFLOPS: 97.16355600689567, Tokens per sec: 79394.84282568017, Loss: 2.2609469890594482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14965 , TFLOPS: 97.8085912635754, Tokens per sec: 79921.91773859787, Loss: 2.281426191329956 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14966 , TFLOPS: 96.49366205366013, Tokens per sec: 78847.45523188692, Loss: 2.284546136856079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14967 , TFLOPS: 98.3499079295397, Tokens per sec: 80364.24152108796, Loss: 2.2572975158691406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14968 , TFLOPS: 96.08246526966404, Tokens per sec: 78511.45575453682, Loss: 2.24194598197937 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14969 , TFLOPS: 97.21975134339598, Tokens per sec: 79440.76148173123, Loss: 2.2527782917022705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14970 , TFLOPS: 96.66161503831333, Tokens per sec: 78984.6939391415, Loss: 2.262876033782959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14971 , TFLOPS: 97.16905124653648, Tokens per sec: 79399.33312745074, Loss: 2.227210521697998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14972 , TFLOPS: 97.24189974070707, Tokens per sec: 79458.85950732458, Loss: 2.261291027069092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14973 , TFLOPS: 96.6385147876568, Tokens per sec: 78965.81812967671, Loss: 2.259493350982666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14974 , TFLOPS: 96.58435447752346, Tokens per sec: 78921.56234605634, Loss: 2.283841133117676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14975 , TFLOPS: 96.33761471617811, Tokens per sec: 78719.94493541453, Loss: 2.225975275039673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14976 , TFLOPS: 97.24579110028158, Tokens per sec: 79462.03923740542, Loss: 2.2359960079193115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14977 , TFLOPS: 96.87839168061929, Tokens per sec: 79161.82771389691, Loss: 2.2454721927642822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14978 , TFLOPS: 97.49018676659543, Tokens per sec: 79661.7412276546, Loss: 2.2693281173706055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14979 , TFLOPS: 96.6230081952413, Tokens per sec: 78953.1472938388, Loss: 2.2377519607543945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14980 , TFLOPS: 97.64251328182795, Tokens per sec: 79786.21114448443, Loss: 2.250382423400879 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14981 , TFLOPS: 98.33871594235048, Tokens per sec: 80355.09626024825, Loss: 2.244687557220459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14982 , TFLOPS: 96.62988305914917, Tokens per sec: 78958.76492211266, Loss: 2.257112979888916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14983 , TFLOPS: 97.65420816712047, Tokens per sec: 79795.7673363101, Loss: 2.2333340644836426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14984 , TFLOPS: 96.59678833910257, Tokens per sec: 78931.7223744287, Loss: 2.2565243244171143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14985 , TFLOPS: 93.98968528917423, Tokens per sec: 76801.39135952923, Loss: 2.2638678550720215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14986 , TFLOPS: 97.17934345747928, Tokens per sec: 79407.74315795716, Loss: 2.2689743041992188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14987 , TFLOPS: 97.67207644009045, Tokens per sec: 79810.36796212415, Loss: 2.268710136413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14988 , TFLOPS: 96.52202068558482, Tokens per sec: 78870.62780004872, Loss: 2.257554531097412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14989 , TFLOPS: 95.10926912708133, Tokens per sec: 77716.23213413506, Loss: 2.252155065536499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14990 , TFLOPS: 97.26631951295703, Tokens per sec: 79478.81353185089, Loss: 2.2505552768707275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14991 , TFLOPS: 95.8631734340289, Tokens per sec: 78332.2667505653, Loss: 2.242053985595703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14992 , TFLOPS: 96.01881543812797, Tokens per sec: 78459.44583869627, Loss: 2.265326499938965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14993 , TFLOPS: 96.86264991764276, Tokens per sec: 79148.96471413976, Loss: 2.246178388595581 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14994 , TFLOPS: 97.81722330593915, Tokens per sec: 79928.97120262183, Loss: 2.2609689235687256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14995 , TFLOPS: 97.3103233453391, Tokens per sec: 79514.77019604969, Loss: 2.257981777191162 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14996 , TFLOPS: 97.19976495719308, Tokens per sec: 79424.43009106949, Loss: 2.2646677494049072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14997 , TFLOPS: 97.98147404335099, Tokens per sec: 80063.18470835117, Loss: 2.257035732269287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14998 , TFLOPS: 96.06513944763115, Tokens per sec: 78497.29837934741, Loss: 2.2284162044525146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 14999 , TFLOPS: 98.4720585233748, Tokens per sec: 80464.05391574663, Loss: 2.2507338523864746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15000 , TFLOPS: 96.85202155988817, Tokens per sec: 79140.28000942022, Loss: 2.254051923751831 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/model.pt +[rank0]:[2024-08-30 01:31:10,552] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007644590004929341, 'preprocessing_with_comm': 0.0017478980007581413, 'state_converting': 2.6439950979984133, : 2.655007009001565}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000, took 15.12s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013000 + eval ppl=8.749403953552246, eval loss=2.168985605239868 +------------------------------------------------------------------ +iteration: 15001 , TFLOPS: 95.79275471379349, Tokens per sec: 78274.72580152182, Loss: 2.2613325119018555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15002 , TFLOPS: 96.61927093844231, Tokens per sec: 78950.0934851028, Loss: 2.25059175491333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15003 , TFLOPS: 97.09287077555884, Tokens per sec: 79337.0841035551, Loss: 2.2431442737579346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15004 , TFLOPS: 96.05745347246703, Tokens per sec: 78491.0179711862, Loss: 2.243067502975464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15005 , TFLOPS: 95.7177698463489, Tokens per sec: 78213.45373605064, Loss: 2.2572500705718994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15006 , TFLOPS: 97.65771740598838, Tokens per sec: 79798.63482572492, Loss: 2.2707905769348145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15007 , TFLOPS: 97.45696402134925, Tokens per sec: 79634.5940672844, Loss: 2.245854139328003 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15008 , TFLOPS: 96.0613054212272, Tokens per sec: 78494.16549767612, Loss: 2.225360631942749 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15009 , TFLOPS: 98.41556125345227, Tokens per sec: 80417.88854212382, Loss: 2.2579405307769775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15010 , TFLOPS: 97.56221300696511, Tokens per sec: 79720.59572278104, Loss: 2.231889247894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15011 , TFLOPS: 97.20848594381904, Tokens per sec: 79431.55623373971, Loss: 2.271432399749756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15012 , TFLOPS: 97.72631033558577, Tokens per sec: 79854.68387423804, Loss: 2.253070592880249 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15013 , TFLOPS: 97.47650954653524, Tokens per sec: 79650.56521906046, Loss: 2.254655122756958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15014 , TFLOPS: 96.47526144779013, Tokens per sec: 78832.4196231568, Loss: 2.263195276260376 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15015 , TFLOPS: 97.76174714540711, Tokens per sec: 79883.64020376769, Loss: 2.2472691535949707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15016 , TFLOPS: 96.78979966117156, Tokens per sec: 79089.4368942447, Loss: 2.2706735134124756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15017 , TFLOPS: 97.17057449075249, Tokens per sec: 79400.57780951142, Loss: 2.2740299701690674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15018 , TFLOPS: 96.57696882999083, Tokens per sec: 78915.52734333395, Loss: 2.2417263984680176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15019 , TFLOPS: 96.39195800797862, Tokens per sec: 78764.35023806551, Loss: 2.246490001678467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15020 , TFLOPS: 97.35918888573943, Tokens per sec: 79554.69948702172, Loss: 2.274016857147217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15021 , TFLOPS: 96.85490588434006, Tokens per sec: 79142.63686517898, Loss: 2.2751879692077637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15022 , TFLOPS: 97.8312543765217, Tokens per sec: 79940.43635158673, Loss: 2.268841505050659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15023 , TFLOPS: 96.28416641140505, Tokens per sec: 78676.2709497027, Loss: 2.2402405738830566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15024 , TFLOPS: 95.7048194975567, Tokens per sec: 78202.87167268089, Loss: 2.2702574729919434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15025 , TFLOPS: 98.4478338157436, Tokens per sec: 80444.25928354173, Loss: 2.2693557739257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15026 , TFLOPS: 98.38668904400555, Tokens per sec: 80394.29631654812, Loss: 2.2366693019866943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15027 , TFLOPS: 96.621688205604, Tokens per sec: 78952.0686963266, Loss: 2.262179136276245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15028 , TFLOPS: 97.69816562314719, Tokens per sec: 79831.68610519511, Loss: 2.272336006164551 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15029 , TFLOPS: 98.42454180826587, Tokens per sec: 80425.22678464206, Loss: 2.2516093254089355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15030 , TFLOPS: 96.5724425852155, Tokens per sec: 78911.82883221211, Loss: 2.245253086090088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15031 , TFLOPS: 97.03288483477132, Tokens per sec: 79288.06804716215, Loss: 2.2385189533233643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15032 , TFLOPS: 97.81902692940088, Tokens per sec: 79930.4449897818, Loss: 2.2582268714904785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15033 , TFLOPS: 96.09312281969493, Tokens per sec: 78520.1643130168, Loss: 2.2397255897521973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15034 , TFLOPS: 97.84218114286803, Tokens per sec: 79949.3648936483, Loss: 2.246948003768921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15035 , TFLOPS: 96.36959156981858, Tokens per sec: 78746.07404568161, Loss: 2.2181625366210938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15036 , TFLOPS: 97.75903075446836, Tokens per sec: 79881.42057080551, Loss: 2.26224422454834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15037 , TFLOPS: 95.43631654828684, Tokens per sec: 77983.47100095167, Loss: 2.272693157196045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15038 , TFLOPS: 97.14573863491054, Tokens per sec: 79380.28379237116, Loss: 2.2666690349578857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15039 , TFLOPS: 96.7148536792056, Tokens per sec: 79028.19660309913, Loss: 2.248528003692627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15040 , TFLOPS: 96.16827591616783, Tokens per sec: 78581.57384275761, Loss: 2.2555387020111084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15041 , TFLOPS: 97.83227006125945, Tokens per sec: 79941.26629373198, Loss: 2.2655508518218994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15042 , TFLOPS: 96.67585270378245, Tokens per sec: 78996.32790210644, Loss: 2.2606470584869385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15043 , TFLOPS: 96.30306325826199, Tokens per sec: 78691.71204971761, Loss: 2.2344541549682617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15044 , TFLOPS: 97.6386250021965, Tokens per sec: 79783.03393110438, Loss: 2.2643682956695557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15045 , TFLOPS: 97.85947810451091, Tokens per sec: 79963.4986862701, Loss: 2.24053692817688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15046 , TFLOPS: 96.76560299095783, Tokens per sec: 79069.66517213537, Loss: 2.2557406425476074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15047 , TFLOPS: 98.43266790136322, Tokens per sec: 80431.86682449616, Loss: 2.2792224884033203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15048 , TFLOPS: 97.13401215986636, Tokens per sec: 79370.70178774623, Loss: 2.269360065460205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15049 , TFLOPS: 96.075466435082, Tokens per sec: 78505.7368266338, Loss: 2.2513744831085205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15050 , TFLOPS: 97.76432971511491, Tokens per sec: 79885.75048795718, Loss: 2.257539987564087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15051 , TFLOPS: 97.84641185994988, Tokens per sec: 79952.82192148422, Loss: 2.2453131675720215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15052 , TFLOPS: 96.55944733849753, Tokens per sec: 78901.21008159146, Loss: 2.2376699447631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15053 , TFLOPS: 97.80875887759699, Tokens per sec: 79922.05470032968, Loss: 2.232667922973633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15054 , TFLOPS: 96.74719957159509, Tokens per sec: 79054.62726442792, Loss: 2.2541518211364746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15055 , TFLOPS: 97.85146175658333, Tokens per sec: 79956.94832201932, Loss: 2.261093854904175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15056 , TFLOPS: 96.51823407598123, Tokens per sec: 78867.53366386563, Loss: 2.2706167697906494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15057 , TFLOPS: 96.64133229578475, Tokens per sec: 78968.12038809716, Loss: 2.2621543407440186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15058 , TFLOPS: 96.42711771403842, Tokens per sec: 78793.08013897833, Loss: 2.280695915222168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15059 , TFLOPS: 96.7361816291811, Tokens per sec: 79045.62422005438, Loss: 2.254940986633301 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15060 , TFLOPS: 97.89370005263153, Tokens per sec: 79991.46232103076, Loss: 2.2430531978607178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15061 , TFLOPS: 96.92209049239362, Tokens per sec: 79197.53513790522, Loss: 2.2549378871917725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15062 , TFLOPS: 95.70638474229101, Tokens per sec: 78204.15067444628, Loss: 2.2672183513641357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15063 , TFLOPS: 98.38102953076887, Tokens per sec: 80389.67178259359, Loss: 2.2636969089508057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15064 , TFLOPS: 97.63695235042886, Tokens per sec: 79781.66716428714, Loss: 2.238725185394287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15065 , TFLOPS: 96.69896318721224, Tokens per sec: 79015.21207303369, Loss: 2.2443954944610596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15066 , TFLOPS: 98.4200319847088, Tokens per sec: 80421.54169172038, Loss: 2.2502548694610596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15067 , TFLOPS: 98.42230663641473, Tokens per sec: 80423.40036818405, Loss: 2.257901668548584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15068 , TFLOPS: 96.54536077772879, Tokens per sec: 78889.69959016697, Loss: 2.229466438293457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15069 , TFLOPS: 97.08866545390596, Tokens per sec: 79333.64782697793, Loss: 2.247563362121582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15070 , TFLOPS: 97.79370184145185, Tokens per sec: 79909.7512084929, Loss: 2.2651257514953613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15071 , TFLOPS: 96.04872687070329, Tokens per sec: 78483.88724024234, Loss: 2.27317476272583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15072 , TFLOPS: 97.75862274692085, Tokens per sec: 79881.0871773354, Loss: 2.244520425796509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15073 , TFLOPS: 96.63111009621223, Tokens per sec: 78959.76756568362, Loss: 2.2457752227783203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15074 , TFLOPS: 97.81469368016418, Tokens per sec: 79926.90418028274, Loss: 2.254274368286133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15075 , TFLOPS: 96.07117085983306, Tokens per sec: 78502.22680151995, Loss: 2.2623398303985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15076 , TFLOPS: 96.52249560300196, Tokens per sec: 78871.01586729573, Loss: 2.2585678100585938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15077 , TFLOPS: 96.73172001362717, Tokens per sec: 79041.97851913306, Loss: 2.24222469329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15078 , TFLOPS: 96.80190830367928, Tokens per sec: 79099.33117774197, Loss: 2.2772393226623535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15079 , TFLOPS: 97.07450653123219, Tokens per sec: 79322.0782067781, Loss: 2.2622592449188232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15080 , TFLOPS: 97.43325477167129, Tokens per sec: 79615.22062904216, Loss: 2.252131700515747 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15081 , TFLOPS: 95.62685654164936, Tokens per sec: 78139.16613446397, Loss: 2.25522518157959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15082 , TFLOPS: 98.42420724545455, Tokens per sec: 80424.95340475652, Loss: 2.253572940826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15083 , TFLOPS: 97.8851385116284, Tokens per sec: 79984.46646548345, Loss: 2.2491579055786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15084 , TFLOPS: 96.59002371518314, Tokens per sec: 78926.19482608722, Loss: 2.2442209720611572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15085 , TFLOPS: 98.39170858959305, Tokens per sec: 80398.39791646226, Loss: 2.250715732574463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15086 , TFLOPS: 96.63279372609242, Tokens per sec: 78961.14330299922, Loss: 2.2652580738067627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15087 , TFLOPS: 96.41125934023863, Tokens per sec: 78780.12185351565, Loss: 2.2286088466644287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15088 , TFLOPS: 97.69490982304936, Tokens per sec: 79829.02570712342, Loss: 2.2529211044311523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15089 , TFLOPS: 97.17615015138566, Tokens per sec: 79405.13382534505, Loss: 2.252941608428955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15090 , TFLOPS: 96.57821120161435, Tokens per sec: 78916.54251716901, Loss: 2.244020938873291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15091 , TFLOPS: 96.5788906283863, Tokens per sec: 78917.09769427439, Loss: 2.266490936279297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15092 , TFLOPS: 97.35012339537464, Tokens per sec: 79547.29184147816, Loss: 2.2298977375030518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15093 , TFLOPS: 97.83282848523163, Tokens per sec: 79941.72259633477, Loss: 2.273170232772827 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15094 , TFLOPS: 96.20304374352243, Tokens per sec: 78609.98352949256, Loss: 2.2638652324676514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15095 , TFLOPS: 96.55254035848759, Tokens per sec: 78895.56621042384, Loss: 2.228475570678711 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15096 , TFLOPS: 97.25589032742307, Tokens per sec: 79470.29157587996, Loss: 2.270583391189575 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15097 , TFLOPS: 96.81747363081965, Tokens per sec: 79112.05000723552, Loss: 2.2513554096221924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15098 , TFLOPS: 97.84084793917705, Tokens per sec: 79948.27549859269, Loss: 2.261660575866699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15099 , TFLOPS: 96.12320830695548, Tokens per sec: 78544.74793912674, Loss: 2.257169723510742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15100 , TFLOPS: 95.70137044114554, Tokens per sec: 78200.05335990073, Loss: 2.223923444747925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15101 , TFLOPS: 98.45400491240119, Tokens per sec: 80449.30184547881, Loss: 2.2338476181030273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15102 , TFLOPS: 97.87484571199096, Tokens per sec: 79976.05595393946, Loss: 2.2630648612976074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15103 , TFLOPS: 97.20581825422597, Tokens per sec: 79429.37639590078, Loss: 2.255902051925659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15104 , TFLOPS: 97.14403006606054, Tokens per sec: 79378.88767678135, Loss: 2.2537901401519775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15105 , TFLOPS: 98.44427181326776, Tokens per sec: 80441.34867962461, Loss: 2.272287368774414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15106 , TFLOPS: 96.61649150019014, Tokens per sec: 78947.82233455783, Loss: 2.2361056804656982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15107 , TFLOPS: 97.25222964496805, Tokens per sec: 79467.30033801163, Loss: 2.246100902557373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15108 , TFLOPS: 97.68043360957492, Tokens per sec: 79817.19682044256, Loss: 2.2660675048828125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15109 , TFLOPS: 95.64490705510596, Tokens per sec: 78153.91567367091, Loss: 2.2456753253936768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15110 , TFLOPS: 97.80403653654595, Tokens per sec: 79918.19595389301, Loss: 2.247715473175049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15111 , TFLOPS: 96.53139277768013, Tokens per sec: 78878.2859777589, Loss: 2.2578539848327637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15112 , TFLOPS: 98.39536831322688, Tokens per sec: 80401.38837085312, Loss: 2.244616746902466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15113 , TFLOPS: 96.55324328149973, Tokens per sec: 78896.14058691192, Loss: 2.2810919284820557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15114 , TFLOPS: 96.4339273664541, Tokens per sec: 78798.64448126323, Loss: 2.257044792175293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15115 , TFLOPS: 97.15445165153672, Tokens per sec: 79387.40342254941, Loss: 2.2357773780822754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15116 , TFLOPS: 96.57210528291247, Tokens per sec: 78911.55321381737, Loss: 2.242542028427124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15117 , TFLOPS: 97.85845315305784, Tokens per sec: 79962.6611720532, Loss: 2.235325336456299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15118 , TFLOPS: 96.7825196078395, Tokens per sec: 79083.48817526185, Loss: 2.2697019577026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15119 , TFLOPS: 95.63574148021956, Tokens per sec: 78146.42624648835, Loss: 2.272040605545044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15120 , TFLOPS: 97.67197979587942, Tokens per sec: 79810.28899165145, Loss: 2.259967088699341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15121 , TFLOPS: 97.87741628956596, Tokens per sec: 79978.15643904854, Loss: 2.2736074924468994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15122 , TFLOPS: 96.7385948822152, Tokens per sec: 79047.59615123119, Loss: 2.260197639465332 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15123 , TFLOPS: 98.41760153271129, Tokens per sec: 80419.55570682781, Loss: 2.244128942489624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15124 , TFLOPS: 96.49913693910567, Tokens per sec: 78851.92890172056, Loss: 2.273411512374878 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15125 , TFLOPS: 97.22149495437037, Tokens per sec: 79442.18623114293, Loss: 2.266134023666382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15126 , TFLOPS: 97.06254381885334, Tokens per sec: 79312.30316654574, Loss: 2.272434949874878 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15127 , TFLOPS: 97.6972705362116, Tokens per sec: 79830.95470661846, Loss: 2.264014720916748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15128 , TFLOPS: 96.60974020085524, Tokens per sec: 78942.3056740774, Loss: 2.2354815006256104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15129 , TFLOPS: 97.33655030524628, Tokens per sec: 79536.20092013186, Loss: 2.2662100791931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15130 , TFLOPS: 97.46977389769232, Tokens per sec: 79645.06134699988, Loss: 2.2453341484069824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15131 , TFLOPS: 97.74728676334522, Tokens per sec: 79871.82425334156, Loss: 2.2576141357421875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15132 , TFLOPS: 96.10013955561583, Tokens per sec: 78525.89786856461, Loss: 2.2498555183410645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15133 , TFLOPS: 96.58351145680224, Tokens per sec: 78920.87349212373, Loss: 2.242570638656616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15134 , TFLOPS: 96.89579365907849, Tokens per sec: 79176.0473184627, Loss: 2.286125421524048 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15135 , TFLOPS: 96.12526271469477, Tokens per sec: 78546.42664857567, Loss: 2.2438066005706787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15136 , TFLOPS: 97.89661543695684, Tokens per sec: 79993.84455661164, Loss: 2.2528254985809326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15137 , TFLOPS: 96.78001468912674, Tokens per sec: 79081.4413416993, Loss: 2.2616400718688965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15138 , TFLOPS: 95.76653333034884, Tokens per sec: 78253.29963411072, Loss: 2.2348198890686035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15139 , TFLOPS: 98.44760288132056, Tokens per sec: 80444.07058107988, Loss: 2.261281967163086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15140 , TFLOPS: 97.93959752621495, Tokens per sec: 80028.96632820187, Loss: 2.249913215637207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15141 , TFLOPS: 96.58302376163815, Tokens per sec: 78920.47498385077, Loss: 2.2668328285217285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15142 , TFLOPS: 97.88389965545244, Tokens per sec: 79983.45416421107, Loss: 2.2689499855041504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15143 , TFLOPS: 98.40611142394468, Tokens per sec: 80410.16684317292, Loss: 2.262409210205078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15144 , TFLOPS: 96.49341638005639, Tokens per sec: 78847.25448566109, Loss: 2.256927251815796 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15145 , TFLOPS: 97.31170347228381, Tokens per sec: 79515.89793330384, Loss: 2.2602949142456055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15146 , TFLOPS: 97.67800835751467, Tokens per sec: 79815.2150845526, Loss: 2.249635696411133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15147 , TFLOPS: 95.44338275237926, Tokens per sec: 77989.24497821587, Loss: 2.275670051574707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15148 , TFLOPS: 97.9338239036124, Tokens per sec: 80024.24855254705, Loss: 2.2406468391418457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15149 , TFLOPS: 96.63791522922152, Tokens per sec: 78965.32821504523, Loss: 2.2641663551330566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15150 , TFLOPS: 98.38040412318163, Tokens per sec: 80389.16074595465, Loss: 2.2329626083374023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15151 , TFLOPS: 96.60091912211615, Tokens per sec: 78935.09774356491, Loss: 2.261712074279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15152 , TFLOPS: 97.24881856602451, Tokens per sec: 79464.51305759787, Loss: 2.263239860534668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15153 , TFLOPS: 96.46870833500097, Tokens per sec: 78827.06490600466, Loss: 2.232740640640259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15154 , TFLOPS: 97.30064172433386, Tokens per sec: 79506.85909429922, Loss: 2.2511911392211914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15155 , TFLOPS: 97.13589961934578, Tokens per sec: 79372.2440794744, Loss: 2.2598624229431152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15156 , TFLOPS: 96.77090389994507, Tokens per sec: 79073.99668132639, Loss: 2.2527241706848145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15157 , TFLOPS: 96.37436634186362, Tokens per sec: 78749.97563483319, Loss: 2.255481243133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15158 , TFLOPS: 97.83396365322744, Tokens per sec: 79942.65017132598, Loss: 2.249907970428467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15159 , TFLOPS: 97.82241529314761, Tokens per sec: 79933.21370902352, Loss: 2.244563102722168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15160 , TFLOPS: 96.59100292429896, Tokens per sec: 78926.99496306285, Loss: 2.255157232284546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15161 , TFLOPS: 97.88284927342973, Tokens per sec: 79982.59587002108, Loss: 2.2621512413024902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15162 , TFLOPS: 97.05201966906532, Tokens per sec: 79303.70361283822, Loss: 2.251722812652588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15163 , TFLOPS: 97.11471992270529, Tokens per sec: 79354.93760413556, Loss: 2.2484076023101807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15164 , TFLOPS: 96.61915041809645, Tokens per sec: 78949.99500482572, Loss: 2.252575635910034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15165 , TFLOPS: 97.0309171261429, Tokens per sec: 79286.46018178853, Loss: 2.2390713691711426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15166 , TFLOPS: 96.55944655457652, Tokens per sec: 78901.20944102942, Loss: 2.2464306354522705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15167 , TFLOPS: 96.64268560636111, Tokens per sec: 78969.22621300642, Loss: 2.252229928970337 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15168 , TFLOPS: 97.37256189476163, Tokens per sec: 79565.62691694565, Loss: 2.2679686546325684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15169 , TFLOPS: 97.85214540485262, Tokens per sec: 79957.50694861883, Loss: 2.269441604614258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15170 , TFLOPS: 96.05450269909485, Tokens per sec: 78488.60681830414, Loss: 2.2585341930389404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15171 , TFLOPS: 96.55392585819304, Tokens per sec: 78896.6983378991, Loss: 2.2526438236236572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15172 , TFLOPS: 96.67674587290841, Tokens per sec: 78997.05773359149, Loss: 2.2557854652404785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15173 , TFLOPS: 97.37899492522095, Tokens per sec: 79570.8835117349, Loss: 2.240715503692627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15174 , TFLOPS: 97.22251574287063, Tokens per sec: 79443.02034370393, Loss: 2.2490522861480713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15175 , TFLOPS: 96.93518874854752, Tokens per sec: 79208.23806018768, Loss: 2.259246826171875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15176 , TFLOPS: 95.03134742844854, Tokens per sec: 77652.56030829923, Loss: 2.2406413555145264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15177 , TFLOPS: 97.92230980238607, Tokens per sec: 80014.84008403569, Loss: 2.253873348236084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15178 , TFLOPS: 98.4110444779387, Tokens per sec: 80414.19776857954, Loss: 2.2408204078674316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15179 , TFLOPS: 96.83827437048068, Tokens per sec: 79129.04682707123, Loss: 2.2582998275756836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15180 , TFLOPS: 97.87571402144982, Tokens per sec: 79976.76547195057, Loss: 2.257875442504883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15181 , TFLOPS: 97.69499023735409, Tokens per sec: 79829.09141572188, Loss: 2.247699499130249 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15182 , TFLOPS: 97.2548567145835, Tokens per sec: 79469.4469842212, Loss: 2.228977680206299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15183 , TFLOPS: 97.80051956235728, Tokens per sec: 79915.32214374848, Loss: 2.252045154571533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15184 , TFLOPS: 97.7548926679193, Tokens per sec: 79878.03923376244, Loss: 2.2466773986816406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15185 , TFLOPS: 96.07116711193434, Tokens per sec: 78502.22373901543, Loss: 2.266390562057495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15186 , TFLOPS: 98.41471918240458, Tokens per sec: 80417.20046419391, Loss: 2.2566702365875244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15187 , TFLOPS: 97.25101148736834, Tokens per sec: 79466.30495007875, Loss: 2.2816381454467773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15188 , TFLOPS: 97.67512242584289, Tokens per sec: 79812.8569154931, Loss: 2.2330901622772217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15189 , TFLOPS: 96.12022043460996, Tokens per sec: 78542.30647171853, Loss: 2.25301456451416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15190 , TFLOPS: 96.48878081215149, Tokens per sec: 78843.46664379588, Loss: 2.2627336978912354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15191 , TFLOPS: 97.27801799525425, Tokens per sec: 79488.37266288167, Loss: 2.270143508911133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15192 , TFLOPS: 97.3281947394703, Tokens per sec: 79529.37337224491, Loss: 2.2542383670806885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15193 , TFLOPS: 97.08122580700723, Tokens per sec: 79327.56869998343, Loss: 2.24196720123291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15194 , TFLOPS: 96.74291387840181, Tokens per sec: 79051.12531419614, Loss: 2.2686712741851807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15195 , TFLOPS: 95.13587792853838, Tokens per sec: 77737.9748707771, Loss: 2.24760103225708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15196 , TFLOPS: 98.39936702443426, Tokens per sec: 80404.65582071665, Loss: 2.2544422149658203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15197 , TFLOPS: 97.15195890458911, Tokens per sec: 79385.3665348495, Loss: 2.229365110397339 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15198 , TFLOPS: 97.27061987347207, Tokens per sec: 79482.32746712885, Loss: 2.2683253288269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15199 , TFLOPS: 97.17098045660958, Tokens per sec: 79400.90953466375, Loss: 2.2720417976379395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15200 , TFLOPS: 97.77928663236696, Tokens per sec: 79897.97217006913, Loss: 2.257723093032837 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/optimizer.pt +[rank0]:[2024-08-30 02:26:38,896] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007652569998754188, 'preprocessing_with_comm': 0.0016554799949517474, 'state_converting': 2.5955489670013776, : 2.6064438039902598}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200, took 14.74s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013200 + eval ppl=7.440725326538086, eval loss=2.0069682598114014 +------------------------------------------------------------------ +iteration: 15201 , TFLOPS: 95.84728758139367, Tokens per sec: 78319.28601143882, Loss: 2.2583847045898438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15202 , TFLOPS: 98.50404797654879, Tokens per sec: 80490.19332141688, Loss: 2.231748104095459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15203 , TFLOPS: 97.08825317784235, Tokens per sec: 79333.31094559342, Loss: 2.265695333480835 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15204 , TFLOPS: 97.44827395080061, Tokens per sec: 79627.49318693679, Loss: 2.2552425861358643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15205 , TFLOPS: 97.7274296970008, Tokens per sec: 79855.59853326518, Loss: 2.274564266204834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15206 , TFLOPS: 97.02198527820342, Tokens per sec: 79279.16173891096, Loss: 2.272106170654297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15207 , TFLOPS: 97.80841013125057, Tokens per sec: 79921.76973071249, Loss: 2.238447904586792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15208 , TFLOPS: 97.7323777595108, Tokens per sec: 79859.64172251655, Loss: 2.263819456100464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15209 , TFLOPS: 96.42837071703627, Tokens per sec: 78794.10399998304, Loss: 2.256850004196167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15210 , TFLOPS: 98.37109774432402, Tokens per sec: 80381.55626422257, Loss: 2.2759716510772705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15211 , TFLOPS: 97.05202381656721, Tokens per sec: 79303.70700186878, Loss: 2.2556657791137695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15212 , TFLOPS: 98.38537376173377, Tokens per sec: 80393.22156554548, Loss: 2.246828556060791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15213 , TFLOPS: 96.77573477354264, Tokens per sec: 79077.94411250071, Loss: 2.235438346862793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15214 , TFLOPS: 98.32883041684778, Tokens per sec: 80347.01854288393, Loss: 2.233605146408081 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15215 , TFLOPS: 96.3247844603562, Tokens per sec: 78709.46100310229, Loss: 2.2505135536193848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15216 , TFLOPS: 97.56895501123357, Tokens per sec: 79726.10478802344, Loss: 2.265197277069092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15217 , TFLOPS: 96.60158969124019, Tokens per sec: 78935.64568285816, Loss: 2.241199493408203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15218 , TFLOPS: 97.73509011132973, Tokens per sec: 79861.85805500996, Loss: 2.2649857997894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15219 , TFLOPS: 97.2898071284233, Tokens per sec: 79498.0058670734, Loss: 2.246494770050049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15220 , TFLOPS: 97.58073277793666, Tokens per sec: 79735.72870438322, Loss: 2.2650701999664307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15221 , TFLOPS: 97.69779049209802, Tokens per sec: 79831.3795759581, Loss: 2.2773218154907227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15222 , TFLOPS: 97.60588097705204, Tokens per sec: 79756.27794525263, Loss: 2.2481977939605713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15223 , TFLOPS: 96.74800372381523, Tokens per sec: 79055.28435790772, Loss: 2.235132932662964 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15224 , TFLOPS: 98.32934433872065, Tokens per sec: 80347.43848167578, Loss: 2.253451108932495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15225 , TFLOPS: 97.76951435627711, Tokens per sec: 79889.98699171536, Loss: 2.267214775085449 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15226 , TFLOPS: 95.99726546586317, Tokens per sec: 78441.83680161306, Loss: 2.253220319747925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15227 , TFLOPS: 94.47898252934144, Tokens per sec: 77201.20873010135, Loss: 2.281309127807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15228 , TFLOPS: 96.01965897147473, Tokens per sec: 78460.13511150838, Loss: 2.2466320991516113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15229 , TFLOPS: 98.41766127304825, Tokens per sec: 80419.60452219502, Loss: 2.2569422721862793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15230 , TFLOPS: 95.62971252903633, Tokens per sec: 78141.49983527753, Loss: 2.2418582439422607 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15231 , TFLOPS: 97.78303144793277, Tokens per sec: 79901.0321552681, Loss: 2.244520664215088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15232 , TFLOPS: 96.31290186368479, Tokens per sec: 78699.75142747683, Loss: 2.2512824535369873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15233 , TFLOPS: 97.1099736414008, Tokens per sec: 79351.05929550152, Loss: 2.231396198272705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15234 , TFLOPS: 96.62023872276424, Tokens per sec: 78950.88428658523, Loss: 2.2455008029937744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15235 , TFLOPS: 96.53719623829153, Tokens per sec: 78883.02813481884, Loss: 2.2691216468811035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15236 , TFLOPS: 96.39514501242918, Tokens per sec: 78766.95442144286, Loss: 2.2587451934814453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15237 , TFLOPS: 96.18065721515374, Tokens per sec: 78591.690920882, Loss: 2.233016014099121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15238 , TFLOPS: 97.18736553119592, Tokens per sec: 79414.29820089758, Loss: 2.242032766342163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15239 , TFLOPS: 96.32385567408564, Tokens per sec: 78708.70206793152, Loss: 2.255735158920288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15240 , TFLOPS: 98.37112452769146, Tokens per sec: 80381.57814960157, Loss: 2.2376811504364014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15241 , TFLOPS: 96.65360015310371, Tokens per sec: 78978.14477011524, Loss: 2.2535130977630615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15242 , TFLOPS: 98.4075846538096, Tokens per sec: 80411.37065721984, Loss: 2.238191604614258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15243 , TFLOPS: 97.71820139046667, Tokens per sec: 79848.0578464384, Loss: 2.2600979804992676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15244 , TFLOPS: 97.05717017182508, Tokens per sec: 79307.91222122895, Loss: 2.2382090091705322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15245 , TFLOPS: 97.7185679300411, Tokens per sec: 79848.35735536023, Loss: 2.2473440170288086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15246 , TFLOPS: 96.77805581119686, Tokens per sec: 79079.84069211679, Loss: 2.258253812789917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15247 , TFLOPS: 97.2219269535782, Tokens per sec: 79442.53922881631, Loss: 2.2645857334136963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15248 , TFLOPS: 97.72298066247119, Tokens per sec: 79851.96311262269, Loss: 2.260969877243042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15249 , TFLOPS: 97.22954759900965, Tokens per sec: 79448.7662543712, Loss: 2.273483991622925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15250 , TFLOPS: 97.71220963991887, Tokens per sec: 79843.16183282441, Loss: 2.257903575897217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15251 , TFLOPS: 97.79365717509539, Tokens per sec: 79909.71471046323, Loss: 2.2611567974090576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15252 , TFLOPS: 98.36695232890132, Tokens per sec: 80378.16893856821, Loss: 2.2822132110595703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15253 , TFLOPS: 95.95337071749474, Tokens per sec: 78405.96927276987, Loss: 2.2321064472198486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15254 , TFLOPS: 97.68950065355513, Tokens per sec: 79824.60573548528, Loss: 2.246840000152588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15255 , TFLOPS: 97.7935815003811, Tokens per sec: 79909.65287470612, Loss: 2.2428507804870605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15256 , TFLOPS: 97.75732719137055, Tokens per sec: 79880.02854554467, Loss: 2.2504565715789795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15257 , TFLOPS: 97.22618241925736, Tokens per sec: 79446.01647936818, Loss: 2.264538049697876 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15258 , TFLOPS: 97.71383790506484, Tokens per sec: 79844.49233019045, Loss: 2.266481637954712 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15259 , TFLOPS: 97.84018092167744, Tokens per sec: 79947.7304614232, Loss: 2.246645450592041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15260 , TFLOPS: 97.10429463213178, Tokens per sec: 79346.4188308371, Loss: 2.2588040828704834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15261 , TFLOPS: 97.07545742708731, Tokens per sec: 79322.85520825956, Loss: 2.270193099975586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15262 , TFLOPS: 97.02822939314281, Tokens per sec: 79284.26396596576, Loss: 2.244534969329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15263 , TFLOPS: 97.82324149548263, Tokens per sec: 79933.88882021978, Loss: 2.2662227153778076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15264 , TFLOPS: 96.05447351928538, Tokens per sec: 78488.58297473066, Loss: 2.2600760459899902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15265 , TFLOPS: 98.46299666933746, Tokens per sec: 80456.64924153984, Loss: 2.235771894454956 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15266 , TFLOPS: 95.96776480419992, Tokens per sec: 78417.73105155343, Loss: 2.2567477226257324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15267 , TFLOPS: 97.82868821985295, Tokens per sec: 79938.33947891394, Loss: 2.2625813484191895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15268 , TFLOPS: 95.32587608356907, Tokens per sec: 77893.22725424031, Loss: 2.264404535293579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15269 , TFLOPS: 97.87044811605995, Tokens per sec: 79972.46256510004, Loss: 2.2428627014160156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15270 , TFLOPS: 96.66148648072989, Tokens per sec: 78984.58889143071, Loss: 2.2463502883911133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15271 , TFLOPS: 97.73545071538142, Tokens per sec: 79862.15271386335, Loss: 2.250948905944824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15272 , TFLOPS: 96.61426945428761, Tokens per sec: 78946.00664365033, Loss: 2.255279779434204 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15273 , TFLOPS: 97.07645641667088, Tokens per sec: 79323.67150836457, Loss: 2.2438015937805176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15274 , TFLOPS: 96.55097526411231, Tokens per sec: 78894.2873315207, Loss: 2.2537317276000977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15275 , TFLOPS: 96.73881658604793, Tokens per sec: 79047.77731114009, Loss: 2.2533698081970215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15276 , TFLOPS: 97.8850800367494, Tokens per sec: 79984.41868415455, Loss: 2.2475225925445557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15277 , TFLOPS: 95.11885759500667, Tokens per sec: 77724.06711810597, Loss: 2.262246608734131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15278 , TFLOPS: 98.38240005998435, Tokens per sec: 80390.79167729612, Loss: 2.2710790634155273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15279 , TFLOPS: 97.11465529836231, Tokens per sec: 79354.88479792133, Loss: 2.2428741455078125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15280 , TFLOPS: 97.62282933821632, Tokens per sec: 79770.1268874496, Loss: 2.2695119380950928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15281 , TFLOPS: 97.21283436375343, Tokens per sec: 79435.1094396064, Loss: 2.2656047344207764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15282 , TFLOPS: 97.70120801062616, Tokens per sec: 79834.1721183222, Loss: 2.2578582763671875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15283 , TFLOPS: 97.55137753770404, Tokens per sec: 79711.74178191804, Loss: 2.253556728363037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15284 , TFLOPS: 97.79310361774331, Tokens per sec: 79909.26238450101, Loss: 2.2396178245544434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15285 , TFLOPS: 96.00269797021684, Tokens per sec: 78446.27584076556, Loss: 2.255556583404541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15286 , TFLOPS: 98.35158058492298, Tokens per sec: 80365.60829085961, Loss: 2.248677968978882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15287 , TFLOPS: 97.23204954597722, Tokens per sec: 79450.81065964425, Loss: 2.2765679359436035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15288 , TFLOPS: 97.80049891490917, Tokens per sec: 79915.3052721871, Loss: 2.251007318496704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15289 , TFLOPS: 97.75843371082263, Tokens per sec: 79880.93271107331, Loss: 2.2458696365356445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15290 , TFLOPS: 97.8825567518121, Tokens per sec: 79982.35684307713, Loss: 2.242281436920166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15291 , TFLOPS: 96.54986507551529, Tokens per sec: 78893.38016783926, Loss: 2.267402172088623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15292 , TFLOPS: 97.15913077226556, Tokens per sec: 79391.22685254822, Loss: 2.247037887573242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15293 , TFLOPS: 97.20134982049069, Tokens per sec: 79425.72512366797, Loss: 2.2506637573242188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15294 , TFLOPS: 97.15392452413683, Tokens per sec: 79386.97269318148, Loss: 2.243777275085449 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15295 , TFLOPS: 97.36580613412106, Tokens per sec: 79560.10661101749, Loss: 2.2686195373535156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15296 , TFLOPS: 97.87516607968334, Tokens per sec: 79976.3177346279, Loss: 2.238924980163574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15297 , TFLOPS: 97.79158127056854, Tokens per sec: 79908.01843543579, Loss: 2.250537157058716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15298 , TFLOPS: 97.09656558104751, Tokens per sec: 79340.10322423265, Loss: 2.254673957824707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15299 , TFLOPS: 96.80165028443054, Tokens per sec: 79099.1203435718, Loss: 2.245368480682373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15300 , TFLOPS: 98.3836382200367, Tokens per sec: 80391.80340974798, Loss: 2.254258155822754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15301 , TFLOPS: 97.21930677220826, Tokens per sec: 79440.39821116945, Loss: 2.2656803131103516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15302 , TFLOPS: 96.5042149637976, Tokens per sec: 78856.07828641632, Loss: 2.254298210144043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15303 , TFLOPS: 98.38141602143756, Tokens per sec: 80389.98759406814, Loss: 2.2529678344726562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15304 , TFLOPS: 95.88255266558853, Tokens per sec: 78348.10201953848, Loss: 2.245371103286743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15305 , TFLOPS: 98.3982172084555, Tokens per sec: 80403.71627648153, Loss: 2.27699613571167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15306 , TFLOPS: 95.72726613911087, Tokens per sec: 78221.21340132276, Loss: 2.2508184909820557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15307 , TFLOPS: 98.39005956597246, Tokens per sec: 80397.05045681224, Loss: 2.245924472808838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15308 , TFLOPS: 96.7408777264637, Tokens per sec: 79049.46152203227, Loss: 2.2528369426727295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15309 , TFLOPS: 96.97252395800855, Tokens per sec: 79238.74562093207, Loss: 2.2693605422973633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15310 , TFLOPS: 96.00228669151197, Tokens per sec: 78445.93977434855, Loss: 2.252955198287964 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15311 , TFLOPS: 97.70167633954013, Tokens per sec: 79834.55480193366, Loss: 2.2587554454803467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15312 , TFLOPS: 96.88609918950613, Tokens per sec: 79168.12571781728, Loss: 2.256903648376465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15313 , TFLOPS: 96.16871598993103, Tokens per sec: 78581.93343835804, Loss: 2.2430009841918945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15314 , TFLOPS: 97.22647452796761, Tokens per sec: 79446.25516891487, Loss: 2.266122341156006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15315 , TFLOPS: 96.34747528182139, Tokens per sec: 78728.00225743519, Loss: 2.264207363128662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15316 , TFLOPS: 98.38689507121717, Tokens per sec: 80394.46466668634, Loss: 2.2401864528656006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15317 , TFLOPS: 96.6285597821125, Tokens per sec: 78957.68363838182, Loss: 2.2550289630889893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15318 , TFLOPS: 97.74435655427249, Tokens per sec: 79869.42990407794, Loss: 2.250938653945923 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15319 , TFLOPS: 97.74719172556999, Tokens per sec: 79871.74659552894, Loss: 2.26430082321167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15320 , TFLOPS: 97.2174786709689, Tokens per sec: 79438.90442258726, Loss: 2.2755467891693115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15321 , TFLOPS: 97.75176633301822, Tokens per sec: 79875.48462503577, Loss: 2.263958692550659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15322 , TFLOPS: 97.0945426865761, Tokens per sec: 79338.45026508614, Loss: 2.2658138275146484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15323 , TFLOPS: 97.19504393153082, Tokens per sec: 79420.57241947083, Loss: 2.2335329055786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15324 , TFLOPS: 97.69577678553424, Tokens per sec: 79829.73412448741, Loss: 2.2494869232177734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15325 , TFLOPS: 97.2700936061105, Tokens per sec: 79481.89744052039, Loss: 2.2411203384399414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15326 , TFLOPS: 98.35022930633882, Tokens per sec: 80364.50412634348, Loss: 2.2658283710479736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15327 , TFLOPS: 96.65688496579344, Tokens per sec: 78980.8288751226, Loss: 2.274237871170044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15328 , TFLOPS: 97.76268526043364, Tokens per sec: 79884.40676170509, Loss: 2.241945743560791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15329 , TFLOPS: 97.07481942724252, Tokens per sec: 79322.3338821628, Loss: 2.256648540496826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15330 , TFLOPS: 97.19334760092423, Tokens per sec: 79419.18630406476, Loss: 2.2665035724639893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15331 , TFLOPS: 96.48516551907889, Tokens per sec: 78840.51249476033, Loss: 2.2671985626220703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15332 , TFLOPS: 97.85822357395354, Tokens per sec: 79962.47357705717, Loss: 2.2418761253356934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15333 , TFLOPS: 96.1677726028726, Tokens per sec: 78581.16257250766, Loss: 2.258981704711914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15334 , TFLOPS: 97.73139036982595, Tokens per sec: 79858.8349009874, Loss: 2.245903968811035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15335 , TFLOPS: 97.83777999526545, Tokens per sec: 79945.76860264661, Loss: 2.2605583667755127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15336 , TFLOPS: 96.5408225133732, Tokens per sec: 78885.9912575367, Loss: 2.250011920928955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15337 , TFLOPS: 97.82803853441553, Tokens per sec: 79937.80860421863, Loss: 2.2479515075683594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15338 , TFLOPS: 97.80279078442078, Tokens per sec: 79917.17801776301, Loss: 2.2786145210266113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15339 , TFLOPS: 97.86363933028952, Tokens per sec: 79966.89893097329, Loss: 2.2615199089050293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15340 , TFLOPS: 96.63710818900465, Tokens per sec: 78964.66876170896, Loss: 2.255990743637085 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15341 , TFLOPS: 97.84196593730928, Tokens per sec: 79949.18904364637, Loss: 2.2710916996002197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15342 , TFLOPS: 96.56064519558531, Tokens per sec: 78902.18888145348, Loss: 2.247594118118286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15343 , TFLOPS: 98.33731252925703, Tokens per sec: 80353.94949528255, Loss: 2.2704405784606934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15344 , TFLOPS: 96.08887785648167, Tokens per sec: 78516.69564430017, Loss: 2.261671781539917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15345 , TFLOPS: 97.21177068986887, Tokens per sec: 79434.24028429392, Loss: 2.2671353816986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15346 , TFLOPS: 96.81809322538199, Tokens per sec: 79112.55629390219, Loss: 2.2608771324157715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15347 , TFLOPS: 97.3125737143903, Tokens per sec: 79516.60903053108, Loss: 2.2393710613250732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15348 , TFLOPS: 97.11448547318135, Tokens per sec: 79354.74602939429, Loss: 2.2459945678710938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15349 , TFLOPS: 97.04412501168474, Tokens per sec: 79297.25268506598, Loss: 2.2423300743103027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15350 , TFLOPS: 97.19346259794304, Tokens per sec: 79419.28027108913, Loss: 2.26344895362854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15351 , TFLOPS: 96.60044703598814, Tokens per sec: 78934.71198983713, Loss: 2.2395479679107666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15352 , TFLOPS: 97.27379488187576, Tokens per sec: 79484.92184822749, Loss: 2.2390825748443604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15353 , TFLOPS: 96.68033453109935, Tokens per sec: 78999.99011857958, Loss: 2.2515416145324707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15354 , TFLOPS: 97.77863073139956, Tokens per sec: 79897.43621650428, Loss: 2.2520058155059814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15355 , TFLOPS: 96.43437080147075, Tokens per sec: 78799.00682343038, Loss: 2.2510745525360107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15356 , TFLOPS: 97.73315009703222, Tokens per sec: 79860.27281938704, Loss: 2.234025716781616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15357 , TFLOPS: 97.18834228824731, Tokens per sec: 79415.09633422825, Loss: 2.2854044437408447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15358 , TFLOPS: 97.8467464832972, Tokens per sec: 79953.0953508353, Loss: 2.2721328735351562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15359 , TFLOPS: 97.8779871204355, Tokens per sec: 79978.62287964656, Loss: 2.2545154094696045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15360 , TFLOPS: 96.4804270051381, Tokens per sec: 78836.64053303911, Loss: 2.2633421421051025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15361 , TFLOPS: 97.21325055915874, Tokens per sec: 79435.44952358589, Loss: 2.2471723556518555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15362 , TFLOPS: 97.81007589409388, Tokens per sec: 79923.13086841232, Loss: 2.2417304515838623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15363 , TFLOPS: 97.07651188703916, Tokens per sec: 79323.71683463042, Loss: 2.2502939701080322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15364 , TFLOPS: 97.8835504207305, Tokens per sec: 79983.16879552968, Loss: 2.2486746311187744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15365 , TFLOPS: 96.78307731895765, Tokens per sec: 79083.94389537325, Loss: 2.2403371334075928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15366 , TFLOPS: 97.7034989922912, Tokens per sec: 79836.04413841582, Loss: 2.270693302154541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15367 , TFLOPS: 98.37536269420457, Tokens per sec: 80385.04126455956, Loss: 2.236917018890381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15368 , TFLOPS: 96.46155516465772, Tokens per sec: 78821.2198663782, Loss: 2.246335506439209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15369 , TFLOPS: 96.74125538399751, Tokens per sec: 79049.77011572494, Loss: 2.2529807090759277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15370 , TFLOPS: 97.62388521704179, Tokens per sec: 79770.98967321863, Loss: 2.2353854179382324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15371 , TFLOPS: 97.76440332649234, Tokens per sec: 79885.81063770861, Loss: 2.2727627754211426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15372 , TFLOPS: 96.54860306118123, Tokens per sec: 78892.34894343978, Loss: 2.255976915359497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15373 , TFLOPS: 97.24191581850492, Tokens per sec: 79458.87264490708, Loss: 2.2430689334869385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15374 , TFLOPS: 96.00477011672508, Tokens per sec: 78447.96904501923, Loss: 2.250276803970337 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15375 , TFLOPS: 97.7811963764883, Tokens per sec: 79899.53267115187, Loss: 2.25812029838562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15376 , TFLOPS: 96.83743525725671, Tokens per sec: 79128.36116605523, Loss: 2.2427866458892822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15377 , TFLOPS: 97.18848920526939, Tokens per sec: 79415.21638390895, Loss: 2.2578771114349365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15378 , TFLOPS: 95.53294791547802, Tokens per sec: 78062.43097859628, Loss: 2.281033515930176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15379 , TFLOPS: 98.38079215218457, Tokens per sec: 80389.47781444169, Loss: 2.2597601413726807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15380 , TFLOPS: 96.64802898648803, Tokens per sec: 78973.59242645897, Loss: 2.2569098472595215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15381 , TFLOPS: 97.0090051195641, Tokens per sec: 79268.55531714775, Loss: 2.2421822547912598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15382 , TFLOPS: 96.30404768455999, Tokens per sec: 78692.5164497871, Loss: 2.2671058177948 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15383 , TFLOPS: 96.86075146585114, Tokens per sec: 79147.41344031034, Loss: 2.2416586875915527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15384 , TFLOPS: 97.01724876608723, Tokens per sec: 79275.29141293214, Loss: 2.2530314922332764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15385 , TFLOPS: 96.14510744028306, Tokens per sec: 78562.64228470255, Loss: 2.2433905601501465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15386 , TFLOPS: 96.54002833166173, Tokens per sec: 78885.34231121621, Loss: 2.2387032508850098 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15387 , TFLOPS: 97.17778212611435, Tokens per sec: 79406.46735390663, Loss: 2.2633509635925293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15388 , TFLOPS: 97.07245189615864, Tokens per sec: 79320.39931156849, Loss: 2.2725882530212402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15389 , TFLOPS: 97.40325369112892, Tokens per sec: 79590.70597383448, Loss: 2.2559714317321777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15390 , TFLOPS: 97.21074323493812, Tokens per sec: 79433.40072442098, Loss: 2.252056360244751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15391 , TFLOPS: 95.53173578597281, Tokens per sec: 78061.44051637458, Loss: 2.2328810691833496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15392 , TFLOPS: 97.83756112482398, Tokens per sec: 79945.58975797467, Loss: 2.2696893215179443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15393 , TFLOPS: 96.19401570884534, Tokens per sec: 78602.60648995564, Loss: 2.2585549354553223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15394 , TFLOPS: 97.83154809978112, Tokens per sec: 79940.6763604231, Loss: 2.23953914642334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15395 , TFLOPS: 97.23996761950922, Tokens per sec: 79457.2807213566, Loss: 2.252399444580078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15396 , TFLOPS: 97.31223385245717, Tokens per sec: 79516.3313205967, Loss: 2.273041248321533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15397 , TFLOPS: 96.69373918897853, Tokens per sec: 79010.94340959925, Loss: 2.2608683109283447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15398 , TFLOPS: 97.17981977179201, Tokens per sec: 79408.13236664353, Loss: 2.231656789779663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15399 , TFLOPS: 98.41436606512245, Tokens per sec: 80416.91192297263, Loss: 2.2586312294006348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15400 , TFLOPS: 97.17296383589766, Tokens per sec: 79402.53020493676, Loss: 2.2628209590911865 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/optimizer.pt +[rank0]:[2024-08-30 03:22:03,058] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007705546988290735, 'preprocessing_with_comm': 0.0016737190017011017, 'state_converting': 2.6382416830019793, : 2.649255092997919}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400, took 15.09s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013400 + eval ppl=7.760585308074951, eval loss=2.049057722091675 +------------------------------------------------------------------ +iteration: 15401 , TFLOPS: 95.7863082631685, Tokens per sec: 78269.45824076975, Loss: 2.2525038719177246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15402 , TFLOPS: 94.35287988593775, Tokens per sec: 77098.16701400538, Loss: 2.2716927528381348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15403 , TFLOPS: 97.88665205864604, Tokens per sec: 79985.70322371373, Loss: 2.2450966835021973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15404 , TFLOPS: 96.67374889715664, Tokens per sec: 78994.6088275557, Loss: 2.2680747509002686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15405 , TFLOPS: 96.1740236703706, Tokens per sec: 78586.27048067706, Loss: 2.2502987384796143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15406 , TFLOPS: 98.39790789184644, Tokens per sec: 80403.46352591774, Loss: 2.2452545166015625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15407 , TFLOPS: 96.69246902600267, Tokens per sec: 79009.90552673477, Loss: 2.2525017261505127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15408 , TFLOPS: 97.3733540721153, Tokens per sec: 79566.27422545379, Loss: 2.237856149673462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15409 , TFLOPS: 98.41257011001181, Tokens per sec: 80415.44440181975, Loss: 2.2623023986816406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15410 , TFLOPS: 97.04174069612273, Tokens per sec: 79295.30439944252, Loss: 2.2338929176330566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15411 , TFLOPS: 97.51033052334392, Tokens per sec: 79678.20120984025, Loss: 2.260857582092285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15412 , TFLOPS: 98.39963147232798, Tokens per sec: 80404.87190789812, Loss: 2.272589683532715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15413 , TFLOPS: 98.22661482498307, Tokens per sec: 80263.49555150786, Loss: 2.2264723777770996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15414 , TFLOPS: 97.37587592182065, Tokens per sec: 79568.33489376621, Loss: 2.2918124198913574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15415 , TFLOPS: 96.35948385569476, Tokens per sec: 78737.81477227533, Loss: 2.25059175491333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15416 , TFLOPS: 98.43484816977461, Tokens per sec: 80433.6483779402, Loss: 2.2575864791870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15417 , TFLOPS: 97.10844937174342, Tokens per sec: 79349.81377552627, Loss: 2.2676453590393066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15418 , TFLOPS: 96.53668783128778, Tokens per sec: 78882.61270236854, Loss: 2.2358288764953613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15419 , TFLOPS: 97.76611330941859, Tokens per sec: 79887.20790877653, Loss: 2.239975690841675 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15420 , TFLOPS: 98.27961652625402, Tokens per sec: 80306.80460600153, Loss: 2.2473018169403076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15421 , TFLOPS: 95.97452568970917, Tokens per sec: 78423.25554514371, Loss: 2.266489267349243 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15422 , TFLOPS: 97.17221586981697, Tokens per sec: 79401.91902259785, Loss: 2.2652387619018555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15423 , TFLOPS: 97.79583256492181, Tokens per sec: 79911.4922774899, Loss: 2.2315711975097656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15424 , TFLOPS: 96.72130998143999, Tokens per sec: 79033.47221385484, Loss: 2.241159439086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15425 , TFLOPS: 97.70353308273094, Tokens per sec: 79836.0719945917, Loss: 2.2673707008361816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15426 , TFLOPS: 96.72559627201025, Tokens per sec: 79036.97465221873, Loss: 2.2606008052825928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15427 , TFLOPS: 96.65355638836489, Tokens per sec: 78978.10900882052, Loss: 2.2587153911590576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15428 , TFLOPS: 97.31915279249739, Tokens per sec: 79521.98496461293, Loss: 2.2431838512420654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15429 , TFLOPS: 96.47218415600119, Tokens per sec: 78829.90508881953, Loss: 2.2521371841430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15430 , TFLOPS: 95.99569480749454, Tokens per sec: 78440.55337622772, Loss: 2.235738515853882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15431 , TFLOPS: 97.10380548055612, Tokens per sec: 79346.01913249273, Loss: 2.255356788635254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15432 , TFLOPS: 96.53292307082249, Tokens per sec: 78879.53641967879, Loss: 2.253931999206543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15433 , TFLOPS: 96.21778425739123, Tokens per sec: 78622.02838282935, Loss: 2.2482151985168457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15434 , TFLOPS: 95.42545552667586, Tokens per sec: 77974.59618060577, Loss: 2.2628602981567383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15435 , TFLOPS: 97.20603514291753, Tokens per sec: 79429.55362123386, Loss: 2.259873151779175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15436 , TFLOPS: 97.22307357963453, Tokens per sec: 79443.4761664837, Loss: 2.240748405456543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15437 , TFLOPS: 97.14352350499175, Tokens per sec: 79378.47375269199, Loss: 2.2479653358459473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15438 , TFLOPS: 96.6359199558584, Tokens per sec: 78963.69782581732, Loss: 2.2600626945495605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15439 , TFLOPS: 97.29703233366476, Tokens per sec: 79503.90976826951, Loss: 2.2591159343719482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15440 , TFLOPS: 96.00845800514165, Tokens per sec: 78450.9825135791, Loss: 2.2737936973571777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15441 , TFLOPS: 97.14262154920013, Tokens per sec: 79377.73674139625, Loss: 2.2663283348083496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15442 , TFLOPS: 95.86642042860167, Tokens per sec: 78334.91995341578, Loss: 2.2800893783569336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15443 , TFLOPS: 96.28472057058114, Tokens per sec: 78676.72376743081, Loss: 2.259627103805542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15444 , TFLOPS: 97.70595060680841, Tokens per sec: 79838.047415748, Loss: 2.2804174423217773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15445 , TFLOPS: 96.90316597928782, Tokens per sec: 79182.07143108615, Loss: 2.268141269683838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15446 , TFLOPS: 97.94524634120648, Tokens per sec: 80033.58212034487, Loss: 2.238168478012085 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15447 , TFLOPS: 97.88862866762726, Tokens per sec: 79987.31836179472, Loss: 2.269704818725586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15448 , TFLOPS: 97.74005492475153, Tokens per sec: 79865.91493186257, Loss: 2.2536959648132324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15449 , TFLOPS: 97.77815716298723, Tokens per sec: 79897.04925156383, Loss: 2.252572536468506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15450 , TFLOPS: 97.81065836294862, Tokens per sec: 79923.6068187076, Loss: 2.2521071434020996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15451 , TFLOPS: 98.30501706218062, Tokens per sec: 80327.56003777513, Loss: 2.2642033100128174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15452 , TFLOPS: 97.70713157564924, Tokens per sec: 79839.01241578869, Loss: 2.2432901859283447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15453 , TFLOPS: 95.14789683065645, Tokens per sec: 77747.79582509185, Loss: 2.2473623752593994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15454 , TFLOPS: 98.37620090946064, Tokens per sec: 80385.72619182286, Loss: 2.2609801292419434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15455 , TFLOPS: 97.76660652517089, Tokens per sec: 79887.61092806423, Loss: 2.247407913208008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15456 , TFLOPS: 97.20441820213397, Tokens per sec: 79428.23237729586, Loss: 2.2591006755828857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15457 , TFLOPS: 97.2525341626277, Tokens per sec: 79467.5491672303, Loss: 2.256512403488159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15458 , TFLOPS: 97.89357374164388, Tokens per sec: 79991.35910907123, Loss: 2.2536067962646484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15459 , TFLOPS: 96.4636446691798, Tokens per sec: 78822.92725431183, Loss: 2.2580928802490234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15460 , TFLOPS: 96.54561255033445, Tokens per sec: 78889.90532004427, Loss: 2.249535322189331 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15461 , TFLOPS: 98.38159914983271, Tokens per sec: 80390.13723299395, Loss: 2.2377829551696777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15462 , TFLOPS: 95.88009443030936, Tokens per sec: 78346.09333221125, Loss: 2.253354787826538 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15463 , TFLOPS: 97.16380765671487, Tokens per sec: 79395.04845522548, Loss: 2.2611801624298096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15464 , TFLOPS: 93.96323667977272, Tokens per sec: 76779.77951993947, Loss: 2.2737486362457275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15465 , TFLOPS: 96.88836188455299, Tokens per sec: 79169.97462418703, Loss: 2.2525999546051025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15466 , TFLOPS: 96.71060179409898, Tokens per sec: 79024.72227832524, Loss: 2.271517276763916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15467 , TFLOPS: 97.75365503521982, Tokens per sec: 79877.0279322242, Loss: 2.250288248062134 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15468 , TFLOPS: 95.49314375852754, Tokens per sec: 78029.9059773019, Loss: 2.273512125015259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15469 , TFLOPS: 97.86208755765222, Tokens per sec: 79965.63093760516, Loss: 2.240755796432495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15470 , TFLOPS: 97.1471977384107, Tokens per sec: 79381.4760634018, Loss: 2.2737889289855957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15471 , TFLOPS: 95.52944258237524, Tokens per sec: 78059.56668068269, Loss: 2.2669126987457275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15472 , TFLOPS: 97.05576383822529, Tokens per sec: 79306.76306984233, Loss: 2.2694613933563232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15473 , TFLOPS: 96.69559434887323, Tokens per sec: 79012.45930850528, Loss: 2.2532007694244385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15474 , TFLOPS: 97.05297645426235, Tokens per sec: 79304.48542665257, Loss: 2.268826484680176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15475 , TFLOPS: 97.33767135381294, Tokens per sec: 79537.11695777404, Loss: 2.2460360527038574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15476 , TFLOPS: 96.59261716934748, Tokens per sec: 78928.31400424635, Loss: 2.2403626441955566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15477 , TFLOPS: 97.7719026873702, Tokens per sec: 79891.93855853262, Loss: 2.2676849365234375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15478 , TFLOPS: 96.03778152228449, Tokens per sec: 78474.94351429101, Loss: 2.270291566848755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15479 , TFLOPS: 97.7107837780734, Tokens per sec: 79841.99672440553, Loss: 2.231536865234375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15480 , TFLOPS: 96.10258948997026, Tokens per sec: 78527.89977299253, Loss: 2.2614431381225586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15481 , TFLOPS: 95.58448775357373, Tokens per sec: 78104.54550705774, Loss: 2.2468771934509277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15482 , TFLOPS: 97.85144439134555, Tokens per sec: 79956.93413243651, Loss: 2.2491273880004883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15483 , TFLOPS: 97.21673862503995, Tokens per sec: 79438.29971200823, Loss: 2.2304677963256836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15484 , TFLOPS: 97.22217018732655, Tokens per sec: 79442.73798137347, Loss: 2.2860684394836426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15485 , TFLOPS: 96.61679297412077, Tokens per sec: 78948.06867666719, Loss: 2.259948968887329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15486 , TFLOPS: 98.41410004209766, Tokens per sec: 80416.69454871105, Loss: 2.257485866546631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15487 , TFLOPS: 97.85059428297306, Tokens per sec: 79956.23948700161, Loss: 2.2386016845703125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15488 , TFLOPS: 97.31750397427658, Tokens per sec: 79520.63767280034, Loss: 2.2679905891418457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15489 , TFLOPS: 98.44149760133794, Tokens per sec: 80439.0817996422, Loss: 2.2540464401245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15490 , TFLOPS: 97.75301688590278, Tokens per sec: 79876.50648398983, Loss: 2.2670443058013916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15491 , TFLOPS: 95.98824848873815, Tokens per sec: 78434.46879748657, Loss: 2.2594966888427734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15492 , TFLOPS: 98.3239392652317, Tokens per sec: 80343.021856989, Loss: 2.267988443374634 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15493 , TFLOPS: 97.79495461108863, Tokens per sec: 79910.77487881227, Loss: 2.2561450004577637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15494 , TFLOPS: 96.49046386397514, Tokens per sec: 78844.84190876658, Loss: 2.256807327270508 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15495 , TFLOPS: 97.25800163250304, Tokens per sec: 79472.01677761064, Loss: 2.2558114528656006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15496 , TFLOPS: 97.29236817797836, Tokens per sec: 79500.0985665921, Loss: 2.250976800918579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15497 , TFLOPS: 97.69796710720111, Tokens per sec: 79831.52389270531, Loss: 2.269540548324585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15498 , TFLOPS: 96.05222421294, Tokens per sec: 78486.74500861355, Loss: 2.2498838901519775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15499 , TFLOPS: 97.7588787741925, Tokens per sec: 79881.29638380978, Loss: 2.258485794067383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15500 , TFLOPS: 96.1725440385409, Tokens per sec: 78585.0614354199, Loss: 2.242521286010742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15501 , TFLOPS: 97.25889697764048, Tokens per sec: 79472.74838717068, Loss: 2.263902187347412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15502 , TFLOPS: 97.38492305525448, Tokens per sec: 79575.72753938893, Loss: 2.253641366958618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15503 , TFLOPS: 95.66414451363988, Tokens per sec: 78169.63509625521, Loss: 2.256546974182129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15504 , TFLOPS: 97.24975128639203, Tokens per sec: 79465.27520742052, Loss: 2.242386817932129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15505 , TFLOPS: 97.30309480387052, Tokens per sec: 79508.8635687365, Loss: 2.233555793762207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15506 , TFLOPS: 95.95794707930695, Tokens per sec: 78409.70873581264, Loss: 2.241624355316162 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15507 , TFLOPS: 98.43153127624556, Tokens per sec: 80430.9380588532, Loss: 2.2449910640716553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15508 , TFLOPS: 97.20981086254045, Tokens per sec: 79432.63885893344, Loss: 2.2683587074279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15509 , TFLOPS: 95.94876237965316, Tokens per sec: 78402.20368129035, Loss: 2.258180618286133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15510 , TFLOPS: 97.61758927288636, Tokens per sec: 79765.84509517721, Loss: 2.2404603958129883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15511 , TFLOPS: 96.17100159605377, Tokens per sec: 78583.80106595765, Loss: 2.2511134147644043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15512 , TFLOPS: 97.709404551, Tokens per sec: 79840.86972245939, Loss: 2.250917911529541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15513 , TFLOPS: 95.89499279138708, Tokens per sec: 78358.2671665658, Loss: 2.2275052070617676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15514 , TFLOPS: 97.05176168165575, Tokens per sec: 79303.49280468468, Loss: 2.2429213523864746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15515 , TFLOPS: 97.07582164346762, Tokens per sec: 79323.15281883968, Loss: 2.263970375061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15516 , TFLOPS: 96.60847731828459, Tokens per sec: 78941.27374021942, Loss: 2.2424497604370117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15517 , TFLOPS: 96.57959962842563, Tokens per sec: 78917.6770364578, Loss: 2.272906541824341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15518 , TFLOPS: 96.72312799424371, Tokens per sec: 79034.95775891657, Loss: 2.247236490249634 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15519 , TFLOPS: 95.69011398652326, Tokens per sec: 78190.85542106193, Loss: 2.2680747509002686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15520 , TFLOPS: 97.68265055038978, Tokens per sec: 79819.00833985153, Loss: 2.255441665649414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15521 , TFLOPS: 96.59569322899749, Tokens per sec: 78930.82753177121, Loss: 2.278140068054199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15522 , TFLOPS: 97.90377686351485, Tokens per sec: 79999.69634260373, Loss: 2.2597732543945312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15523 , TFLOPS: 97.2783956795984, Tokens per sec: 79488.68127848174, Loss: 2.25146222114563 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15524 , TFLOPS: 97.7630851170395, Tokens per sec: 79884.73349483096, Loss: 2.2692763805389404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15525 , TFLOPS: 98.460834801041, Tokens per sec: 80454.88272330338, Loss: 2.259194850921631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15526 , TFLOPS: 97.24646532206363, Tokens per sec: 79462.59016137954, Loss: 2.2481586933135986 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15527 , TFLOPS: 97.85400339882051, Tokens per sec: 79959.025163319, Loss: 2.26815128326416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15528 , TFLOPS: 97.20988887330621, Tokens per sec: 79432.7026035383, Loss: 2.255547523498535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15529 , TFLOPS: 96.48179895111697, Tokens per sec: 78837.76158541549, Loss: 2.2382678985595703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15530 , TFLOPS: 97.18745130064038, Tokens per sec: 79414.36828531855, Loss: 2.271834135055542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15531 , TFLOPS: 97.69371384952036, Tokens per sec: 79828.04844636579, Loss: 2.2209417819976807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15532 , TFLOPS: 96.54166182860686, Tokens per sec: 78886.67708362003, Loss: 2.2593350410461426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15533 , TFLOPS: 97.7104139848585, Tokens per sec: 79841.6945568503, Loss: 2.2451179027557373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15534 , TFLOPS: 96.80653796075026, Tokens per sec: 79103.11418971264, Loss: 2.2408015727996826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15535 , TFLOPS: 97.29309019183279, Tokens per sec: 79500.68854269884, Loss: 2.255357265472412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15536 , TFLOPS: 95.9332733700915, Tokens per sec: 78389.54721285493, Loss: 2.261472225189209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15537 , TFLOPS: 97.82109904685572, Tokens per sec: 79932.13817029522, Loss: 2.2423288822174072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15538 , TFLOPS: 96.69676031373243, Tokens per sec: 79013.41204840633, Loss: 2.253652572631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15539 , TFLOPS: 97.17305182645718, Tokens per sec: 79402.60210428802, Loss: 2.2473196983337402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15540 , TFLOPS: 97.30467486555492, Tokens per sec: 79510.15467781323, Loss: 2.2266252040863037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15541 , TFLOPS: 96.33319269043959, Tokens per sec: 78716.33158435053, Loss: 2.262364387512207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15542 , TFLOPS: 97.89592404046708, Tokens per sec: 79993.27959874175, Loss: 2.266252040863037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15543 , TFLOPS: 97.35438591203834, Tokens per sec: 79550.77485357101, Loss: 2.2343897819519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15544 , TFLOPS: 97.10992371827551, Tokens per sec: 79351.0185020307, Loss: 2.2545013427734375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15545 , TFLOPS: 97.83709169503183, Tokens per sec: 79945.2061748073, Loss: 2.2570981979370117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15546 , TFLOPS: 97.67067618005463, Tokens per sec: 79809.22377360296, Loss: 2.2465903759002686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15547 , TFLOPS: 96.48159942821331, Tokens per sec: 78837.59855011472, Loss: 2.2525038719177246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15548 , TFLOPS: 97.63686137437963, Tokens per sec: 79781.59282541538, Loss: 2.2300314903259277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15549 , TFLOPS: 96.71139794595275, Tokens per sec: 79025.37283449974, Loss: 2.236361265182495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15550 , TFLOPS: 97.11922369525276, Tokens per sec: 79358.61775262152, Loss: 2.255518913269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15551 , TFLOPS: 96.59982165739441, Tokens per sec: 78934.20097688952, Loss: 2.248378276824951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15552 , TFLOPS: 96.48205908869595, Tokens per sec: 78837.97415052779, Loss: 2.252589464187622 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15553 , TFLOPS: 97.27157617693366, Tokens per sec: 79483.10888730477, Loss: 2.2506394386291504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15554 , TFLOPS: 96.64437126263503, Tokens per sec: 78970.6036061405, Loss: 2.2378041744232178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15555 , TFLOPS: 97.1237666133954, Tokens per sec: 79362.32988798099, Loss: 2.2607016563415527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15556 , TFLOPS: 95.46900675160292, Tokens per sec: 78010.18300760194, Loss: 2.25187611579895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15557 , TFLOPS: 96.18126117853393, Tokens per sec: 78592.18443490735, Loss: 2.275369167327881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15558 , TFLOPS: 97.22043272727859, Tokens per sec: 79441.3182580421, Loss: 2.24501371383667 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15559 , TFLOPS: 96.72984751182871, Tokens per sec: 79040.44844971104, Loss: 2.2887420654296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15560 , TFLOPS: 96.73204395004407, Tokens per sec: 79042.24321591818, Loss: 2.274662494659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15561 , TFLOPS: 97.85542543436682, Tokens per sec: 79960.18714517035, Loss: 2.251486301422119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15562 , TFLOPS: 96.65764157964688, Tokens per sec: 78981.44712377923, Loss: 2.2599072456359863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15563 , TFLOPS: 98.41483387862978, Tokens per sec: 80417.29418543207, Loss: 2.2814574241638184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15564 , TFLOPS: 96.63354604304328, Tokens per sec: 78961.75804054613, Loss: 2.2421066761016846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15565 , TFLOPS: 97.80910584511861, Tokens per sec: 79922.33821642333, Loss: 2.244518756866455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15566 , TFLOPS: 97.8759699464773, Tokens per sec: 79976.97459487861, Loss: 2.2690892219543457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15567 , TFLOPS: 96.31545366107682, Tokens per sec: 78701.83656681496, Loss: 2.2445833683013916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15568 , TFLOPS: 98.37856966023938, Tokens per sec: 80387.66175906124, Loss: 2.2696473598480225 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15569 , TFLOPS: 96.10584497500393, Tokens per sec: 78530.5599136172, Loss: 2.252250909805298 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15570 , TFLOPS: 97.1829094052174, Tokens per sec: 79410.65698564965, Loss: 2.256906509399414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15571 , TFLOPS: 97.63956264978798, Tokens per sec: 79783.80010708857, Loss: 2.2858328819274902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15572 , TFLOPS: 97.39646137214127, Tokens per sec: 79585.15579513983, Loss: 2.2669503688812256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15573 , TFLOPS: 97.17232774117467, Tokens per sec: 79402.01043556407, Loss: 2.239999771118164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15574 , TFLOPS: 96.69978930877957, Tokens per sec: 79015.88711823263, Loss: 2.2525949478149414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15575 , TFLOPS: 98.0124351523203, Tokens per sec: 80088.48382749985, Loss: 2.251946449279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15576 , TFLOPS: 97.17091208182927, Tokens per sec: 79400.85366387063, Loss: 2.2558913230895996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15577 , TFLOPS: 96.47241353798846, Tokens per sec: 78830.09252274616, Loss: 2.248859167098999 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15578 , TFLOPS: 97.7564605943832, Tokens per sec: 79879.32042683812, Loss: 2.255805015563965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15579 , TFLOPS: 97.39558509171171, Tokens per sec: 79584.43976384337, Loss: 2.2403132915496826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15580 , TFLOPS: 97.88971508315743, Tokens per sec: 79988.20609989125, Loss: 2.2249674797058105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15581 , TFLOPS: 96.76767636535709, Tokens per sec: 79071.35937973042, Loss: 2.2708206176757812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15582 , TFLOPS: 97.2118491570256, Tokens per sec: 79434.30440182761, Loss: 2.2525129318237305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15583 , TFLOPS: 97.24471966687427, Tokens per sec: 79461.16374158717, Loss: 2.258934736251831 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15584 , TFLOPS: 97.14809732809252, Tokens per sec: 79382.21114128832, Loss: 2.2546916007995605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15585 , TFLOPS: 97.23725264663965, Tokens per sec: 79455.06224713524, Loss: 2.2638416290283203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15586 , TFLOPS: 96.53443769547687, Tokens per sec: 78880.77405847379, Loss: 2.2673065662384033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15587 , TFLOPS: 97.84242889119419, Tokens per sec: 79949.56733518322, Loss: 2.2620291709899902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15588 , TFLOPS: 96.10520900865845, Tokens per sec: 78530.04024914508, Loss: 2.2595157623291016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15589 , TFLOPS: 96.61111220533883, Tokens per sec: 78943.42677425947, Loss: 2.2375636100769043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15590 , TFLOPS: 96.59880691147255, Tokens per sec: 78933.37180187489, Loss: 2.2671687602996826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15591 , TFLOPS: 97.79652294731481, Tokens per sec: 79912.05640671524, Loss: 2.2720999717712402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15592 , TFLOPS: 95.89369003497531, Tokens per sec: 78357.2026507658, Loss: 2.2565112113952637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15593 , TFLOPS: 97.80079121431253, Tokens per sec: 79915.54411755405, Loss: 2.2474331855773926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15594 , TFLOPS: 95.43673548883764, Tokens per sec: 77983.81332806022, Loss: 2.2589423656463623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15595 , TFLOPS: 97.20376691904625, Tokens per sec: 79427.70019711937, Loss: 2.2569074630737305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15596 , TFLOPS: 97.12065258237101, Tokens per sec: 79359.78533306866, Loss: 2.251521587371826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15597 , TFLOPS: 96.12528610790083, Tokens per sec: 78546.44576376655, Loss: 2.2692267894744873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15598 , TFLOPS: 96.69695483411881, Tokens per sec: 79013.57099602144, Loss: 2.2441821098327637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15599 , TFLOPS: 96.9147890749848, Tokens per sec: 79191.56896178526, Loss: 2.2441985607147217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15600 , TFLOPS: 97.72235978686783, Tokens per sec: 79851.45577918441, Loss: 2.2624928951263428 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/optimizer.pt +[rank0]:[2024-08-30 04:17:33,640] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0077338309929473326, 'preprocessing_with_comm': 0.0014875640044920146, 'state_converting': 2.6344827380089555, : 2.645335070003057}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600, took 14.98s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013600 + eval ppl=7.822541236877441, eval loss=2.05700945854187 +------------------------------------------------------------------ +iteration: 15601 , TFLOPS: 95.85026028326368, Tokens per sec: 78321.71508266096, Loss: 2.27346134185791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15602 , TFLOPS: 97.13677636327125, Tokens per sec: 79372.9604895053, Loss: 2.2361767292022705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15603 , TFLOPS: 94.86378001204889, Tokens per sec: 77515.63665879004, Loss: 2.2641937732696533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15604 , TFLOPS: 97.32666992999962, Tokens per sec: 79528.12741117433, Loss: 2.2467823028564453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15605 , TFLOPS: 96.01222515376251, Tokens per sec: 78454.06074769226, Loss: 2.245314359664917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15606 , TFLOPS: 97.1941742840654, Tokens per sec: 79419.86180814014, Loss: 2.2470479011535645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15607 , TFLOPS: 96.1241111932939, Tokens per sec: 78545.48571079622, Loss: 2.2607688903808594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15608 , TFLOPS: 96.64829037027216, Tokens per sec: 78973.8060098776, Loss: 2.2405812740325928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15609 , TFLOPS: 97.68129973869625, Tokens per sec: 79817.90455684376, Loss: 2.220843553543091 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15610 , TFLOPS: 97.21840684044888, Tokens per sec: 79439.66285376257, Loss: 2.2444276809692383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15611 , TFLOPS: 97.74989184914436, Tokens per sec: 79873.9529360089, Loss: 2.2336933612823486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15612 , TFLOPS: 97.20448841888557, Tokens per sec: 79428.28975321114, Loss: 2.240966320037842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15613 , TFLOPS: 97.97812918448832, Tokens per sec: 80060.45153807016, Loss: 2.2511236667633057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15614 , TFLOPS: 96.08103983124083, Tokens per sec: 78510.29099210717, Loss: 2.2782609462738037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15615 , TFLOPS: 98.38090972909224, Tokens per sec: 80389.57388955964, Loss: 2.2459347248077393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15616 , TFLOPS: 96.40318949833679, Tokens per sec: 78773.52777796146, Loss: 2.276559829711914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15617 , TFLOPS: 97.08443599440216, Tokens per sec: 79330.1918267414, Loss: 2.266089916229248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15618 , TFLOPS: 96.6619142798998, Tokens per sec: 78984.93845714498, Loss: 2.254636764526367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15619 , TFLOPS: 97.2712801200193, Tokens per sec: 79482.86697157886, Loss: 2.2655508518218994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15620 , TFLOPS: 97.73863844772777, Tokens per sec: 79864.7574920231, Loss: 2.2550833225250244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15621 , TFLOPS: 95.98344213811828, Tokens per sec: 78430.5414046686, Loss: 2.242863893508911 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15622 , TFLOPS: 96.9916828766303, Tokens per sec: 79254.40086653235, Loss: 2.2661666870117188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15623 , TFLOPS: 98.38630719006908, Tokens per sec: 80393.98429386682, Loss: 2.2302377223968506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15624 , TFLOPS: 97.17498103655063, Tokens per sec: 79404.17851151766, Loss: 2.25243878364563 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15625 , TFLOPS: 97.74583652189125, Tokens per sec: 79870.639223717, Loss: 2.2468173503875732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15626 , TFLOPS: 97.70909004211211, Tokens per sec: 79840.6127291509, Loss: 2.2757678031921387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15627 , TFLOPS: 97.83870428993134, Tokens per sec: 79946.52386761157, Loss: 2.2668001651763916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15628 , TFLOPS: 96.64414341808434, Tokens per sec: 78970.41742849289, Loss: 2.25831937789917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15629 , TFLOPS: 97.83650073203115, Tokens per sec: 79944.7232837267, Loss: 2.2595372200012207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15630 , TFLOPS: 95.94754100558332, Tokens per sec: 78401.20566509692, Loss: 2.2554879188537598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15631 , TFLOPS: 97.18957308370264, Tokens per sec: 79416.10204887827, Loss: 2.272709846496582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15632 , TFLOPS: 97.85010550868638, Tokens per sec: 79955.8400969498, Loss: 2.258030414581299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15633 , TFLOPS: 96.43372269567915, Tokens per sec: 78798.47723950433, Loss: 2.2338006496429443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15634 , TFLOPS: 97.00229916560876, Tokens per sec: 79263.07570954424, Loss: 2.2686145305633545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15635 , TFLOPS: 96.48581244378745, Tokens per sec: 78841.04111359303, Loss: 2.2267022132873535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15636 , TFLOPS: 96.601310124452, Tokens per sec: 78935.41724163883, Loss: 2.268908977508545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15637 , TFLOPS: 97.3124689038901, Tokens per sec: 79516.5233871733, Loss: 2.258246421813965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15638 , TFLOPS: 96.73937090847201, Tokens per sec: 79048.23026226234, Loss: 2.247039556503296 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15639 , TFLOPS: 96.36826416242707, Tokens per sec: 78744.9893869315, Loss: 2.2379326820373535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15640 , TFLOPS: 96.79801206856779, Tokens per sec: 79096.14746373435, Loss: 2.2641313076019287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15641 , TFLOPS: 95.49732487666851, Tokens per sec: 78033.32247656643, Loss: 2.237489700317383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15642 , TFLOPS: 97.39185535472662, Tokens per sec: 79581.39209974087, Loss: 2.2380404472351074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15643 , TFLOPS: 96.13282359554722, Tokens per sec: 78552.60483895459, Loss: 2.2428436279296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15644 , TFLOPS: 97.9483715605987, Tokens per sec: 80036.13581756057, Loss: 2.2620363235473633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15645 , TFLOPS: 95.20823695936596, Tokens per sec: 77797.10129755348, Loss: 2.2456459999084473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15646 , TFLOPS: 96.80555475612367, Tokens per sec: 79102.3107879023, Loss: 2.236666202545166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15647 , TFLOPS: 98.3759393600283, Tokens per sec: 80385.51247304882, Loss: 2.245445728302002 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15648 , TFLOPS: 96.42530640492713, Tokens per sec: 78791.60007167625, Loss: 2.245469570159912 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15649 , TFLOPS: 97.7906602721773, Tokens per sec: 79907.2658639417, Loss: 2.244720697402954 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15650 , TFLOPS: 97.96366319148196, Tokens per sec: 80048.63100279689, Loss: 2.235067844390869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15651 , TFLOPS: 97.68637779861574, Tokens per sec: 79822.05397032487, Loss: 2.2517623901367188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15652 , TFLOPS: 97.31186388502101, Tokens per sec: 79516.02901068081, Loss: 2.2429027557373047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15653 , TFLOPS: 97.81343696052573, Tokens per sec: 79925.87728231533, Loss: 2.257352113723755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15654 , TFLOPS: 96.01223902465647, Tokens per sec: 78454.07208195675, Loss: 2.235576629638672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15655 , TFLOPS: 97.11913221301828, Tokens per sec: 79358.54300013276, Loss: 2.2662973403930664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15656 , TFLOPS: 97.30691523706027, Tokens per sec: 79511.98534304254, Loss: 2.247471332550049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15657 , TFLOPS: 97.16347182607073, Tokens per sec: 79394.7740393611, Loss: 2.286275625228882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15658 , TFLOPS: 96.60622241634698, Tokens per sec: 78939.43120179986, Loss: 2.259721040725708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15659 , TFLOPS: 96.84033291171843, Tokens per sec: 79130.72891410816, Loss: 2.256054401397705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15660 , TFLOPS: 97.70430058855501, Tokens per sec: 79836.69914335791, Loss: 2.235466957092285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15661 , TFLOPS: 97.73209992590797, Tokens per sec: 79859.41469752761, Loss: 2.2711496353149414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15662 , TFLOPS: 95.54507865740148, Tokens per sec: 78072.34332009501, Loss: 2.272298574447632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15663 , TFLOPS: 97.89845899166681, Tokens per sec: 79995.35097262249, Loss: 2.2667274475097656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15664 , TFLOPS: 97.16532379222323, Tokens per sec: 79396.28732857815, Loss: 2.2680277824401855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15665 , TFLOPS: 97.80305571716957, Tokens per sec: 79917.39450113203, Loss: 2.243635892868042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15666 , TFLOPS: 96.09802788376611, Tokens per sec: 78524.17236713695, Loss: 2.2625892162323 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15667 , TFLOPS: 97.21842941490043, Tokens per sec: 79439.68129992804, Loss: 2.242901086807251 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15668 , TFLOPS: 97.20688988401542, Tokens per sec: 79430.25205218795, Loss: 2.2388038635253906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15669 , TFLOPS: 96.18399734354934, Tokens per sec: 78594.42022577659, Loss: 2.2523889541625977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15670 , TFLOPS: 97.70057650863069, Tokens per sec: 79833.6561017854, Loss: 2.266256809234619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15671 , TFLOPS: 94.82605879728922, Tokens per sec: 77484.81368317945, Loss: 2.2420265674591064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15672 , TFLOPS: 97.22982395452678, Tokens per sec: 79448.99207157826, Loss: 2.2700281143188477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15673 , TFLOPS: 96.18941172047836, Tokens per sec: 78598.84445254377, Loss: 2.2511191368103027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15674 , TFLOPS: 97.1973538473157, Tokens per sec: 79422.45991112117, Loss: 2.2594072818756104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15675 , TFLOPS: 96.23897926282909, Tokens per sec: 78639.3473673806, Loss: 2.2403814792633057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15676 , TFLOPS: 96.47876389216852, Tokens per sec: 78835.28156061933, Loss: 2.253152370452881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15677 , TFLOPS: 96.28821277942743, Tokens per sec: 78679.57734117657, Loss: 2.264944076538086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15678 , TFLOPS: 96.11228648049861, Tokens per sec: 78535.82343357602, Loss: 2.2613160610198975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15679 , TFLOPS: 96.13894548170525, Tokens per sec: 78557.6071897257, Loss: 2.248924493789673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15680 , TFLOPS: 97.35412316832539, Tokens per sec: 79550.56015891953, Loss: 2.2430365085601807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15681 , TFLOPS: 97.2083099405169, Tokens per sec: 79431.41241691081, Loss: 2.285423517227173 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15682 , TFLOPS: 96.64831697987943, Tokens per sec: 78973.82775327268, Loss: 2.2490451335906982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15683 , TFLOPS: 97.48941439901226, Tokens per sec: 79661.11010621989, Loss: 2.2520313262939453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15684 , TFLOPS: 96.76490296155002, Tokens per sec: 79069.09316008589, Loss: 2.2874155044555664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15685 , TFLOPS: 98.39034302818287, Tokens per sec: 80397.28208108133, Loss: 2.2720022201538086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15686 , TFLOPS: 95.41806194504335, Tokens per sec: 77968.55469472612, Loss: 2.2358996868133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15687 , TFLOPS: 97.33742945949844, Tokens per sec: 79536.91929970274, Loss: 2.2867774963378906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15688 , TFLOPS: 98.37290464408157, Tokens per sec: 80383.03272852823, Loss: 2.2746219635009766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15689 , TFLOPS: 97.02163137361474, Tokens per sec: 79278.87255436122, Loss: 2.258819818496704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15690 , TFLOPS: 96.20390693331429, Tokens per sec: 78610.68886409198, Loss: 2.2650649547576904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15691 , TFLOPS: 98.40241794341597, Tokens per sec: 80407.14880515425, Loss: 2.2591917514801025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15692 , TFLOPS: 97.13466892540343, Tokens per sec: 79371.23844777323, Loss: 2.2655720710754395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15693 , TFLOPS: 96.67984078133088, Tokens per sec: 78999.58666293352, Loss: 2.2527782917022705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15694 , TFLOPS: 97.71065632716707, Tokens per sec: 79841.8925809891, Loss: 2.2744204998016357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15695 , TFLOPS: 97.64956689549287, Tokens per sec: 79791.97483378623, Loss: 2.247178554534912 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15696 , TFLOPS: 96.00098775111883, Tokens per sec: 78444.87837671561, Loss: 2.2597177028656006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15697 , TFLOPS: 97.27719555482713, Tokens per sec: 79487.70062563717, Loss: 2.254918336868286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15698 , TFLOPS: 97.57674685917709, Tokens per sec: 79732.4717075581, Loss: 2.282428741455078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15699 , TFLOPS: 93.24724336230389, Tokens per sec: 76194.722948928, Loss: 2.260418653488159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15700 , TFLOPS: 96.35805935175107, Tokens per sec: 78736.65077343294, Loss: 2.2584409713745117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15701 , TFLOPS: 96.19868705909707, Tokens per sec: 78606.42357049756, Loss: 2.2601826190948486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15702 , TFLOPS: 97.85032995126222, Tokens per sec: 79956.02349475618, Loss: 2.2544758319854736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15703 , TFLOPS: 95.85689390470206, Tokens per sec: 78327.13558550287, Loss: 2.246685266494751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15704 , TFLOPS: 96.02124197354742, Tokens per sec: 78461.42862325208, Loss: 2.24372935295105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15705 , TFLOPS: 96.64442401239444, Tokens per sec: 78970.6467093268, Loss: 2.2607779502868652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15706 , TFLOPS: 96.77369800388188, Tokens per sec: 79076.27981557978, Loss: 2.275395393371582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15707 , TFLOPS: 96.28119117388, Tokens per sec: 78673.83980653156, Loss: 2.2574400901794434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15708 , TFLOPS: 96.78804352983579, Tokens per sec: 79088.00191412342, Loss: 2.2651658058166504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15709 , TFLOPS: 96.1117665913612, Tokens per sec: 78535.3986187787, Loss: 2.250044822692871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15710 , TFLOPS: 96.74036792376737, Tokens per sec: 79049.04494912563, Loss: 2.2475361824035645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15711 , TFLOPS: 96.76109632083276, Tokens per sec: 79065.98265596417, Loss: 2.2640655040740967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15712 , TFLOPS: 97.80966807404198, Tokens per sec: 79922.79762814968, Loss: 2.245203733444214 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15713 , TFLOPS: 95.97086881966217, Tokens per sec: 78420.26742249209, Loss: 2.269350528717041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15714 , TFLOPS: 96.6486044457073, Tokens per sec: 78974.06264900068, Loss: 2.2569494247436523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15715 , TFLOPS: 96.30970689273, Tokens per sec: 78697.1407344638, Loss: 2.2546186447143555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15716 , TFLOPS: 97.14455652186382, Tokens per sec: 79379.31785737039, Loss: 2.257647752761841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15717 , TFLOPS: 95.79387190566601, Tokens per sec: 78275.63868775994, Loss: 2.2628016471862793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15718 , TFLOPS: 97.19377038951242, Tokens per sec: 79419.53177550372, Loss: 2.2568488121032715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15719 , TFLOPS: 96.62734854541029, Tokens per sec: 78956.69390569243, Loss: 2.2879233360290527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15720 , TFLOPS: 97.93135535117212, Tokens per sec: 80022.23143480191, Loss: 2.257277011871338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15721 , TFLOPS: 97.4057889316708, Tokens per sec: 79592.77758414415, Loss: 2.267153263092041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15722 , TFLOPS: 96.84944756110988, Tokens per sec: 79138.17672875787, Loss: 2.2733242511749268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15723 , TFLOPS: 98.42304542800403, Tokens per sec: 80424.00405380985, Loss: 2.263702869415283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15724 , TFLOPS: 95.93089517086368, Tokens per sec: 78387.6039250456, Loss: 2.264227867126465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15725 , TFLOPS: 97.06986022858034, Tokens per sec: 79318.28159327479, Loss: 2.2587194442749023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15726 , TFLOPS: 98.3402503368492, Tokens per sec: 80356.35005349199, Loss: 2.259185552597046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15727 , TFLOPS: 98.40365648074203, Tokens per sec: 80408.16084588617, Loss: 2.2750895023345947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15728 , TFLOPS: 95.36532891958252, Tokens per sec: 77925.46518215326, Loss: 2.2507662773132324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15729 , TFLOPS: 98.44029977317044, Tokens per sec: 80438.10302341169, Loss: 2.266195297241211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15730 , TFLOPS: 97.81808849546911, Tokens per sec: 79929.67817125871, Loss: 2.2571635246276855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15731 , TFLOPS: 95.94256295834586, Tokens per sec: 78397.13797455285, Loss: 2.2459235191345215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15732 , TFLOPS: 97.13605709658296, Tokens per sec: 79372.37275817872, Loss: 2.2490482330322266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15733 , TFLOPS: 97.78005480483324, Tokens per sec: 79898.59986356586, Loss: 2.2611892223358154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15734 , TFLOPS: 95.37279458207732, Tokens per sec: 77931.565567161, Loss: 2.264328956604004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15735 , TFLOPS: 97.37954476108358, Tokens per sec: 79571.33279677229, Loss: 2.2653937339782715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15736 , TFLOPS: 97.75921947742555, Tokens per sec: 79881.57478119199, Loss: 2.258493423461914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15737 , TFLOPS: 96.5481501394865, Tokens per sec: 78891.97884946376, Loss: 2.2591288089752197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15738 , TFLOPS: 95.4856215496392, Tokens per sec: 78023.75938678035, Loss: 2.25462007522583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15739 , TFLOPS: 96.71244051537553, Tokens per sec: 79026.22474481321, Loss: 2.262075424194336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15740 , TFLOPS: 97.20615116446693, Tokens per sec: 79429.64842542855, Loss: 2.2512388229370117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15741 , TFLOPS: 96.0829365728319, Tokens per sec: 78511.84086848769, Loss: 2.258572816848755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15742 , TFLOPS: 96.16304032023059, Tokens per sec: 78577.29570254107, Loss: 2.2390964031219482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15743 , TFLOPS: 96.71874809325593, Tokens per sec: 79031.37882907079, Loss: 2.253196954727173 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15744 , TFLOPS: 96.5889161961348, Tokens per sec: 78925.28984376276, Loss: 2.260021448135376 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15745 , TFLOPS: 95.64184558946063, Tokens per sec: 78151.41407128298, Loss: 2.2417240142822266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15746 , TFLOPS: 97.03289234648287, Tokens per sec: 79288.07418517502, Loss: 2.2471063137054443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15747 , TFLOPS: 96.60781255917685, Tokens per sec: 78940.73054844006, Loss: 2.2368204593658447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15748 , TFLOPS: 96.64871452131035, Tokens per sec: 78974.15259460954, Loss: 2.24843692779541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15749 , TFLOPS: 97.21514171735299, Tokens per sec: 79436.99483762917, Loss: 2.2738542556762695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15750 , TFLOPS: 97.10650143688918, Tokens per sec: 79348.22206781256, Loss: 2.233691453933716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15751 , TFLOPS: 96.58797583650808, Tokens per sec: 78924.52145170474, Loss: 2.2296671867370605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15752 , TFLOPS: 97.40887801120405, Tokens per sec: 79595.30175055075, Loss: 2.2415993213653564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15753 , TFLOPS: 96.69096392517388, Tokens per sec: 79008.67567010282, Loss: 2.2660276889801025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15754 , TFLOPS: 97.23145352258524, Tokens per sec: 79450.32363358766, Loss: 2.280416250228882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15755 , TFLOPS: 96.263716370297, Tokens per sec: 78659.56069468243, Loss: 2.2237865924835205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15756 , TFLOPS: 96.66148711506968, Tokens per sec: 78984.58940976608, Loss: 2.250887393951416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15757 , TFLOPS: 97.86005806043475, Tokens per sec: 79963.97258318486, Loss: 2.2479610443115234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15758 , TFLOPS: 97.92180775852638, Tokens per sec: 80014.4298510742, Loss: 2.259031057357788 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15759 , TFLOPS: 97.84490922241864, Tokens per sec: 79951.59407767611, Loss: 2.251830816268921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15760 , TFLOPS: 96.22729516084226, Tokens per sec: 78629.79998687163, Loss: 2.233415365219116 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15761 , TFLOPS: 97.7299206962522, Tokens per sec: 79857.63399287751, Loss: 2.2778472900390625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15762 , TFLOPS: 97.23341009496873, Tokens per sec: 79451.92239924887, Loss: 2.253117799758911 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15763 , TFLOPS: 96.03615243523025, Tokens per sec: 78473.61234532254, Loss: 2.245162010192871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15764 , TFLOPS: 98.44320719147532, Tokens per sec: 80440.47874975222, Loss: 2.2558445930480957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15765 , TFLOPS: 97.74464769108761, Tokens per sec: 79869.6677994641, Loss: 2.2352511882781982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15766 , TFLOPS: 95.47527859503612, Tokens per sec: 78015.30789232282, Loss: 2.2452778816223145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15767 , TFLOPS: 98.40904716149875, Tokens per sec: 80412.56570990095, Loss: 2.2482497692108154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15768 , TFLOPS: 98.38115209942853, Tokens per sec: 80389.7719366006, Loss: 2.246825933456421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15769 , TFLOPS: 95.3347242142295, Tokens per sec: 77900.45728957409, Loss: 2.256289005279541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15770 , TFLOPS: 97.16543202555256, Tokens per sec: 79396.37576881773, Loss: 2.257391929626465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15771 , TFLOPS: 96.76987677892237, Tokens per sec: 79073.1573942983, Loss: 2.259514570236206 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15772 , TFLOPS: 97.11805971516038, Tokens per sec: 79357.66663452453, Loss: 2.2534356117248535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15773 , TFLOPS: 95.60115876959841, Tokens per sec: 78118.16782340164, Loss: 2.2675256729125977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15774 , TFLOPS: 98.3848716365523, Tokens per sec: 80392.81126613384, Loss: 2.247605085372925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15775 , TFLOPS: 95.96856224066298, Tokens per sec: 78418.38265741523, Loss: 2.2363548278808594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15776 , TFLOPS: 96.07507637228551, Tokens per sec: 78505.41809628168, Loss: 2.287456512451172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15777 , TFLOPS: 96.78044122801464, Tokens per sec: 79081.7898776047, Loss: 2.2570714950561523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15778 , TFLOPS: 96.31056602433236, Tokens per sec: 78697.8427530121, Loss: 2.2497220039367676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15779 , TFLOPS: 96.7653634416739, Tokens per sec: 79069.46943024892, Loss: 2.271519184112549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15780 , TFLOPS: 95.57198361059167, Tokens per sec: 78094.32804994183, Loss: 2.232353687286377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15781 , TFLOPS: 97.89925531402304, Tokens per sec: 79996.00166811893, Loss: 2.255547285079956 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15782 , TFLOPS: 97.08194788962498, Tokens per sec: 79328.15873227845, Loss: 2.242201328277588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15783 , TFLOPS: 96.61374634675975, Tokens per sec: 78945.57919902333, Loss: 2.263568878173828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15784 , TFLOPS: 96.9880175557143, Tokens per sec: 79251.40583845813, Loss: 2.254828453063965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15785 , TFLOPS: 97.16645070818873, Tokens per sec: 79397.20816062298, Loss: 2.2530901432037354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15786 , TFLOPS: 96.64865422140406, Tokens per sec: 78974.10332200384, Loss: 2.2467968463897705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15787 , TFLOPS: 96.6177711678461, Tokens per sec: 78948.8679839411, Loss: 2.236017942428589 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15788 , TFLOPS: 96.52184504147982, Tokens per sec: 78870.48427672904, Loss: 2.2426559925079346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15789 , TFLOPS: 97.17925514380691, Tokens per sec: 79407.67099458208, Loss: 2.2601406574249268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15790 , TFLOPS: 97.9856001692576, Tokens per sec: 80066.5562720454, Loss: 2.240377902984619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15791 , TFLOPS: 96.87585263015231, Tokens per sec: 79159.75299039939, Loss: 2.260706901550293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15792 , TFLOPS: 97.21430282331401, Tokens per sec: 79436.30935571485, Loss: 2.2623353004455566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15793 , TFLOPS: 96.178146154305, Tokens per sec: 78589.63906842204, Loss: 2.2298665046691895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15794 , TFLOPS: 97.29087953793342, Tokens per sec: 79498.88216049038, Loss: 2.2464447021484375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15795 , TFLOPS: 96.76310129780995, Tokens per sec: 79067.62097426494, Loss: 2.2779152393341064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15796 , TFLOPS: 97.10679075964919, Tokens per sec: 79348.4584808876, Loss: 2.2693347930908203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15797 , TFLOPS: 97.89075741836716, Tokens per sec: 79989.05781882322, Loss: 2.2591068744659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15798 , TFLOPS: 96.05862082753349, Tokens per sec: 78491.9718470614, Loss: 2.2639687061309814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15799 , TFLOPS: 98.36669972970374, Tokens per sec: 80377.96253326138, Loss: 2.250171184539795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15800 , TFLOPS: 97.27429393910911, Tokens per sec: 79485.32964073974, Loss: 2.2449514865875244 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/model.pt +[rank0]:[2024-08-30 05:13:07,508] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007670662991586141, 'preprocessing_with_comm': 0.0016288890037685633, 'state_converting': 2.596128243007115, : 2.6070869190007215}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800, took 14.75s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0013800 + eval ppl=7.258831024169922, eval loss=1.9822187423706055 +------------------------------------------------------------------ +iteration: 15801 , TFLOPS: 94.16114420332579, Tokens per sec: 76941.49485202758, Loss: 2.225766897201538 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15802 , TFLOPS: 96.19163750766916, Tokens per sec: 78600.66320055422, Loss: 2.261967420578003 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15803 , TFLOPS: 96.6603803948268, Tokens per sec: 78983.68508016612, Loss: 2.245203971862793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15804 , TFLOPS: 97.36637847028305, Tokens per sec: 79560.5742816287, Loss: 2.277320384979248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15805 , TFLOPS: 96.51537326683294, Tokens per sec: 78865.19602306678, Loss: 2.2365286350250244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15806 , TFLOPS: 97.20325817542604, Tokens per sec: 79427.28448961106, Loss: 2.2783398628234863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15807 , TFLOPS: 96.12742113493573, Tokens per sec: 78548.19034931724, Loss: 2.282031297683716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15808 , TFLOPS: 97.86304238006393, Tokens per sec: 79966.41114757709, Loss: 2.245997428894043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15809 , TFLOPS: 97.23372176685403, Tokens per sec: 79452.1770743695, Loss: 2.2446911334991455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15810 , TFLOPS: 97.69130294012669, Tokens per sec: 79826.07843023789, Loss: 2.24637770652771 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15811 , TFLOPS: 96.43745227873984, Tokens per sec: 78801.52477783123, Loss: 2.265554189682007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15812 , TFLOPS: 97.69325809143893, Tokens per sec: 79827.67603470519, Loss: 2.264028310775757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15813 , TFLOPS: 97.67681746221989, Tokens per sec: 79814.24197335086, Loss: 2.240276336669922 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15814 , TFLOPS: 97.77650896237289, Tokens per sec: 79895.7024644134, Loss: 2.2435898780822754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15815 , TFLOPS: 97.10711697808658, Tokens per sec: 79348.72504237041, Loss: 2.2546167373657227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15816 , TFLOPS: 97.0789121628611, Tokens per sec: 79325.67816179313, Loss: 2.2455596923828125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15817 , TFLOPS: 97.16477109263572, Tokens per sec: 79395.83570351746, Loss: 2.2448410987854004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15818 , TFLOPS: 96.06527407964877, Tokens per sec: 78497.40839063475, Loss: 2.273789167404175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15819 , TFLOPS: 97.67272701461918, Tokens per sec: 79810.89956331883, Loss: 2.2470216751098633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15820 , TFLOPS: 96.60091200832974, Tokens per sec: 78935.09193070691, Loss: 2.2478926181793213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15821 , TFLOPS: 97.33449321870707, Tokens per sec: 79534.52002176656, Loss: 2.229644775390625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15822 , TFLOPS: 96.50227315046321, Tokens per sec: 78854.49158075407, Loss: 2.2548623085021973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15823 , TFLOPS: 96.63560664907826, Tokens per sec: 78963.44181478203, Loss: 2.2595667839050293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15824 , TFLOPS: 97.31054242864349, Tokens per sec: 79514.9492146574, Loss: 2.256570339202881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15825 , TFLOPS: 97.3346986921224, Tokens per sec: 79534.68791938358, Loss: 2.2683606147766113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15826 , TFLOPS: 95.39604178950808, Tokens per sec: 77950.56145878903, Loss: 2.2557995319366455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15827 , TFLOPS: 97.04543175775751, Tokens per sec: 79298.32046092069, Loss: 2.2790796756744385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15828 , TFLOPS: 96.08859342889315, Tokens per sec: 78516.46323119578, Loss: 2.2595508098602295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15829 , TFLOPS: 97.97501939724093, Tokens per sec: 80057.91045085728, Loss: 2.279268980026245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15830 , TFLOPS: 95.6056946051013, Tokens per sec: 78121.87417135386, Loss: 2.2415664196014404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15831 , TFLOPS: 95.62696630680087, Tokens per sec: 78139.25582639487, Loss: 2.2444512844085693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15832 , TFLOPS: 97.21911605451616, Tokens per sec: 79440.24237083373, Loss: 2.2700860500335693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15833 , TFLOPS: 97.31845566698065, Tokens per sec: 79521.4153254076, Loss: 2.2652480602264404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15834 , TFLOPS: 95.98984297699684, Tokens per sec: 78435.77169488852, Loss: 2.2541282176971436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15835 , TFLOPS: 96.59481435894502, Tokens per sec: 78930.1093844272, Loss: 2.236039161682129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15836 , TFLOPS: 97.1888603941916, Tokens per sec: 79415.51969193254, Loss: 2.2644400596618652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15837 , TFLOPS: 95.12745415459203, Tokens per sec: 77731.09158823831, Loss: 2.252495050430298 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15838 , TFLOPS: 97.97311813208478, Tokens per sec: 80056.35687815526, Loss: 2.2392983436584473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15839 , TFLOPS: 95.89155941606526, Tokens per sec: 78355.46166720739, Loss: 2.2505671977996826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15840 , TFLOPS: 96.14836372240177, Tokens per sec: 78565.30307664591, Loss: 2.224519729614258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15841 , TFLOPS: 97.82253056177638, Tokens per sec: 79933.30789798735, Loss: 2.2632060050964355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15842 , TFLOPS: 96.76572441344729, Tokens per sec: 79069.7643895772, Loss: 2.2509307861328125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15843 , TFLOPS: 97.86711580811986, Tokens per sec: 79969.73965050097, Loss: 2.2412023544311523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15844 , TFLOPS: 96.91872875909756, Tokens per sec: 79194.7881790903, Loss: 2.254969596862793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15845 , TFLOPS: 96.01599974064081, Tokens per sec: 78457.14505979701, Loss: 2.255084276199341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15846 , TFLOPS: 97.86991929924052, Tokens per sec: 79972.03045526394, Loss: 2.2535653114318848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15847 , TFLOPS: 97.23486494380674, Tokens per sec: 79453.11119368555, Loss: 2.2501447200775146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15848 , TFLOPS: 97.79051580524404, Tokens per sec: 79907.14781629163, Loss: 2.2508723735809326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15849 , TFLOPS: 96.0545391998666, Tokens per sec: 78488.63664402434, Loss: 2.247713565826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15850 , TFLOPS: 97.10908568184333, Tokens per sec: 79350.33372088897, Loss: 2.2720863819122314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15851 , TFLOPS: 98.45441080631915, Tokens per sec: 80449.6335118478, Loss: 2.251659870147705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15852 , TFLOPS: 97.24050839556392, Tokens per sec: 79457.72260339171, Loss: 2.256194829940796 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15853 , TFLOPS: 97.23053496610441, Tokens per sec: 79449.57305744142, Loss: 2.2751715183258057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15854 , TFLOPS: 96.68584765608578, Tokens per sec: 79004.49503492645, Loss: 2.228304386138916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15855 , TFLOPS: 97.13515003180619, Tokens per sec: 79371.63157219971, Loss: 2.2515347003936768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15856 , TFLOPS: 97.20060399466027, Tokens per sec: 79425.11569018266, Loss: 2.279318332672119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15857 , TFLOPS: 98.36804227652927, Tokens per sec: 80379.05956283274, Loss: 2.2683088779449463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15858 , TFLOPS: 96.17817603152994, Tokens per sec: 78589.66348187164, Loss: 2.260589122772217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15859 , TFLOPS: 97.85548647407839, Tokens per sec: 79960.23702228998, Loss: 2.245278835296631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15860 , TFLOPS: 96.71856392536174, Tokens per sec: 79031.22834074353, Loss: 2.257089614868164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15861 , TFLOPS: 95.91224129822925, Tokens per sec: 78372.36136552259, Loss: 2.271439552307129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15862 , TFLOPS: 96.50904906903916, Tokens per sec: 78860.02835825014, Loss: 2.2345387935638428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15863 , TFLOPS: 97.21134799766496, Tokens per sec: 79433.89489161305, Loss: 2.2676281929016113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15864 , TFLOPS: 96.18400438440969, Tokens per sec: 78594.42597904483, Loss: 2.2224817276000977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15865 , TFLOPS: 96.20146374465527, Tokens per sec: 78608.69247174542, Loss: 2.219148635864258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15866 , TFLOPS: 97.3107819541154, Tokens per sec: 79515.14493708644, Loss: 2.242765426635742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15867 , TFLOPS: 96.30277992365608, Tokens per sec: 78691.48052971745, Loss: 2.2514541149139404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15868 , TFLOPS: 96.19904379795803, Tokens per sec: 78606.71507100413, Loss: 2.2642667293548584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15869 , TFLOPS: 96.30962485500497, Tokens per sec: 78697.07369932684, Loss: 2.2252209186553955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15870 , TFLOPS: 97.96451831989917, Tokens per sec: 80049.32975023954, Loss: 2.250180959701538 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15871 , TFLOPS: 97.38723087090884, Tokens per sec: 79577.61331496856, Loss: 2.251946449279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15872 , TFLOPS: 96.05755533831369, Tokens per sec: 78491.10120839176, Loss: 2.2876999378204346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15873 , TFLOPS: 97.79177160825219, Tokens per sec: 79908.17396525684, Loss: 2.266411304473877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15874 , TFLOPS: 96.83700639956437, Tokens per sec: 79128.01073539503, Loss: 2.2587060928344727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15875 , TFLOPS: 97.32356188153221, Tokens per sec: 79525.58774476332, Loss: 2.272329807281494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15876 , TFLOPS: 96.60410083697131, Tokens per sec: 78937.6976046777, Loss: 2.285977602005005 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15877 , TFLOPS: 96.12137659868232, Tokens per sec: 78543.25120314438, Loss: 2.2512471675872803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15878 , TFLOPS: 97.27279156742674, Tokens per sec: 79484.10201416358, Loss: 2.280797243118286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15879 , TFLOPS: 97.0715400328447, Tokens per sec: 79319.65420458122, Loss: 2.2443079948425293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15880 , TFLOPS: 97.59268219641329, Tokens per sec: 79745.49288182588, Loss: 2.253154754638672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15881 , TFLOPS: 96.77876938924021, Tokens per sec: 79080.42377510516, Loss: 2.240880012512207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15882 , TFLOPS: 95.92552534195757, Tokens per sec: 78383.21609961471, Loss: 2.2440409660339355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15883 , TFLOPS: 97.3048922428115, Tokens per sec: 79510.33230236538, Loss: 2.2600855827331543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15884 , TFLOPS: 96.21761101604184, Tokens per sec: 78621.88682286313, Loss: 2.2659685611724854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15885 , TFLOPS: 97.89667558504568, Tokens per sec: 79993.8937051634, Loss: 2.2416305541992188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15886 , TFLOPS: 97.1302214142478, Tokens per sec: 79367.60427191884, Loss: 2.25363826751709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15887 , TFLOPS: 96.00829450688089, Tokens per sec: 78450.84891494142, Loss: 2.26229190826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15888 , TFLOPS: 97.78920569409804, Tokens per sec: 79906.07729074897, Loss: 2.2608530521392822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15889 , TFLOPS: 98.33260502751199, Tokens per sec: 80350.10287442479, Loss: 2.2694594860076904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15890 , TFLOPS: 96.0627029916103, Tokens per sec: 78495.30748841281, Loss: 2.256016969680786 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15891 , TFLOPS: 97.88109634783483, Tokens per sec: 79981.16350939317, Loss: 2.259566307067871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15892 , TFLOPS: 95.56055158478762, Tokens per sec: 78084.98664737081, Loss: 2.2442939281463623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15893 , TFLOPS: 96.51446846884468, Tokens per sec: 78864.456689339, Loss: 2.254086494445801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15894 , TFLOPS: 96.63722118720139, Tokens per sec: 78964.76109544435, Loss: 2.2451388835906982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15895 , TFLOPS: 97.74216242058304, Tokens per sec: 79867.63702095827, Loss: 2.2627451419830322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15896 , TFLOPS: 96.50451518055068, Tokens per sec: 78856.32360125353, Loss: 2.2553234100341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15897 , TFLOPS: 96.21421204542986, Tokens per sec: 78619.1094364788, Loss: 2.2746901512145996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15898 , TFLOPS: 97.72430363742905, Tokens per sec: 79853.04414951717, Loss: 2.2396626472473145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15899 , TFLOPS: 95.82789628094, Tokens per sec: 78303.44088066164, Loss: 2.255866050720215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15900 , TFLOPS: 96.77664295443012, Tokens per sec: 79078.68621048253, Loss: 2.2296910285949707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15901 , TFLOPS: 97.24362827397762, Tokens per sec: 79460.27193635635, Loss: 2.280503749847412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15902 , TFLOPS: 96.50863506463689, Tokens per sec: 78859.69006459563, Loss: 2.2607622146606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15903 , TFLOPS: 96.68801044600059, Tokens per sec: 79006.26230624101, Loss: 2.2654366493225098 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15904 , TFLOPS: 97.38415086625481, Tokens per sec: 79575.09656387939, Loss: 2.245466709136963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15905 , TFLOPS: 98.49209068544305, Tokens per sec: 80480.42271104633, Loss: 2.2326154708862305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15906 , TFLOPS: 96.2131760852897, Tokens per sec: 78618.26292678028, Loss: 2.227557420730591 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15907 , TFLOPS: 97.41316595193474, Tokens per sec: 79598.80553730321, Loss: 2.2507781982421875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15908 , TFLOPS: 97.18013208246171, Tokens per sec: 79408.38756373133, Loss: 2.2603213787078857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15909 , TFLOPS: 96.52034565394702, Tokens per sec: 78869.25908857831, Loss: 2.2482962608337402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15910 , TFLOPS: 97.22864363298358, Tokens per sec: 79448.02760046115, Loss: 2.2457871437072754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15911 , TFLOPS: 97.74985844649456, Tokens per sec: 79873.9256418439, Loss: 2.2627127170562744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15912 , TFLOPS: 97.17467545174111, Tokens per sec: 79403.92881030335, Loss: 2.2469866275787354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15913 , TFLOPS: 96.08444825865467, Tokens per sec: 78513.07610588775, Loss: 2.2295773029327393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15914 , TFLOPS: 96.59469831600579, Tokens per sec: 78930.01456275434, Loss: 2.2343897819519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15915 , TFLOPS: 96.6654821084744, Tokens per sec: 78987.85382171735, Loss: 2.279796600341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15916 , TFLOPS: 96.81560461451255, Tokens per sec: 79110.52278589824, Loss: 2.2563297748565674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15917 , TFLOPS: 97.15333273244538, Tokens per sec: 79386.48912495634, Loss: 2.2444889545440674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15918 , TFLOPS: 97.80621620114297, Tokens per sec: 79919.97701394478, Loss: 2.253815174102783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15919 , TFLOPS: 96.77298770858481, Tokens per sec: 79075.69941500791, Loss: 2.2429003715515137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15920 , TFLOPS: 96.06226962916867, Tokens per sec: 78494.953376806, Loss: 2.2455642223358154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15921 , TFLOPS: 97.3661044553768, Tokens per sec: 79560.35037699502, Loss: 2.2469959259033203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15922 , TFLOPS: 96.77834049102167, Tokens per sec: 79080.07331132995, Loss: 2.2446367740631104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15923 , TFLOPS: 97.09224275084745, Tokens per sec: 79336.57092839663, Loss: 2.23502516746521 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15924 , TFLOPS: 97.29526709285246, Tokens per sec: 79502.46734456044, Loss: 2.2564871311187744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15925 , TFLOPS: 95.83756693888526, Tokens per sec: 78311.34302421361, Loss: 2.245405673980713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15926 , TFLOPS: 97.73331379999746, Tokens per sec: 79860.40658529403, Loss: 2.2689623832702637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15927 , TFLOPS: 98.39099428303525, Tokens per sec: 80397.81423818601, Loss: 2.2376182079315186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15928 , TFLOPS: 96.0137076132929, Tokens per sec: 78455.27210353642, Loss: 2.246539354324341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15929 , TFLOPS: 97.26170237096504, Tokens per sec: 79475.04074627341, Loss: 2.2643377780914307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15930 , TFLOPS: 95.9081432386486, Tokens per sec: 78369.01273554582, Loss: 2.2518513202667236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15931 , TFLOPS: 96.17933410869873, Tokens per sec: 78590.6097765378, Loss: 2.2562224864959717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15932 , TFLOPS: 96.58901827996823, Tokens per sec: 78925.3732590909, Loss: 2.2471201419830322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15933 , TFLOPS: 95.97836509826814, Tokens per sec: 78426.39282471241, Loss: 2.251230001449585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15934 , TFLOPS: 97.21535214364181, Tokens per sec: 79437.1667823666, Loss: 2.2330212593078613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15935 , TFLOPS: 94.26976258672177, Tokens per sec: 77030.24972918635, Loss: 2.2481577396392822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15936 , TFLOPS: 97.76319119800601, Tokens per sec: 79884.82017631947, Loss: 2.2514238357543945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15937 , TFLOPS: 96.14470006431623, Tokens per sec: 78562.30940731327, Loss: 2.248494863510132 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15938 , TFLOPS: 97.26890729212874, Tokens per sec: 79480.92807282746, Loss: 2.259431838989258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15939 , TFLOPS: 97.21879111405565, Tokens per sec: 79439.97685361875, Loss: 2.2518417835235596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15940 , TFLOPS: 96.59522685408088, Tokens per sec: 78930.44644482127, Loss: 2.27065110206604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15941 , TFLOPS: 96.68627680612887, Tokens per sec: 79004.84570447396, Loss: 2.266056776046753 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15942 , TFLOPS: 97.36208255042008, Tokens per sec: 79557.06397492184, Loss: 2.2302048206329346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15943 , TFLOPS: 98.33145023727081, Tokens per sec: 80349.15926559178, Loss: 2.2410128116607666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15944 , TFLOPS: 95.56647378975579, Tokens per sec: 78089.82583350143, Loss: 2.260881185531616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15945 , TFLOPS: 96.56584669139063, Tokens per sec: 78906.43915757441, Loss: 2.260740280151367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15946 , TFLOPS: 97.77096716369691, Tokens per sec: 79891.17411805666, Loss: 2.246331214904785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15947 , TFLOPS: 97.12269343748778, Tokens per sec: 79361.45296832085, Loss: 2.245573043823242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15948 , TFLOPS: 96.47925067999661, Tokens per sec: 78835.6793274847, Loss: 2.254894733428955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15949 , TFLOPS: 97.80965635427316, Tokens per sec: 79922.78805162487, Loss: 2.268293619155884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15950 , TFLOPS: 96.4992806430741, Tokens per sec: 78852.04632593243, Loss: 2.2420029640197754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15951 , TFLOPS: 96.85393994273527, Tokens per sec: 79141.84756942817, Loss: 2.2554314136505127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15952 , TFLOPS: 96.71918601253313, Tokens per sec: 79031.73666418521, Loss: 2.242997646331787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15953 , TFLOPS: 96.05450749161373, Tokens per sec: 78488.6107343947, Loss: 2.280500888824463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15954 , TFLOPS: 97.25905809412238, Tokens per sec: 79472.88003959555, Loss: 2.26176118850708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15955 , TFLOPS: 97.21755493412613, Tokens per sec: 79438.96673917622, Loss: 2.2478530406951904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15956 , TFLOPS: 97.15822422905686, Tokens per sec: 79390.48609275589, Loss: 2.2424633502960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15957 , TFLOPS: 96.22692347781177, Tokens per sec: 78629.49627509943, Loss: 2.257178783416748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15958 , TFLOPS: 96.93536938824246, Tokens per sec: 79208.38566553249, Loss: 2.2457940578460693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15959 , TFLOPS: 96.5954718435333, Tokens per sec: 78930.64663200943, Loss: 2.2531466484069824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15960 , TFLOPS: 96.82384237441391, Tokens per sec: 79117.25407157245, Loss: 2.248441696166992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15961 , TFLOPS: 96.8371958486929, Tokens per sec: 79128.16553915481, Loss: 2.2496488094329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15962 , TFLOPS: 97.78277437713905, Tokens per sec: 79900.82209610495, Loss: 2.2440381050109863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15963 , TFLOPS: 95.71782473913582, Tokens per sec: 78213.49859035989, Loss: 2.261915445327759 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15964 , TFLOPS: 97.20286470336023, Tokens per sec: 79426.96297345722, Loss: 2.2587239742279053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15965 , TFLOPS: 97.18970287910761, Tokens per sec: 79416.20810804491, Loss: 2.275149345397949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15966 , TFLOPS: 97.15389263262067, Tokens per sec: 79386.94663380267, Loss: 2.2510831356048584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15967 , TFLOPS: 97.76746024089269, Tokens per sec: 79888.3085211571, Loss: 2.2570340633392334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15968 , TFLOPS: 95.67929195412735, Tokens per sec: 78182.01245980735, Loss: 2.263124465942383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15969 , TFLOPS: 97.14890320346132, Tokens per sec: 79382.86964279723, Loss: 2.25986647605896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15970 , TFLOPS: 96.54549971981012, Tokens per sec: 78889.81312331832, Loss: 2.2677359580993652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15971 , TFLOPS: 97.24430733061328, Tokens per sec: 79460.82681101383, Loss: 2.2512149810791016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15972 , TFLOPS: 96.5349105238817, Tokens per sec: 78881.16041873506, Loss: 2.2722768783569336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15973 , TFLOPS: 96.1271837509229, Tokens per sec: 78547.99637672954, Loss: 2.244102716445923 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15974 , TFLOPS: 97.2077708937111, Tokens per sec: 79430.9719478895, Loss: 2.274149179458618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15975 , TFLOPS: 96.27914586246135, Tokens per sec: 78672.16852992037, Loss: 2.2653772830963135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15976 , TFLOPS: 96.10725580722165, Tokens per sec: 78531.71274094035, Loss: 2.2395081520080566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15977 , TFLOPS: 97.42557007986316, Tokens per sec: 79608.94126954419, Loss: 2.247936964035034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15978 , TFLOPS: 97.84232798029937, Tokens per sec: 79949.48487829338, Loss: 2.243755578994751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15979 , TFLOPS: 97.24996722260374, Tokens per sec: 79465.45165445781, Loss: 2.2416114807128906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15980 , TFLOPS: 97.81167455547362, Tokens per sec: 79924.4371757789, Loss: 2.2851505279541016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15981 , TFLOPS: 98.44379386345327, Tokens per sec: 80440.95813452768, Loss: 2.2651586532592773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15982 , TFLOPS: 97.21089058462978, Tokens per sec: 79433.52112764714, Loss: 2.2550835609436035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15983 , TFLOPS: 97.29439199569957, Tokens per sec: 79501.7522801498, Loss: 2.2308411598205566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15984 , TFLOPS: 97.29137564593454, Tokens per sec: 79499.28754310905, Loss: 2.241241931915283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15985 , TFLOPS: 95.93604537065733, Tokens per sec: 78391.81228587493, Loss: 2.2619996070861816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15986 , TFLOPS: 97.12551275222148, Tokens per sec: 79363.75670296539, Loss: 2.2735772132873535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15987 , TFLOPS: 97.73647083709182, Tokens per sec: 79862.9862815732, Loss: 2.275331497192383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15988 , TFLOPS: 96.63728172878704, Tokens per sec: 78964.81056553248, Loss: 2.2755837440490723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15989 , TFLOPS: 96.17685635195011, Tokens per sec: 78588.58513771539, Loss: 2.257319450378418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15990 , TFLOPS: 95.9966313834665, Tokens per sec: 78441.31867656401, Loss: 2.259047269821167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15991 , TFLOPS: 96.77663783289671, Tokens per sec: 79078.68202554574, Loss: 2.2357468605041504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15992 , TFLOPS: 96.78082599058997, Tokens per sec: 79082.10427700967, Loss: 2.2476541996002197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15993 , TFLOPS: 95.99885713711927, Tokens per sec: 78443.13739716969, Loss: 2.231105089187622 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15994 , TFLOPS: 97.72621525524264, Tokens per sec: 79854.60618164207, Loss: 2.267237663269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15995 , TFLOPS: 96.25084004189608, Tokens per sec: 78649.03911527985, Loss: 2.248429536819458 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15996 , TFLOPS: 96.33776911276394, Tokens per sec: 78720.07109683934, Loss: 2.26597261428833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15997 , TFLOPS: 97.23519596857898, Tokens per sec: 79453.38168254826, Loss: 2.24981689453125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15998 , TFLOPS: 96.09913368725665, Tokens per sec: 78525.07594763499, Loss: 2.2645070552825928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 15999 , TFLOPS: 97.24150244000455, Tokens per sec: 79458.53486269312, Loss: 2.2607483863830566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16000 , TFLOPS: 97.42577486792516, Tokens per sec: 79609.10860714134, Loss: 2.2655653953552246 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/optimizer.pt +[rank0]:[2024-08-30 06:08:43,597] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0076027150062145665, 'preprocessing_with_comm': 0.001579224001034163, 'state_converting': 2.6278631359891733, : 2.6386362290068064}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000, took 14.87s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014000 + eval ppl=7.649644374847412, eval loss=2.0346591472625732 +------------------------------------------------------------------ +iteration: 16001 , TFLOPS: 95.51894041186016, Tokens per sec: 78050.98508680491, Loss: 2.2581286430358887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16002 , TFLOPS: 96.69869119803441, Tokens per sec: 79014.98982367486, Loss: 2.2585625648498535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16003 , TFLOPS: 97.24081182695576, Tokens per sec: 79457.970544993, Loss: 2.2406721115112305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16004 , TFLOPS: 96.72334850848672, Tokens per sec: 79035.13794678109, Loss: 2.245359420776367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16005 , TFLOPS: 96.29383336880068, Tokens per sec: 78684.17006944028, Loss: 2.265766143798828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16006 , TFLOPS: 97.38733380448048, Tokens per sec: 79577.69742463969, Loss: 2.270486354827881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16007 , TFLOPS: 96.93716377211601, Tokens per sec: 79209.85190278712, Loss: 2.286911964416504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16008 , TFLOPS: 96.88025957961261, Tokens per sec: 79163.35402224837, Loss: 2.2583975791931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16009 , TFLOPS: 97.31250614872604, Tokens per sec: 79516.55382088752, Loss: 2.2228167057037354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16010 , TFLOPS: 97.8031280762611, Tokens per sec: 79917.45362760841, Loss: 2.266462802886963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16011 , TFLOPS: 96.98379708886814, Tokens per sec: 79247.95718635365, Loss: 2.238102436065674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16012 , TFLOPS: 97.82953669598844, Tokens per sec: 79939.03279060597, Loss: 2.2489569187164307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16013 , TFLOPS: 96.98155235765266, Tokens per sec: 79246.12295869295, Loss: 2.286003589630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16014 , TFLOPS: 97.66989515135039, Tokens per sec: 79808.58557494327, Loss: 2.225843667984009 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16015 , TFLOPS: 97.77726370587101, Tokens per sec: 79896.31918475454, Loss: 2.257481813430786 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16016 , TFLOPS: 95.94090750767845, Tokens per sec: 78395.78526319742, Loss: 2.241697311401367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16017 , TFLOPS: 96.59227521690568, Tokens per sec: 78928.03458610353, Loss: 2.243682384490967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16018 , TFLOPS: 97.19336985193073, Tokens per sec: 79419.20448593496, Loss: 2.244894027709961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16019 , TFLOPS: 97.17989459375356, Tokens per sec: 79408.19350559439, Loss: 2.274574041366577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16020 , TFLOPS: 97.21602274129812, Tokens per sec: 79437.7147449743, Loss: 2.258288860321045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16021 , TFLOPS: 96.06075763538134, Tokens per sec: 78493.7178877602, Loss: 2.26522159576416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16022 , TFLOPS: 96.61683882756141, Tokens per sec: 78948.10614469391, Loss: 2.242549419403076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16023 , TFLOPS: 97.40212938837874, Tokens per sec: 79589.78727711517, Loss: 2.2535042762756348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16024 , TFLOPS: 95.68191302796039, Tokens per sec: 78184.15420670876, Loss: 2.2545855045318604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16025 , TFLOPS: 97.74831206530558, Tokens per sec: 79872.662053967, Loss: 2.2581892013549805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16026 , TFLOPS: 97.29539529593414, Tokens per sec: 79502.57210259873, Loss: 2.2345550060272217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16027 , TFLOPS: 96.6283218704805, Tokens per sec: 78957.48923466292, Loss: 2.2366106510162354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16028 , TFLOPS: 97.10218830400107, Tokens per sec: 79344.69769589978, Loss: 2.271411895751953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16029 , TFLOPS: 97.77825157022646, Tokens per sec: 79897.12639414927, Loss: 2.2409117221832275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16030 , TFLOPS: 96.09257283642556, Tokens per sec: 78519.71490752957, Loss: 2.2474308013916016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16031 , TFLOPS: 96.48226328630943, Tokens per sec: 78838.1410056543, Loss: 2.2491068840026855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16032 , TFLOPS: 97.34095340545205, Tokens per sec: 79539.79880665589, Loss: 2.2092862129211426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16033 , TFLOPS: 96.1204452861198, Tokens per sec: 78542.49020367542, Loss: 2.259713649749756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16034 , TFLOPS: 96.57748466121163, Tokens per sec: 78915.94884230313, Loss: 2.2611935138702393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16035 , TFLOPS: 96.5806137792012, Tokens per sec: 78918.50572516306, Loss: 2.258296012878418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16036 , TFLOPS: 97.11062228790176, Tokens per sec: 79351.58932125509, Loss: 2.2637269496917725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16037 , TFLOPS: 95.83659104240846, Tokens per sec: 78310.54559408058, Loss: 2.2494449615478516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16038 , TFLOPS: 96.99924437456681, Tokens per sec: 79260.57956114656, Loss: 2.2454848289489746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16039 , TFLOPS: 96.25877174483634, Tokens per sec: 78655.52031393253, Loss: 2.2479915618896484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16040 , TFLOPS: 96.25078794013329, Tokens per sec: 78648.99654158826, Loss: 2.263489007949829 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16041 , TFLOPS: 96.12585363262541, Tokens per sec: 78546.90950282838, Loss: 2.265573263168335 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16042 , TFLOPS: 97.70656654120644, Tokens per sec: 79838.5507116002, Loss: 2.242558002471924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16043 , TFLOPS: 96.18601278868454, Tokens per sec: 78596.06709787874, Loss: 2.258265972137451 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16044 , TFLOPS: 97.99527561326555, Tokens per sec: 80074.46232641238, Loss: 2.2450437545776367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16045 , TFLOPS: 97.26696617819744, Tokens per sec: 79479.34193866547, Loss: 2.2545835971832275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16046 , TFLOPS: 97.30720132653444, Tokens per sec: 79512.21911411648, Loss: 2.2352757453918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16047 , TFLOPS: 98.48091460275378, Tokens per sec: 80471.29044618297, Loss: 2.2674312591552734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16048 , TFLOPS: 97.10074436940124, Tokens per sec: 79343.51781976807, Loss: 2.2576591968536377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16049 , TFLOPS: 97.2049740305024, Tokens per sec: 79428.68655896401, Loss: 2.2324137687683105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16050 , TFLOPS: 97.8021308805292, Tokens per sec: 79916.63879330577, Loss: 2.2540745735168457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16051 , TFLOPS: 97.80949875283106, Tokens per sec: 79922.65927142951, Loss: 2.2556159496307373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16052 , TFLOPS: 95.67242656884183, Tokens per sec: 78176.4025767598, Loss: 2.2673115730285645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16053 , TFLOPS: 97.844673595148, Tokens per sec: 79951.40154056769, Loss: 2.2471418380737305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16054 , TFLOPS: 96.58453499726014, Tokens per sec: 78921.70985338016, Loss: 2.2461278438568115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16055 , TFLOPS: 97.90800065241379, Tokens per sec: 80003.14770924304, Loss: 2.2729756832122803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16056 , TFLOPS: 96.5368600818453, Tokens per sec: 78882.7534527332, Loss: 2.2546253204345703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16057 , TFLOPS: 96.63649254313891, Tokens per sec: 78964.16570162395, Loss: 2.2350544929504395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16058 , TFLOPS: 97.23641762832251, Tokens per sec: 79454.37993217303, Loss: 2.2437186241149902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16059 , TFLOPS: 96.73510240218904, Tokens per sec: 79044.7423558974, Loss: 2.272423267364502 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16060 , TFLOPS: 96.45774306096338, Tokens per sec: 78818.10489831728, Loss: 2.2705132961273193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16061 , TFLOPS: 96.67061623246671, Tokens per sec: 78992.0490465956, Loss: 2.239621162414551 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16062 , TFLOPS: 96.77593412067063, Tokens per sec: 79078.10700417064, Loss: 2.2708637714385986 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16063 , TFLOPS: 97.68564727312048, Tokens per sec: 79821.4570391373, Loss: 2.255619764328003 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16064 , TFLOPS: 96.70816870610712, Tokens per sec: 79022.73413949383, Loss: 2.2503037452697754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16065 , TFLOPS: 97.24270039441829, Tokens per sec: 79459.5137420827, Loss: 2.2397165298461914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16066 , TFLOPS: 97.65409749983138, Tokens per sec: 79795.67690721936, Loss: 2.2617998123168945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16067 , TFLOPS: 97.02291209881027, Tokens per sec: 79279.9190678873, Loss: 2.2464470863342285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16068 , TFLOPS: 96.82985252921556, Tokens per sec: 79122.16512377601, Loss: 2.269345760345459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16069 , TFLOPS: 97.25923670845035, Tokens per sec: 79473.02598996086, Loss: 2.238401412963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16070 , TFLOPS: 97.6709375813282, Tokens per sec: 79809.43737131267, Loss: 2.263436794281006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16071 , TFLOPS: 96.0776037683333, Tokens per sec: 78507.4832966533, Loss: 2.2592499256134033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16072 , TFLOPS: 97.68332363472243, Tokens per sec: 79819.55833438647, Loss: 2.267789840698242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16073 , TFLOPS: 95.81833930053992, Tokens per sec: 78295.63162595818, Loss: 2.2506086826324463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16074 , TFLOPS: 98.36008486817605, Tokens per sec: 80372.55735962543, Loss: 2.2450785636901855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16075 , TFLOPS: 95.98026108722784, Tokens per sec: 78427.9420860985, Loss: 2.2153372764587402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16076 , TFLOPS: 97.75968486868352, Tokens per sec: 79881.95506436912, Loss: 2.2542412281036377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16077 , TFLOPS: 97.02278795457546, Tokens per sec: 79279.81762643729, Loss: 2.2856554985046387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16078 , TFLOPS: 97.80268335449024, Tokens per sec: 79917.09023400127, Loss: 2.224611520767212 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16079 , TFLOPS: 95.98853475567604, Tokens per sec: 78434.70271357069, Loss: 2.2412912845611572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16080 , TFLOPS: 97.13839171858329, Tokens per sec: 79374.28043791371, Loss: 2.252077102661133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16081 , TFLOPS: 96.08861902393738, Tokens per sec: 78516.48414556531, Loss: 2.257647752761841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16082 , TFLOPS: 97.95382869687086, Tokens per sec: 80040.59498408774, Loss: 2.2553815841674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16083 , TFLOPS: 96.63367214757871, Tokens per sec: 78961.86108380825, Loss: 2.252700090408325 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16084 , TFLOPS: 96.065706842738, Tokens per sec: 78497.76201249527, Loss: 2.2720799446105957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16085 , TFLOPS: 98.41305875900525, Tokens per sec: 80415.84368949123, Loss: 2.2597768306732178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16086 , TFLOPS: 97.87338075934876, Tokens per sec: 79974.85890342428, Loss: 2.256436824798584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16087 , TFLOPS: 97.2167868745213, Tokens per sec: 79438.33913790151, Loss: 2.266827344894409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16088 , TFLOPS: 97.17917114162012, Tokens per sec: 79407.60235423285, Loss: 2.2650153636932373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16089 , TFLOPS: 97.71346614423916, Tokens per sec: 79844.18855484982, Loss: 2.2547006607055664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16090 , TFLOPS: 97.14993986166417, Tokens per sec: 79383.71672290075, Loss: 2.261385202407837 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16091 , TFLOPS: 96.70256545205932, Tokens per sec: 79018.15557637079, Loss: 2.2352380752563477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16092 , TFLOPS: 96.65906612216085, Tokens per sec: 78982.61115413837, Loss: 2.2599782943725586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16093 , TFLOPS: 96.5301431430516, Tokens per sec: 78877.26486913547, Loss: 2.253633499145508 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16094 , TFLOPS: 97.15198900778107, Tokens per sec: 79385.39113294258, Loss: 2.244347095489502 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16095 , TFLOPS: 96.08715882105973, Tokens per sec: 78515.290976205, Loss: 2.267706871032715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16096 , TFLOPS: 97.37871172072518, Tokens per sec: 79570.65209805107, Loss: 2.2419071197509766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16097 , TFLOPS: 96.52757677129355, Tokens per sec: 78875.16782070769, Loss: 2.277200698852539 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16098 , TFLOPS: 96.6192314084471, Tokens per sec: 78950.06118412611, Loss: 2.254450798034668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16099 , TFLOPS: 96.82309298283921, Tokens per sec: 79116.6417244257, Loss: 2.259582996368408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16100 , TFLOPS: 96.1540829137537, Tokens per sec: 78569.97637512465, Loss: 2.258082628250122 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16101 , TFLOPS: 97.84962855835484, Tokens per sec: 79955.4503685561, Loss: 2.2284398078918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16102 , TFLOPS: 96.65518067874044, Tokens per sec: 78979.43625829935, Loss: 2.2602853775024414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16103 , TFLOPS: 96.76621852180605, Tokens per sec: 79070.16813823664, Loss: 2.262430191040039 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16104 , TFLOPS: 97.71761443207274, Tokens per sec: 79847.57822762505, Loss: 2.245723247528076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16105 , TFLOPS: 97.09532355337272, Tokens per sec: 79339.08833144695, Loss: 2.267357349395752 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16106 , TFLOPS: 97.76940892831213, Tokens per sec: 79889.9008438112, Loss: 2.2680490016937256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16107 , TFLOPS: 97.86858098477981, Tokens per sec: 79970.9368840668, Loss: 2.2554566860198975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16108 , TFLOPS: 97.09994016782132, Tokens per sec: 79342.86068595512, Loss: 2.2665395736694336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16109 , TFLOPS: 97.33266593989134, Tokens per sec: 79533.02690520788, Loss: 2.245393991470337 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16110 , TFLOPS: 97.183964241905, Tokens per sec: 79411.518919861, Loss: 2.247131824493408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16111 , TFLOPS: 96.01101438067434, Tokens per sec: 78453.07139383382, Loss: 2.2464332580566406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16112 , TFLOPS: 96.4904714021408, Tokens per sec: 78844.8480683958, Loss: 2.259434223175049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16113 , TFLOPS: 96.73223474873129, Tokens per sec: 79042.3991224371, Loss: 2.2374753952026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16114 , TFLOPS: 97.03924172435023, Tokens per sec: 79293.26242527796, Loss: 2.25232195854187 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16115 , TFLOPS: 96.86463638527565, Tokens per sec: 79150.58790797883, Loss: 2.2339563369750977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16116 , TFLOPS: 96.06131669138541, Tokens per sec: 78494.17470681251, Loss: 2.2553372383117676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16117 , TFLOPS: 97.29281074114246, Tokens per sec: 79500.46019634607, Loss: 2.242281436920166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16118 , TFLOPS: 96.73298220998852, Tokens per sec: 79043.00989227174, Loss: 2.265320062637329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16119 , TFLOPS: 97.1269480229919, Tokens per sec: 79364.92949965906, Loss: 2.2399609088897705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16120 , TFLOPS: 97.82554143421665, Tokens per sec: 79935.76815936505, Loss: 2.237342357635498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16121 , TFLOPS: 97.35673888616108, Tokens per sec: 79552.69752929766, Loss: 2.2488651275634766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16122 , TFLOPS: 97.19365206176087, Tokens per sec: 79419.4350868519, Loss: 2.2589831352233887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16123 , TFLOPS: 97.70320985430206, Tokens per sec: 79835.80787632182, Loss: 2.230877637863159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16124 , TFLOPS: 97.14042934058604, Tokens per sec: 79375.94543130521, Loss: 2.2463393211364746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16125 , TFLOPS: 96.6230138296113, Tokens per sec: 78953.15189782756, Loss: 2.270214557647705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16126 , TFLOPS: 97.3101060558975, Tokens per sec: 79514.59264325343, Loss: 2.2628893852233887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16127 , TFLOPS: 97.30853453999423, Tokens per sec: 79513.30851715445, Loss: 2.2464115619659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16128 , TFLOPS: 96.4093823847812, Tokens per sec: 78778.58814489425, Loss: 2.2505288124084473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16129 , TFLOPS: 96.21550750850446, Tokens per sec: 78620.16799270523, Loss: 2.2679085731506348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16130 , TFLOPS: 97.17404228688625, Tokens per sec: 79403.41143500133, Loss: 2.2452125549316406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16131 , TFLOPS: 96.64991882366661, Tokens per sec: 78975.13666106632, Loss: 2.2540879249572754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16132 , TFLOPS: 96.64518814472216, Tokens per sec: 78971.2711015223, Loss: 2.247513771057129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16133 , TFLOPS: 95.71879474905317, Tokens per sec: 78214.29121043364, Loss: 2.2422122955322266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16134 , TFLOPS: 97.40204419684845, Tokens per sec: 79589.71766492275, Loss: 2.254897117614746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16135 , TFLOPS: 95.47751398516282, Tokens per sec: 78017.13448713941, Loss: 2.240647077560425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16136 , TFLOPS: 96.78492293361138, Tokens per sec: 79085.45199461716, Loss: 2.2539169788360596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16137 , TFLOPS: 96.25131151546475, Tokens per sec: 78649.42436846963, Loss: 2.2663674354553223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16138 , TFLOPS: 96.68861406724746, Tokens per sec: 79006.75554070041, Loss: 2.2420918941497803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16139 , TFLOPS: 97.21006704826645, Tokens per sec: 79432.84819488494, Loss: 2.2549448013305664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16140 , TFLOPS: 96.76311509502192, Tokens per sec: 79067.632248322, Loss: 2.2405033111572266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16141 , TFLOPS: 97.26213982396764, Tokens per sec: 79475.3982003829, Loss: 2.27335524559021 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16142 , TFLOPS: 97.42069583548158, Tokens per sec: 79604.95839898578, Loss: 2.2437121868133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16143 , TFLOPS: 97.11230747385873, Tokens per sec: 79352.96633008108, Loss: 2.2544331550598145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16144 , TFLOPS: 97.04828426495513, Tokens per sec: 79300.65131798193, Loss: 2.261810779571533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16145 , TFLOPS: 97.29652698216309, Tokens per sec: 79503.4968325486, Loss: 2.255100965499878 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16146 , TFLOPS: 97.71746666620994, Tokens per sec: 79847.45748433474, Loss: 2.2500739097595215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16147 , TFLOPS: 96.68306332667156, Tokens per sec: 79002.21988768708, Loss: 2.2618768215179443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16148 , TFLOPS: 97.80126174353026, Tokens per sec: 79915.92859909036, Loss: 2.2412073612213135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16149 , TFLOPS: 96.44020177683481, Tokens per sec: 78803.771463503, Loss: 2.2231357097625732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16150 , TFLOPS: 97.15655532120589, Tokens per sec: 79389.12238518936, Loss: 2.2906782627105713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16151 , TFLOPS: 96.53984944680337, Tokens per sec: 78885.19613979354, Loss: 2.2489850521087646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16152 , TFLOPS: 97.70816716600581, Tokens per sec: 79839.85862332751, Loss: 2.2568018436431885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16153 , TFLOPS: 97.75095858974105, Tokens per sec: 79874.82459721081, Loss: 2.23891544342041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16154 , TFLOPS: 96.17807885279656, Tokens per sec: 78589.58407462698, Loss: 2.2604103088378906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16155 , TFLOPS: 97.26565496184449, Tokens per sec: 79478.27051002976, Loss: 2.2453927993774414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16156 , TFLOPS: 97.09427824351052, Tokens per sec: 79338.23418184988, Loss: 2.237635374069214 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16157 , TFLOPS: 96.06901991970012, Tokens per sec: 78500.4692129672, Loss: 2.2812654972076416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16158 , TFLOPS: 97.30025562412104, Tokens per sec: 79506.54360187621, Loss: 2.2147135734558105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16159 , TFLOPS: 96.6797265948898, Tokens per sec: 78999.4933582531, Loss: 2.26366925239563 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16160 , TFLOPS: 97.80790454174449, Tokens per sec: 79921.35660051204, Loss: 2.243269443511963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16161 , TFLOPS: 96.45642073953366, Tokens per sec: 78817.02439543746, Loss: 2.251861572265625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16162 , TFLOPS: 97.10896254445043, Tokens per sec: 79350.23310215541, Loss: 2.2384355068206787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16163 , TFLOPS: 95.97071735792491, Tokens per sec: 78420.14365920762, Loss: 2.2557268142700195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16164 , TFLOPS: 97.78769968528759, Tokens per sec: 79904.84669218185, Loss: 2.2569382190704346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16165 , TFLOPS: 96.77524889146733, Tokens per sec: 79077.54708574925, Loss: 2.248897075653076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16166 , TFLOPS: 97.17348423625346, Tokens per sec: 79402.95543746377, Loss: 2.2642171382904053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16167 , TFLOPS: 96.19345264074886, Tokens per sec: 78602.14639251932, Loss: 2.240901231765747 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16168 , TFLOPS: 97.1316020957332, Tokens per sec: 79368.73246230246, Loss: 2.2668395042419434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16169 , TFLOPS: 96.7326848010832, Tokens per sec: 79042.76687179922, Loss: 2.2701427936553955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16170 , TFLOPS: 93.22840544269782, Tokens per sec: 76179.3299998867, Loss: 2.2643938064575195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16171 , TFLOPS: 95.52507548862043, Tokens per sec: 78055.99821595664, Loss: 2.237886667251587 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16172 , TFLOPS: 97.00934187029455, Tokens per sec: 79268.83048483834, Loss: 2.250430107116699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16173 , TFLOPS: 96.68734366620595, Tokens per sec: 79005.71746330637, Loss: 2.2475345134735107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16174 , TFLOPS: 95.65510214877705, Tokens per sec: 78162.24634715462, Loss: 2.241464376449585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16175 , TFLOPS: 96.72193624057067, Tokens per sec: 79033.98394631187, Loss: 2.2621262073516846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16176 , TFLOPS: 96.85932273695056, Tokens per sec: 79146.24598914692, Loss: 2.2370004653930664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16177 , TFLOPS: 98.41773316900263, Tokens per sec: 80419.66327023011, Loss: 2.26159405708313 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16178 , TFLOPS: 96.65657982621198, Tokens per sec: 78980.5795377156, Loss: 2.2325687408447266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16179 , TFLOPS: 97.3481094818232, Tokens per sec: 79545.64622087254, Loss: 2.2353875637054443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16180 , TFLOPS: 97.84919969873849, Tokens per sec: 79955.09993632375, Loss: 2.2452194690704346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16181 , TFLOPS: 97.11256702842272, Tokens per sec: 79353.17841879684, Loss: 2.253150463104248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16182 , TFLOPS: 96.57956823475003, Tokens per sec: 78917.65138387731, Loss: 2.2541847229003906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16183 , TFLOPS: 97.30906696157848, Tokens per sec: 79513.7435725367, Loss: 2.234900951385498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16184 , TFLOPS: 97.71910195777792, Tokens per sec: 79848.79372317108, Loss: 2.234553813934326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16185 , TFLOPS: 96.67386505429849, Tokens per sec: 78994.70374254645, Loss: 2.271162509918213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16186 , TFLOPS: 97.36228342906058, Tokens per sec: 79557.22811803008, Loss: 2.25589919090271 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16187 , TFLOPS: 96.62449312060728, Tokens per sec: 78954.36066458073, Loss: 2.257631778717041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16188 , TFLOPS: 97.18516252025543, Tokens per sec: 79412.49806394755, Loss: 2.2561938762664795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16189 , TFLOPS: 97.06894886006289, Tokens per sec: 79317.53689059847, Loss: 2.2447237968444824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16190 , TFLOPS: 97.7014097026255, Tokens per sec: 79834.33692604688, Loss: 2.249858856201172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16191 , TFLOPS: 96.53326948411119, Tokens per sec: 78879.81948289445, Loss: 2.2557122707366943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16192 , TFLOPS: 96.23423360211781, Tokens per sec: 78635.46956584923, Loss: 2.249133348464966 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16193 , TFLOPS: 97.79563945945614, Tokens per sec: 79911.33448604292, Loss: 2.2449300289154053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16194 , TFLOPS: 96.00514854716332, Tokens per sec: 78448.27827027199, Loss: 2.254053831100464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16195 , TFLOPS: 97.33392207165667, Tokens per sec: 79534.05332280905, Loss: 2.2509517669677734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16196 , TFLOPS: 96.17402527182695, Tokens per sec: 78586.27178926827, Loss: 2.254701852798462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16197 , TFLOPS: 97.84909095043388, Tokens per sec: 79955.01107528461, Loss: 2.2550923824310303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16198 , TFLOPS: 97.31885872278755, Tokens per sec: 79521.744672683, Loss: 2.2675981521606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16199 , TFLOPS: 96.64130762111222, Tokens per sec: 78968.10022578706, Loss: 2.2484121322631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16200 , TFLOPS: 97.81188032656081, Tokens per sec: 79924.60531663126, Loss: 2.234489917755127 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/optimizer.pt +[rank0]:[2024-08-30 07:04:16,484] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007677376997889951, 'preprocessing_with_comm': 0.0016157079953700304, 'state_converting': 2.67097438199562, : 2.6818957750074333}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200, took 15.14s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014200 + eval ppl=7.081910610198975, eval loss=1.9575437307357788 +------------------------------------------------------------------ +iteration: 16201 , TFLOPS: 95.87380296916463, Tokens per sec: 78340.95241735327, Loss: 2.252108097076416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16202 , TFLOPS: 96.90145352836393, Tokens per sec: 79180.67214335383, Loss: 2.2491700649261475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16203 , TFLOPS: 95.99437057518112, Tokens per sec: 78439.47131191565, Loss: 2.2582411766052246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16204 , TFLOPS: 97.21500415500023, Tokens per sec: 79436.88243188955, Loss: 2.2522823810577393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16205 , TFLOPS: 97.85368245404925, Tokens per sec: 79958.76291108456, Loss: 2.2558507919311523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16206 , TFLOPS: 96.50060252755516, Tokens per sec: 78853.12647177024, Loss: 2.248995065689087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16207 , TFLOPS: 98.3792055691795, Tokens per sec: 80388.18137662597, Loss: 2.227858304977417 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16208 , TFLOPS: 96.5507112944692, Tokens per sec: 78894.07163513012, Loss: 2.2601284980773926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16209 , TFLOPS: 96.56129022927792, Tokens per sec: 78902.71595508834, Loss: 2.2701399326324463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16210 , TFLOPS: 97.68649415537476, Tokens per sec: 79822.14904842795, Loss: 2.272763967514038 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16211 , TFLOPS: 96.64202462267424, Tokens per sec: 78968.68610622064, Loss: 2.2737154960632324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16212 , TFLOPS: 97.74521275554028, Tokens per sec: 79870.12952817444, Loss: 2.24177885055542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16213 , TFLOPS: 95.70827249374564, Tokens per sec: 78205.6932047549, Loss: 2.2280774116516113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16214 , TFLOPS: 98.39919165729923, Tokens per sec: 80404.51252371627, Loss: 2.2560982704162598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16215 , TFLOPS: 97.08932372583698, Tokens per sec: 79334.1857179182, Loss: 2.266058921813965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16216 , TFLOPS: 96.55656045889798, Tokens per sec: 78898.85113795577, Loss: 2.2406022548675537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16217 , TFLOPS: 96.82913510282735, Tokens per sec: 79121.578896206, Loss: 2.2708663940429688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16218 , TFLOPS: 97.83827058990198, Tokens per sec: 79946.16948015316, Loss: 2.2463793754577637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16219 , TFLOPS: 97.23118922808065, Tokens per sec: 79450.10767174441, Loss: 2.2596001625061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16220 , TFLOPS: 97.23163124657577, Tokens per sec: 79450.46885643534, Loss: 2.2699637413024902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16221 , TFLOPS: 97.29302421563573, Tokens per sec: 79500.63463184985, Loss: 2.254254102706909 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16222 , TFLOPS: 95.92731420944845, Tokens per sec: 78384.67782929106, Loss: 2.2705252170562744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16223 , TFLOPS: 97.88130848546612, Tokens per sec: 79981.33685251261, Loss: 2.27248215675354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16224 , TFLOPS: 96.79382680556915, Tokens per sec: 79092.72757759992, Loss: 2.2653191089630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16225 , TFLOPS: 97.81049115348698, Tokens per sec: 79923.47018755213, Loss: 2.24301815032959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16226 , TFLOPS: 96.19150387546699, Tokens per sec: 78600.55400624183, Loss: 2.2797257900238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16227 , TFLOPS: 97.80287172778307, Tokens per sec: 79917.244158668, Loss: 2.264484405517578 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16228 , TFLOPS: 95.4287187184134, Tokens per sec: 77977.26261857655, Loss: 2.2616891860961914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16229 , TFLOPS: 96.60948278654911, Tokens per sec: 78942.09533422142, Loss: 2.2515177726745605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16230 , TFLOPS: 96.21642388021166, Tokens per sec: 78620.9167836167, Loss: 2.2696807384490967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16231 , TFLOPS: 97.46629813867945, Tokens per sec: 79642.22121483648, Loss: 2.2454965114593506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16232 , TFLOPS: 96.26247430856435, Tokens per sec: 78658.54577406717, Loss: 2.26332426071167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16233 , TFLOPS: 96.55651929052442, Tokens per sec: 78898.81749821796, Loss: 2.2640395164489746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16234 , TFLOPS: 96.16658506070772, Tokens per sec: 78580.19220123468, Loss: 2.2459535598754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16235 , TFLOPS: 96.96168441931161, Tokens per sec: 79229.88835482826, Loss: 2.214775562286377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16236 , TFLOPS: 97.30608937723686, Tokens per sec: 79511.31051172131, Loss: 2.2648420333862305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16237 , TFLOPS: 96.2935089535055, Tokens per sec: 78683.9049813514, Loss: 2.2523269653320312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16238 , TFLOPS: 97.37049543820348, Tokens per sec: 79563.93836209664, Loss: 2.2594168186187744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16239 , TFLOPS: 96.22902141153727, Tokens per sec: 78631.21055075216, Loss: 2.2521770000457764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16240 , TFLOPS: 97.08659648247101, Tokens per sec: 79331.95721715837, Loss: 2.241922378540039 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16241 , TFLOPS: 96.11072197431446, Tokens per sec: 78534.54503529896, Loss: 2.2631609439849854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16242 , TFLOPS: 97.80801315083463, Tokens per sec: 79921.44534779544, Loss: 2.265526294708252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16243 , TFLOPS: 97.23513754332701, Tokens per sec: 79453.33394177089, Loss: 2.263874053955078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16244 , TFLOPS: 96.62655767281079, Tokens per sec: 78956.04766333252, Loss: 2.2507483959198 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16245 , TFLOPS: 98.44195124777283, Tokens per sec: 80439.45248582204, Loss: 2.25809645652771 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16246 , TFLOPS: 96.6008274569993, Tokens per sec: 78935.02284163827, Loss: 2.274568796157837 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16247 , TFLOPS: 96.59416969653087, Tokens per sec: 78929.5826141735, Loss: 2.2412428855895996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16248 , TFLOPS: 97.64970922399986, Tokens per sec: 79792.09113407314, Loss: 2.2533535957336426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16249 , TFLOPS: 96.66906214332555, Tokens per sec: 78990.77916035253, Loss: 2.2598910331726074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16250 , TFLOPS: 97.79111880961536, Tokens per sec: 79907.64054668615, Loss: 2.240638494491577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16251 , TFLOPS: 96.8294347927096, Tokens per sec: 79121.82378052351, Loss: 2.254648447036743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16252 , TFLOPS: 97.74941779744894, Tokens per sec: 79873.5655761654, Loss: 2.2572011947631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16253 , TFLOPS: 96.56342473177476, Tokens per sec: 78904.4601120255, Loss: 2.238536834716797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16254 , TFLOPS: 96.53851874381623, Tokens per sec: 78884.10878812741, Loss: 2.250556230545044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16255 , TFLOPS: 96.14558152303964, Tokens per sec: 78563.02966992692, Loss: 2.236288547515869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16256 , TFLOPS: 97.93349878528396, Tokens per sec: 80023.98288999159, Loss: 2.2557051181793213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16257 , TFLOPS: 97.18875108506387, Tokens per sec: 79415.4303726304, Loss: 2.2525837421417236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16258 , TFLOPS: 97.32587036479114, Tokens per sec: 79527.47406585985, Loss: 2.248007297515869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16259 , TFLOPS: 97.2414932175342, Tokens per sec: 79458.52732677519, Loss: 2.247410535812378 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16260 , TFLOPS: 96.57323613277363, Tokens per sec: 78912.4772603496, Loss: 2.236853837966919 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16261 , TFLOPS: 97.8777289653607, Tokens per sec: 79978.41193448943, Loss: 2.273630380630493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16262 , TFLOPS: 96.85224786183043, Tokens per sec: 79140.46492656278, Loss: 2.247504711151123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16263 , TFLOPS: 97.82923610036148, Tokens per sec: 79938.78716618117, Loss: 2.257547378540039 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16264 , TFLOPS: 95.37710641017638, Tokens per sec: 77935.08887289706, Loss: 2.2525362968444824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16265 , TFLOPS: 97.73496192477876, Tokens per sec: 79861.75331047934, Loss: 2.2620513439178467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16266 , TFLOPS: 95.57476591328289, Tokens per sec: 78096.60154109365, Loss: 2.2587010860443115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16267 , TFLOPS: 97.1876195574749, Tokens per sec: 79414.50577230935, Loss: 2.256518840789795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16268 , TFLOPS: 96.21835565969229, Tokens per sec: 78622.49529035875, Loss: 2.2544784545898438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16269 , TFLOPS: 96.7512862698227, Tokens per sec: 79057.96661075068, Loss: 2.244983196258545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16270 , TFLOPS: 96.23931778394584, Tokens per sec: 78639.6239816994, Loss: 2.239866256713867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16271 , TFLOPS: 97.22762640719716, Tokens per sec: 79447.19639908535, Loss: 2.251725673675537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16272 , TFLOPS: 95.51359947130783, Tokens per sec: 78046.62086679155, Loss: 2.2497429847717285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16273 , TFLOPS: 97.05463377412524, Tokens per sec: 79305.83966537574, Loss: 2.262209415435791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16274 , TFLOPS: 97.35072250374097, Tokens per sec: 79547.78138834672, Loss: 2.236543655395508 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16275 , TFLOPS: 96.79288756914704, Tokens per sec: 79091.9601033413, Loss: 2.2504355907440186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16276 , TFLOPS: 96.60521880628663, Tokens per sec: 78938.61112618433, Loss: 2.2409236431121826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16277 , TFLOPS: 96.45892484772537, Tokens per sec: 78819.0705667024, Loss: 2.2312862873077393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16278 , TFLOPS: 97.07350882602603, Tokens per sec: 79321.26295617092, Loss: 2.260824203491211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16279 , TFLOPS: 96.10318590413814, Tokens per sec: 78528.3871183622, Loss: 2.259080171585083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16280 , TFLOPS: 97.90380248555958, Tokens per sec: 79999.71727903605, Loss: 2.2454311847686768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16281 , TFLOPS: 97.31322679819499, Tokens per sec: 79517.14268211978, Loss: 2.260089159011841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16282 , TFLOPS: 97.1087587453427, Tokens per sec: 79350.06657265824, Loss: 2.2452781200408936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16283 , TFLOPS: 97.7612119549685, Tokens per sec: 79883.20288588328, Loss: 2.2550809383392334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16284 , TFLOPS: 96.85423851243694, Tokens per sec: 79142.09153841727, Loss: 2.239597797393799 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16285 , TFLOPS: 95.88923078068814, Tokens per sec: 78353.55887929634, Loss: 2.2483389377593994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16286 , TFLOPS: 98.42885710067881, Tokens per sec: 80428.75292115819, Loss: 2.24166202545166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16287 , TFLOPS: 96.12195170171712, Tokens per sec: 78543.72113463857, Loss: 2.2594316005706787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16288 , TFLOPS: 98.39541621427217, Tokens per sec: 80401.42751203032, Loss: 2.2512447834014893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16289 , TFLOPS: 96.1584494663974, Tokens per sec: 78573.54439769487, Loss: 2.240365505218506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16290 , TFLOPS: 97.82112400582594, Tokens per sec: 79932.15856491227, Loss: 2.241792678833008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16291 , TFLOPS: 97.12591275431075, Tokens per sec: 79364.0835549695, Loss: 2.2672314643859863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16292 , TFLOPS: 96.69375923778803, Tokens per sec: 79010.9597919976, Loss: 2.238281488418579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16293 , TFLOPS: 96.80007547535565, Tokens per sec: 79097.83352653754, Loss: 2.260796546936035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16294 , TFLOPS: 97.95615598354799, Tokens per sec: 80042.4966699413, Loss: 2.245666742324829 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16295 , TFLOPS: 97.22497552282687, Tokens per sec: 79445.03029322655, Loss: 2.254574775695801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16296 , TFLOPS: 97.24081277503642, Tokens per sec: 79457.9713196941, Loss: 2.2525362968444824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16297 , TFLOPS: 97.28519858452195, Tokens per sec: 79494.24010721713, Loss: 2.2620038986206055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16298 , TFLOPS: 96.52440433669447, Tokens per sec: 78872.57554273107, Loss: 2.2576708793640137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16299 , TFLOPS: 97.17412938550703, Tokens per sec: 79403.48260552649, Loss: 2.256333827972412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16300 , TFLOPS: 96.79544434159932, Tokens per sec: 79094.04930792925, Loss: 2.2524659633636475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16301 , TFLOPS: 97.78625827333276, Tokens per sec: 79903.66887736831, Loss: 2.2560229301452637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16302 , TFLOPS: 96.03296791269344, Tokens per sec: 78471.01018998082, Loss: 2.2766621112823486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16303 , TFLOPS: 97.80362253525952, Tokens per sec: 79917.85766278452, Loss: 2.262155771255493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16304 , TFLOPS: 95.37886170853513, Tokens per sec: 77936.5231723714, Loss: 2.270979881286621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16305 , TFLOPS: 96.50923307207023, Tokens per sec: 78860.17871186348, Loss: 2.2568061351776123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16306 , TFLOPS: 96.30185666191463, Tokens per sec: 78690.72610878164, Loss: 2.2382731437683105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16307 , TFLOPS: 97.39249846199978, Tokens per sec: 79581.91759924883, Loss: 2.242887496948242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16308 , TFLOPS: 96.14278625519829, Tokens per sec: 78560.74558461717, Loss: 2.2546541690826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16309 , TFLOPS: 97.3314252221131, Tokens per sec: 79532.01308277236, Loss: 2.2652292251586914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16310 , TFLOPS: 95.51432574634714, Tokens per sec: 78047.21432482215, Loss: 2.2424256801605225 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16311 , TFLOPS: 97.27060362136795, Tokens per sec: 79482.31418711621, Loss: 2.2411069869995117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16312 , TFLOPS: 97.34723232421476, Tokens per sec: 79544.92947281063, Loss: 2.2683708667755127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16313 , TFLOPS: 96.571306828569, Tokens per sec: 78910.90077621929, Loss: 2.2628767490386963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16314 , TFLOPS: 96.80188002767554, Tokens per sec: 79099.30807269142, Loss: 2.2350752353668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16315 , TFLOPS: 96.3554674678246, Tokens per sec: 78734.5328783556, Loss: 2.2743473052978516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16316 , TFLOPS: 97.25836720802879, Tokens per sec: 79472.31549878347, Loss: 2.252002239227295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16317 , TFLOPS: 95.97993655524543, Tokens per sec: 78427.67690266146, Loss: 2.2601234912872314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16318 , TFLOPS: 97.78591769722911, Tokens per sec: 79903.3905838668, Loss: 2.2629804611206055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16319 , TFLOPS: 97.9063034037495, Tokens per sec: 80001.7608436684, Loss: 2.260800361633301 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16320 , TFLOPS: 96.06287699750919, Tokens per sec: 78495.4496731121, Loss: 2.267848491668701 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16321 , TFLOPS: 98.39855952431714, Tokens per sec: 80403.99599158407, Loss: 2.256007194519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16322 , TFLOPS: 97.26908216559885, Tokens per sec: 79481.07096644152, Loss: 2.242666482925415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16323 , TFLOPS: 96.524880165382, Tokens per sec: 78872.96435460055, Loss: 2.25007700920105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16324 , TFLOPS: 97.8253772733122, Tokens per sec: 79935.63401926415, Loss: 2.2424397468566895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16325 , TFLOPS: 96.02232700291947, Tokens per sec: 78462.31522868312, Loss: 2.2542388439178467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16326 , TFLOPS: 98.42209452438685, Tokens per sec: 80423.22704598583, Loss: 2.2568652629852295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16327 , TFLOPS: 95.46509738820411, Tokens per sec: 78006.98856613282, Loss: 2.237793445587158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16328 , TFLOPS: 97.87225545088604, Tokens per sec: 79973.93938491138, Loss: 2.2510015964508057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16329 , TFLOPS: 97.17349727449377, Tokens per sec: 79402.96609134556, Loss: 2.2506330013275146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16330 , TFLOPS: 95.97634538568838, Tokens per sec: 78424.74246557147, Loss: 2.2544262409210205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16331 , TFLOPS: 96.60313591587025, Tokens per sec: 78936.90914280673, Loss: 2.2539875507354736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16332 , TFLOPS: 96.6695661020021, Tokens per sec: 78991.19095796019, Loss: 2.2626352310180664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16333 , TFLOPS: 97.63423849070655, Tokens per sec: 79779.44959964708, Loss: 2.2492294311523438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16334 , TFLOPS: 96.48188571753784, Tokens per sec: 78837.83248449153, Loss: 2.2540442943573 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16335 , TFLOPS: 96.61105223716511, Tokens per sec: 78943.37777272101, Loss: 2.258573055267334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16336 , TFLOPS: 96.50959533178954, Tokens per sec: 78860.47472360554, Loss: 2.2650277614593506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16337 , TFLOPS: 97.90076355513166, Tokens per sec: 79997.23409075435, Loss: 2.2468173503875732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16338 , TFLOPS: 96.7627971375403, Tokens per sec: 79067.37243707881, Loss: 2.251487970352173 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16339 , TFLOPS: 97.36773325712832, Tokens per sec: 79561.68131283527, Loss: 2.251903772354126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16340 , TFLOPS: 95.95900175180755, Tokens per sec: 78410.57053586259, Loss: 2.2706494331359863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16341 , TFLOPS: 97.01558502025402, Tokens per sec: 79273.93192338322, Loss: 2.248992681503296 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16342 , TFLOPS: 96.03160624879266, Tokens per sec: 78469.89753935514, Loss: 2.261502742767334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16343 , TFLOPS: 97.75683140891742, Tokens per sec: 79879.62342893964, Loss: 2.2514681816101074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16344 , TFLOPS: 95.58757002926167, Tokens per sec: 78107.0641138672, Loss: 2.2798612117767334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16345 , TFLOPS: 97.39629027055432, Tokens per sec: 79585.01598362857, Loss: 2.245623826980591 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16346 , TFLOPS: 97.42721028919182, Tokens per sec: 79610.28152680937, Loss: 2.2405078411102295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16347 , TFLOPS: 96.11669157978307, Tokens per sec: 78539.42295359868, Loss: 2.2622580528259277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16348 , TFLOPS: 96.19349356929257, Tokens per sec: 78602.17983628598, Loss: 2.2493224143981934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16349 , TFLOPS: 97.15456034270005, Tokens per sec: 79387.49223689693, Loss: 2.2433903217315674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16350 , TFLOPS: 97.8147379961687, Tokens per sec: 79926.94039203084, Loss: 2.2736594676971436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16351 , TFLOPS: 97.22186915060742, Tokens per sec: 79442.4919965209, Loss: 2.2376866340637207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16352 , TFLOPS: 96.65626312714004, Tokens per sec: 78980.32075475136, Loss: 2.232093095779419 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16353 , TFLOPS: 96.9278142057394, Tokens per sec: 79202.21213142155, Loss: 2.2652695178985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16354 , TFLOPS: 96.6331593391839, Tokens per sec: 78961.44205486799, Loss: 2.2517924308776855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16355 , TFLOPS: 95.98640993344583, Tokens per sec: 78432.96646662858, Loss: 2.2686402797698975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16356 , TFLOPS: 97.79655961652746, Tokens per sec: 79912.08637007285, Loss: 2.259051561355591 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16357 , TFLOPS: 97.87952183112188, Tokens per sec: 79979.87693125535, Loss: 2.246001958847046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16358 , TFLOPS: 96.86967740763421, Tokens per sec: 79154.70705711548, Loss: 2.2696101665496826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16359 , TFLOPS: 97.05583057854582, Tokens per sec: 79306.81760507627, Loss: 2.250375747680664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16360 , TFLOPS: 97.81788986138193, Tokens per sec: 79929.5158622328, Loss: 2.262299060821533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16361 , TFLOPS: 96.5074992766926, Tokens per sec: 78858.76198302857, Loss: 2.2334132194519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16362 , TFLOPS: 97.31545154422363, Tokens per sec: 79518.96057936984, Loss: 2.2638139724731445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16363 , TFLOPS: 96.65606957903974, Tokens per sec: 78980.16260161623, Loss: 2.2566630840301514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16364 , TFLOPS: 97.71102964248125, Tokens per sec: 79842.19762654229, Loss: 2.2476553916931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16365 , TFLOPS: 96.08202018354872, Tokens per sec: 78511.09206321446, Loss: 2.2503604888916016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16366 , TFLOPS: 97.83500368068502, Tokens per sec: 79943.50000453425, Loss: 2.2629756927490234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16367 , TFLOPS: 97.23513924501158, Tokens per sec: 79453.33533226117, Loss: 2.238940477371216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16368 , TFLOPS: 95.81464407020893, Tokens per sec: 78292.61215813104, Loss: 2.256754159927368 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16369 , TFLOPS: 96.67027599171492, Tokens per sec: 78991.77102711872, Loss: 2.246802568435669 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16370 , TFLOPS: 96.24810754410198, Tokens per sec: 78646.80632099156, Loss: 2.267387628555298 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16371 , TFLOPS: 98.33560273655156, Tokens per sec: 80352.55237964891, Loss: 2.2385127544403076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16372 , TFLOPS: 94.94827788505292, Tokens per sec: 77584.68204599038, Loss: 2.260234832763672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16373 , TFLOPS: 95.94896122988875, Tokens per sec: 78402.36616693671, Loss: 2.252591133117676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16374 , TFLOPS: 97.09121011809822, Tokens per sec: 79335.7271375949, Loss: 2.268906593322754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16375 , TFLOPS: 96.74251042660563, Tokens per sec: 79050.79564334771, Loss: 2.254225254058838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16376 , TFLOPS: 97.28717238753985, Tokens per sec: 79495.85295247326, Loss: 2.255155086517334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16377 , TFLOPS: 96.81158916525177, Tokens per sec: 79107.24165894039, Loss: 2.2523109912872314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16378 , TFLOPS: 96.1641019305811, Tokens per sec: 78578.16317168676, Loss: 2.271371841430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16379 , TFLOPS: 97.69061500534433, Tokens per sec: 79825.5163010182, Loss: 2.2462689876556396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16380 , TFLOPS: 96.54389923404034, Tokens per sec: 78888.50532519567, Loss: 2.263155221939087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16381 , TFLOPS: 97.07035215448622, Tokens per sec: 79318.68355859579, Loss: 2.2610864639282227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16382 , TFLOPS: 96.55467539142148, Tokens per sec: 78897.31080079476, Loss: 2.2600595951080322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16383 , TFLOPS: 97.2993527013575, Tokens per sec: 79505.80580044285, Loss: 2.2597930431365967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16384 , TFLOPS: 96.41783934312107, Tokens per sec: 78785.49854325525, Loss: 2.2418911457061768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16385 , TFLOPS: 97.27257392683815, Tokens per sec: 79483.92417443609, Loss: 2.282248020172119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16386 , TFLOPS: 96.11811421533919, Tokens per sec: 78540.58542573349, Loss: 2.2604427337646484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16387 , TFLOPS: 96.75906859879362, Tokens per sec: 79064.32575208778, Loss: 2.2446179389953613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16388 , TFLOPS: 96.54703382737074, Tokens per sec: 78891.06668209755, Loss: 2.261659860610962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16389 , TFLOPS: 97.79108681414209, Tokens per sec: 79907.6144023613, Loss: 2.244861364364624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16390 , TFLOPS: 95.85679960593585, Tokens per sec: 78327.05853155353, Loss: 2.267047882080078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16391 , TFLOPS: 96.29911286326067, Tokens per sec: 78688.48408026983, Loss: 2.2586753368377686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16392 , TFLOPS: 97.87792287242453, Tokens per sec: 79978.5703809429, Loss: 2.2528491020202637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16393 , TFLOPS: 96.0100527745771, Tokens per sec: 78452.28564073783, Loss: 2.270172119140625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16394 , TFLOPS: 97.21117344940347, Tokens per sec: 79433.75226373531, Loss: 2.2512857913970947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16395 , TFLOPS: 97.89724662868254, Tokens per sec: 79994.36031961905, Loss: 2.2462220191955566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16396 , TFLOPS: 98.41346047896481, Tokens per sec: 80416.17194521138, Loss: 2.2636919021606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16397 , TFLOPS: 95.12412423322012, Tokens per sec: 77728.37062376553, Loss: 2.2397377490997314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16398 , TFLOPS: 98.33304148938147, Tokens per sec: 80350.45951865386, Loss: 2.2418088912963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16399 , TFLOPS: 97.12032245449257, Tokens per sec: 79359.51557708094, Loss: 2.2769739627838135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16400 , TFLOPS: 95.88372363508337, Tokens per sec: 78349.05884885615, Loss: 2.261117696762085 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/model.pt +[rank0]:[2024-08-30 07:59:51,592] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007693911000387743, 'preprocessing_with_comm': 0.0016303290030919015, 'state_converting': 2.6187972520128824, : 2.6297263859887607}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400, took 14.63s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014400 + eval ppl=8.049273490905762, eval loss=2.0855817794799805 +------------------------------------------------------------------ +iteration: 16401 , TFLOPS: 96.82194871309179, Tokens per sec: 79115.70671215896, Loss: 2.2712600231170654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16402 , TFLOPS: 95.35745439475913, Tokens per sec: 77919.03070521176, Loss: 2.271108865737915 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16403 , TFLOPS: 97.2546773504097, Tokens per sec: 79469.30042113752, Loss: 2.2448911666870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16404 , TFLOPS: 92.75175975534745, Tokens per sec: 75789.85053880115, Loss: 2.273435592651367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16405 , TFLOPS: 95.90269352209855, Tokens per sec: 78364.55963186458, Loss: 2.244150400161743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16406 , TFLOPS: 98.40551595529993, Tokens per sec: 80409.68027041458, Loss: 2.2514114379882812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16407 , TFLOPS: 95.63998640084534, Tokens per sec: 78149.89488040566, Loss: 2.2622923851013184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16408 , TFLOPS: 98.34257602422484, Tokens per sec: 80358.25043251515, Loss: 2.2575087547302246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16409 , TFLOPS: 97.94542020157947, Tokens per sec: 80033.7241861312, Loss: 2.2470974922180176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16410 , TFLOPS: 97.23999752403176, Tokens per sec: 79457.30515711178, Loss: 2.265733480453491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16411 , TFLOPS: 97.34179284032038, Tokens per sec: 79540.48473049577, Loss: 2.2237465381622314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16412 , TFLOPS: 97.93940650520241, Tokens per sec: 80028.8102400152, Loss: 2.2566158771514893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16413 , TFLOPS: 96.85062346115252, Tokens per sec: 79139.13758695307, Loss: 2.2669782638549805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16414 , TFLOPS: 96.61305371580757, Tokens per sec: 78945.0132324424, Loss: 2.261577606201172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16415 , TFLOPS: 96.45105202310812, Tokens per sec: 78812.63747904311, Loss: 2.24802827835083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16416 , TFLOPS: 97.17732740495349, Tokens per sec: 79406.09578954054, Loss: 2.2461628913879395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16417 , TFLOPS: 97.78857114207909, Tokens per sec: 79905.55878195947, Loss: 2.2422871589660645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16418 , TFLOPS: 97.82531873858701, Tokens per sec: 79935.58618903338, Loss: 2.2518954277038574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16419 , TFLOPS: 96.41148963667743, Tokens per sec: 78780.31003466416, Loss: 2.2773447036743164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16420 , TFLOPS: 96.66263992635189, Tokens per sec: 78985.53140154078, Loss: 2.2622127532958984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16421 , TFLOPS: 97.0671838580369, Tokens per sec: 79316.09466200785, Loss: 2.2296557426452637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16422 , TFLOPS: 96.68936493033868, Tokens per sec: 79007.3690902612, Loss: 2.247718572616577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16423 , TFLOPS: 97.77117276363003, Tokens per sec: 79891.34211905464, Loss: 2.2651991844177246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16424 , TFLOPS: 96.10450842924178, Tokens per sec: 78529.4677876692, Loss: 2.2716264724731445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16425 , TFLOPS: 96.80454852337107, Tokens per sec: 79101.48856921743, Loss: 2.269904375076294 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16426 , TFLOPS: 98.3720689983262, Tokens per sec: 80382.34990087006, Loss: 2.2552459239959717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16427 , TFLOPS: 96.15048025571271, Tokens per sec: 78567.03255049868, Loss: 2.238467216491699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16428 , TFLOPS: 97.73824282488458, Tokens per sec: 79864.43421841369, Loss: 2.249239921569824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16429 , TFLOPS: 96.6369682328993, Tokens per sec: 78964.55439997249, Loss: 2.256783962249756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16430 , TFLOPS: 97.76356975427046, Tokens per sec: 79885.12950438805, Loss: 2.231365919113159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16431 , TFLOPS: 97.26714465076915, Tokens per sec: 79479.4877731981, Loss: 2.2482659816741943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16432 , TFLOPS: 95.99705367378229, Tokens per sec: 78441.66374085177, Loss: 2.2482833862304688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16433 , TFLOPS: 96.44329265209052, Tokens per sec: 78806.29709724065, Loss: 2.2541420459747314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16434 , TFLOPS: 97.13738645870971, Tokens per sec: 79373.45901419331, Loss: 2.2587473392486572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16435 , TFLOPS: 96.65394191140128, Tokens per sec: 78978.42402961782, Loss: 2.273315668106079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16436 , TFLOPS: 96.04041701805244, Tokens per sec: 78477.0970457267, Loss: 2.256594181060791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16437 , TFLOPS: 96.75694890107552, Tokens per sec: 79062.59369251644, Loss: 2.260223627090454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16438 , TFLOPS: 96.19284950399837, Tokens per sec: 78601.65355395438, Loss: 2.2632102966308594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16439 , TFLOPS: 97.19974641034872, Tokens per sec: 79424.41493596551, Loss: 2.2393712997436523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16440 , TFLOPS: 95.54513916314673, Tokens per sec: 78072.39276089701, Loss: 2.241053581237793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16441 , TFLOPS: 97.07792034803066, Tokens per sec: 79324.86772436359, Loss: 2.271263599395752 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16442 , TFLOPS: 95.90429309741315, Tokens per sec: 78365.86668603087, Loss: 2.2588069438934326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16443 , TFLOPS: 96.3005815022797, Tokens per sec: 78689.6841430184, Loss: 2.262101650238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16444 , TFLOPS: 98.41423757462381, Tokens per sec: 80416.80693007857, Loss: 2.2454161643981934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16445 , TFLOPS: 95.02990257308424, Tokens per sec: 77651.37967978719, Loss: 2.2484145164489746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16446 , TFLOPS: 98.40373815643501, Tokens per sec: 80408.22758519743, Loss: 2.264789581298828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16447 , TFLOPS: 97.85427757256848, Tokens per sec: 79959.24919774634, Loss: 2.2518014907836914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16448 , TFLOPS: 97.18061712130633, Tokens per sec: 79408.78390145727, Loss: 2.2392992973327637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16449 , TFLOPS: 97.3137353136545, Tokens per sec: 79517.5582031921, Loss: 2.2611465454101562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16450 , TFLOPS: 97.99081258243945, Tokens per sec: 80070.81546903573, Loss: 2.2444984912872314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16451 , TFLOPS: 97.7995329815468, Tokens per sec: 79914.51598317138, Loss: 2.2616958618164062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16452 , TFLOPS: 96.59211812651921, Tokens per sec: 78927.90622350488, Loss: 2.2675657272338867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16453 , TFLOPS: 96.64306263730586, Tokens per sec: 78969.53429469703, Loss: 2.2658700942993164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16454 , TFLOPS: 96.28211358328765, Tokens per sec: 78674.59353100348, Loss: 2.2442331314086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16455 , TFLOPS: 97.72011111618916, Tokens per sec: 79849.61833248695, Loss: 2.247804641723633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16456 , TFLOPS: 97.83500136761364, Tokens per sec: 79943.49811446408, Loss: 2.2754788398742676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16457 , TFLOPS: 97.26245796060131, Tokens per sec: 79475.65815801582, Loss: 2.2367262840270996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16458 , TFLOPS: 96.76248545666682, Tokens per sec: 79067.11775461372, Loss: 2.2568202018737793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16459 , TFLOPS: 97.08923264617614, Tokens per sec: 79334.11129438273, Loss: 2.268745183944702 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16460 , TFLOPS: 95.94182731256808, Tokens per sec: 78396.53685945061, Loss: 2.2776527404785156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16461 , TFLOPS: 97.82162948761852, Tokens per sec: 79932.57160709726, Loss: 2.239086627960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16462 , TFLOPS: 96.83366586076609, Tokens per sec: 79125.28109514984, Loss: 2.2465436458587646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16463 , TFLOPS: 96.74134590525352, Tokens per sec: 79049.84408297346, Loss: 2.2557849884033203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16464 , TFLOPS: 98.4596020753312, Tokens per sec: 80453.87543139272, Loss: 2.2609965801239014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16465 , TFLOPS: 96.14011923345672, Tokens per sec: 78558.56629249688, Loss: 2.2590224742889404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16466 , TFLOPS: 97.74551858895931, Tokens per sec: 79870.37943253397, Loss: 2.248514413833618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16467 , TFLOPS: 96.57715280164945, Tokens per sec: 78915.67767131206, Loss: 2.2741880416870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16468 , TFLOPS: 97.80480325124606, Tokens per sec: 79918.82245621146, Loss: 2.2549240589141846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16469 , TFLOPS: 97.1518811629033, Tokens per sec: 79385.30301011677, Loss: 2.2527618408203125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16470 , TFLOPS: 96.10697366388369, Tokens per sec: 78531.48219435589, Loss: 2.2330758571624756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16471 , TFLOPS: 97.29115217444216, Tokens per sec: 79499.10493879997, Loss: 2.236449718475342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16472 , TFLOPS: 96.4159529143546, Tokens per sec: 78783.95709374918, Loss: 2.243915557861328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16473 , TFLOPS: 96.66806663781641, Tokens per sec: 78989.96570717443, Loss: 2.254082679748535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16474 , TFLOPS: 96.77297406608659, Tokens per sec: 79075.68826737144, Loss: 2.240997314453125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16475 , TFLOPS: 95.97049251229085, Tokens per sec: 78419.95993205198, Loss: 2.2505009174346924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16476 , TFLOPS: 96.73145835307452, Tokens per sec: 79041.76470955973, Loss: 2.252830743789673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16477 , TFLOPS: 96.47180878763865, Tokens per sec: 78829.59836566767, Loss: 2.257469654083252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16478 , TFLOPS: 96.2631845746249, Tokens per sec: 78659.12615074911, Loss: 2.248171329498291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16479 , TFLOPS: 96.62952250116852, Tokens per sec: 78958.47030090509, Loss: 2.2476298809051514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16480 , TFLOPS: 96.16381184522479, Tokens per sec: 78577.92613547466, Loss: 2.247832775115967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16481 , TFLOPS: 95.77305384684898, Tokens per sec: 78258.62771599603, Loss: 2.244762897491455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16482 , TFLOPS: 98.45133214146063, Tokens per sec: 80447.11785554003, Loss: 2.2538902759552 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16483 , TFLOPS: 94.98357771384669, Tokens per sec: 77613.52644479621, Loss: 2.249598741531372 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16484 , TFLOPS: 98.17359274158028, Tokens per sec: 80220.1698422496, Loss: 2.2654285430908203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16485 , TFLOPS: 97.1502173990451, Tokens per sec: 79383.94350583915, Loss: 2.2716684341430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16486 , TFLOPS: 97.13677668070315, Tokens per sec: 79372.96074888707, Loss: 2.262052297592163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16487 , TFLOPS: 96.67456546101832, Tokens per sec: 78995.2760629073, Loss: 2.2773349285125732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16488 , TFLOPS: 97.97034262596293, Tokens per sec: 80054.08894065522, Loss: 2.239715814590454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16489 , TFLOPS: 97.36391792425971, Tokens per sec: 79558.56370613292, Loss: 2.2392828464508057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16490 , TFLOPS: 96.53836442281755, Tokens per sec: 78883.98268846681, Loss: 2.2388076782226562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16491 , TFLOPS: 96.6114789622701, Tokens per sec: 78943.72646078917, Loss: 2.250279664993286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16492 , TFLOPS: 97.21049308977962, Tokens per sec: 79433.19632437263, Loss: 2.225709915161133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16493 , TFLOPS: 97.81605304630664, Tokens per sec: 79928.01495335094, Loss: 2.2888760566711426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16494 , TFLOPS: 97.91341414587764, Tokens per sec: 80007.57121410807, Loss: 2.2567226886749268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16495 , TFLOPS: 97.11861608367133, Tokens per sec: 79358.12125755701, Loss: 2.2555344104766846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16496 , TFLOPS: 96.06952010759589, Tokens per sec: 78500.87792937281, Loss: 2.2405569553375244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16497 , TFLOPS: 97.17878112898617, Tokens per sec: 79407.28366486983, Loss: 2.254065990447998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16498 , TFLOPS: 96.50147927809437, Tokens per sec: 78853.8428872054, Loss: 2.2849836349487305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16499 , TFLOPS: 97.84639721558638, Tokens per sec: 79952.80995519782, Loss: 2.263570785522461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16500 , TFLOPS: 96.76896187221567, Tokens per sec: 79072.40980047645, Loss: 2.250739336013794 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16501 , TFLOPS: 96.79226669253192, Tokens per sec: 79091.4527690763, Loss: 2.27229380607605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16502 , TFLOPS: 97.63527012024554, Tokens per sec: 79780.2925706999, Loss: 2.2740674018859863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16503 , TFLOPS: 96.68645112807162, Tokens per sec: 79004.98814742084, Loss: 2.2411653995513916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16504 , TFLOPS: 97.83520348712543, Tokens per sec: 79943.66327152018, Loss: 2.241529703140259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16505 , TFLOPS: 96.60499278532299, Tokens per sec: 78938.42643863663, Loss: 2.241734027862549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16506 , TFLOPS: 97.86903731241043, Tokens per sec: 79971.3097611207, Loss: 2.230353355407715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16507 , TFLOPS: 96.58049358758737, Tokens per sec: 78918.40751350141, Loss: 2.2695698738098145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16508 , TFLOPS: 96.5886720474977, Tokens per sec: 78925.09034362642, Loss: 2.24265193939209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16509 , TFLOPS: 97.3321804026907, Tokens per sec: 79532.63016026239, Loss: 2.271811008453369 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16510 , TFLOPS: 96.6183503309555, Tokens per sec: 78949.34123302677, Loss: 2.2568142414093018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16511 , TFLOPS: 96.15493778549417, Tokens per sec: 78570.67491283013, Loss: 2.2483956813812256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16512 , TFLOPS: 96.71067880182889, Tokens per sec: 79024.78520332371, Loss: 2.255190372467041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16513 , TFLOPS: 97.17615339203894, Tokens per sec: 79405.13647336628, Loss: 2.2375638484954834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16514 , TFLOPS: 96.06293680437051, Tokens per sec: 78495.49854283806, Loss: 2.2554852962493896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16515 , TFLOPS: 96.70579123641767, Tokens per sec: 79020.79144780907, Loss: 2.245112180709839 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16516 , TFLOPS: 95.48242955222084, Tokens per sec: 78021.15112352047, Loss: 2.2481086254119873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16517 , TFLOPS: 97.33660826827771, Tokens per sec: 79536.24828321695, Loss: 2.258687973022461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16518 , TFLOPS: 96.16880542555084, Tokens per sec: 78582.00651850527, Loss: 2.2474899291992188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16519 , TFLOPS: 95.56939889807168, Tokens per sec: 78092.21601480532, Loss: 2.252427577972412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16520 , TFLOPS: 97.87883833737662, Tokens per sec: 79979.3184309214, Loss: 2.25972318649292 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16521 , TFLOPS: 95.52755217702091, Tokens per sec: 78058.02198180431, Loss: 2.2365827560424805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16522 , TFLOPS: 98.40461565835697, Tokens per sec: 80408.94461460678, Loss: 2.236745834350586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16523 , TFLOPS: 96.53208982722415, Tokens per sec: 78878.85555488498, Loss: 2.252234935760498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16524 , TFLOPS: 97.74458032746871, Tokens per sec: 79869.612754917, Loss: 2.2744252681732178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16525 , TFLOPS: 96.61864983367863, Tokens per sec: 78949.58596441174, Loss: 2.2553787231445312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16526 , TFLOPS: 96.71179925676905, Tokens per sec: 79025.70075589839, Loss: 2.249528169631958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16527 , TFLOPS: 98.3882410632345, Tokens per sec: 80395.56451141262, Loss: 2.2379379272460938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16528 , TFLOPS: 95.56326808040937, Tokens per sec: 78087.20636587217, Loss: 2.2442033290863037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16529 , TFLOPS: 97.1092606095552, Tokens per sec: 79350.47665882538, Loss: 2.2467434406280518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16530 , TFLOPS: 96.66383312314322, Tokens per sec: 78986.50639335459, Loss: 2.252504348754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16531 , TFLOPS: 97.91806593902055, Tokens per sec: 80011.37231403295, Loss: 2.2421491146087646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16532 , TFLOPS: 97.92962982943993, Tokens per sec: 80020.82146657565, Loss: 2.252680540084839 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16533 , TFLOPS: 97.64398978693664, Tokens per sec: 79787.41763481738, Loss: 2.26175856590271 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16534 , TFLOPS: 97.17702496384996, Tokens per sec: 79405.84865712932, Loss: 2.261861562728882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16535 , TFLOPS: 97.14761006874414, Tokens per sec: 79381.81298913159, Loss: 2.2633724212646484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16536 , TFLOPS: 97.29743534759743, Tokens per sec: 79504.2390813284, Loss: 2.2565290927886963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16537 , TFLOPS: 97.68002260334146, Tokens per sec: 79816.86097666896, Loss: 2.2404773235321045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16538 , TFLOPS: 96.71466290236829, Tokens per sec: 79028.04071443432, Loss: 2.2550876140594482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16539 , TFLOPS: 96.24604053213872, Tokens per sec: 78645.11731230674, Loss: 2.2518792152404785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16540 , TFLOPS: 98.431146714892, Tokens per sec: 80430.62382387169, Loss: 2.254086494445801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16541 , TFLOPS: 96.64192994214507, Tokens per sec: 78968.60874032296, Loss: 2.2539780139923096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16542 , TFLOPS: 97.79975208687578, Tokens per sec: 79914.69501977596, Loss: 2.2469658851623535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16543 , TFLOPS: 95.9662416106884, Tokens per sec: 78416.48641092467, Loss: 2.246385097503662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16544 , TFLOPS: 97.75669299964885, Tokens per sec: 79879.51033116331, Loss: 2.2354869842529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16545 , TFLOPS: 97.3348575225581, Tokens per sec: 79534.81770382123, Loss: 2.2690443992614746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16546 , TFLOPS: 96.67190646071123, Tokens per sec: 78993.10332530762, Loss: 2.2637715339660645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16547 , TFLOPS: 97.23443381904211, Tokens per sec: 79452.75891054222, Loss: 2.2524569034576416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16548 , TFLOPS: 96.54900726609208, Tokens per sec: 78892.67922967758, Loss: 2.282905101776123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16549 , TFLOPS: 96.04106497222509, Tokens per sec: 78477.62650576099, Loss: 2.2537460327148438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16550 , TFLOPS: 96.86503929020557, Tokens per sec: 79150.91713196876, Loss: 2.2594213485717773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16551 , TFLOPS: 97.44203902734024, Tokens per sec: 79622.39847047612, Loss: 2.2716736793518066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16552 , TFLOPS: 96.04838711506032, Tokens per sec: 78483.60961716037, Loss: 2.258636474609375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16553 , TFLOPS: 96.6455020312832, Tokens per sec: 78971.5275863115, Loss: 2.26794171333313 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16554 , TFLOPS: 96.21366832597283, Tokens per sec: 78618.66514931388, Loss: 2.249513626098633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16555 , TFLOPS: 96.00568331087725, Tokens per sec: 78448.71523946867, Loss: 2.2340664863586426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16556 , TFLOPS: 96.22875967809668, Tokens per sec: 78630.99668162024, Loss: 2.2518584728240967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16557 , TFLOPS: 96.23568125130998, Tokens per sec: 78636.65247726998, Loss: 2.2588486671447754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16558 , TFLOPS: 97.09821529444129, Tokens per sec: 79341.45124751411, Loss: 2.272123098373413 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16559 , TFLOPS: 96.3287523344622, Tokens per sec: 78712.70325517646, Loss: 2.2668230533599854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16560 , TFLOPS: 97.77363498557156, Tokens per sec: 79893.35406398629, Loss: 2.2587172985076904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16561 , TFLOPS: 97.37901750934009, Tokens per sec: 79570.9019658, Loss: 2.2348484992980957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16562 , TFLOPS: 97.06591502577427, Tokens per sec: 79315.05786650341, Loss: 2.2388715744018555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16563 , TFLOPS: 96.74431163232362, Tokens per sec: 79052.26745490702, Loss: 2.2434303760528564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16564 , TFLOPS: 96.75526212581859, Tokens per sec: 79061.21538503256, Loss: 2.26725697517395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16565 , TFLOPS: 98.45092784841884, Tokens per sec: 80446.78749728815, Loss: 2.264420509338379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16566 , TFLOPS: 95.35348304885903, Tokens per sec: 77915.78561624524, Loss: 2.259699821472168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16567 , TFLOPS: 97.16829303242854, Tokens per sec: 79398.71357118496, Loss: 2.2715651988983154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16568 , TFLOPS: 96.15604541739303, Tokens per sec: 78571.57998736769, Loss: 2.235205888748169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16569 , TFLOPS: 97.7667785434634, Tokens per sec: 79887.75148863919, Loss: 2.2494823932647705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16570 , TFLOPS: 97.19312384992655, Tokens per sec: 79419.00347136469, Loss: 2.2704076766967773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16571 , TFLOPS: 97.73787695264208, Tokens per sec: 79864.13525478594, Loss: 2.270836591720581 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16572 , TFLOPS: 97.31366425485747, Tokens per sec: 79517.50013921982, Loss: 2.250762462615967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16573 , TFLOPS: 97.17536994829602, Tokens per sec: 79404.4963013163, Loss: 2.2512307167053223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16574 , TFLOPS: 97.18919600622598, Tokens per sec: 79415.79392916526, Loss: 2.2740302085876465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16575 , TFLOPS: 98.41831296193202, Tokens per sec: 80420.1370339579, Loss: 2.2480533123016357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16576 , TFLOPS: 96.82127998537527, Tokens per sec: 79115.1602775272, Loss: 2.2606544494628906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16577 , TFLOPS: 96.19111312926125, Tokens per sec: 78600.23471745843, Loss: 2.2740097045898438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16578 , TFLOPS: 97.73162391110479, Tokens per sec: 79859.02573357822, Loss: 2.274895191192627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16579 , TFLOPS: 96.76950065919112, Tokens per sec: 79072.8500571837, Loss: 2.246161699295044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16580 , TFLOPS: 97.79861571136921, Tokens per sec: 79913.76645809662, Loss: 2.2674460411071777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16581 , TFLOPS: 96.68352079265254, Tokens per sec: 79002.59369491636, Loss: 2.2516722679138184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16582 , TFLOPS: 97.86535912113955, Tokens per sec: 79968.30421634819, Loss: 2.238119125366211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16583 , TFLOPS: 96.57774616890327, Tokens per sec: 78916.1625269698, Loss: 2.2503128051757812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16584 , TFLOPS: 96.60359421266074, Tokens per sec: 78937.28362891186, Loss: 2.254063606262207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16585 , TFLOPS: 97.14839070775224, Tokens per sec: 79382.45086936053, Loss: 2.2547805309295654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16586 , TFLOPS: 96.52329066262035, Tokens per sec: 78871.66553097656, Loss: 2.240940809249878 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16587 , TFLOPS: 96.06598113383819, Tokens per sec: 78497.9861428141, Loss: 2.274897336959839 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16588 , TFLOPS: 96.50470344102403, Tokens per sec: 78856.47743373257, Loss: 2.264498233795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16589 , TFLOPS: 97.31474516463896, Tokens per sec: 79518.38337842736, Loss: 2.2535202503204346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16590 , TFLOPS: 96.08826933683106, Tokens per sec: 78516.19840722898, Loss: 2.2566213607788086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16591 , TFLOPS: 96.66193282572063, Tokens per sec: 78984.95361141258, Loss: 2.2485101222991943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16592 , TFLOPS: 95.49784615417641, Tokens per sec: 78033.74842583701, Loss: 2.2391278743743896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16593 , TFLOPS: 97.19621945414507, Tokens per sec: 79421.53296925958, Loss: 2.2725400924682617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16594 , TFLOPS: 96.29161142515679, Tokens per sec: 78682.35446209095, Loss: 2.245260238647461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16595 , TFLOPS: 95.73092391386872, Tokens per sec: 78224.2022632369, Loss: 2.24975323677063 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16596 , TFLOPS: 97.76405559889463, Tokens per sec: 79885.5265005372, Loss: 2.2623987197875977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16597 , TFLOPS: 95.61273329396008, Tokens per sec: 78127.62566521233, Loss: 2.2438669204711914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16598 , TFLOPS: 98.48141615637823, Tokens per sec: 80471.70027856065, Loss: 2.260080575942993 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16599 , TFLOPS: 97.11829844666522, Tokens per sec: 79357.86170818262, Loss: 2.2318460941314697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16600 , TFLOPS: 97.76341421219271, Tokens per sec: 79885.0024069523, Loss: 2.238964319229126 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/model.pt +[rank0]:[2024-08-30 08:55:25,588] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0076584019989240915, 'preprocessing_with_comm': 0.0016159560182131827, 'state_converting': 2.6330862900067586, : 2.6439785819966346}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600, took 14.90s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014600 + eval ppl=8.12195873260498, eval loss=2.094571352005005 +------------------------------------------------------------------ +iteration: 16601 , TFLOPS: 94.87203767561031, Tokens per sec: 77522.38420825725, Loss: 2.2325901985168457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16602 , TFLOPS: 96.15039208056618, Tokens per sec: 78566.96050031664, Loss: 2.243774175643921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16603 , TFLOPS: 96.11860775653027, Tokens per sec: 78540.9887109456, Loss: 2.256085157394409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16604 , TFLOPS: 96.10628844092031, Tokens per sec: 78530.9222810333, Loss: 2.243741035461426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16605 , TFLOPS: 96.82421031613201, Tokens per sec: 79117.55472622196, Loss: 2.2441654205322266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16606 , TFLOPS: 97.37769173633936, Tokens per sec: 79569.81864255267, Loss: 2.244318962097168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16607 , TFLOPS: 95.68140149034171, Tokens per sec: 78183.73621615239, Loss: 2.259460687637329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16608 , TFLOPS: 96.65703827528554, Tokens per sec: 78980.95414825514, Loss: 2.269399881362915 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16609 , TFLOPS: 97.82845587755087, Tokens per sec: 79938.14962603788, Loss: 2.2493736743927 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16610 , TFLOPS: 97.06196361326658, Tokens per sec: 79311.82906562497, Loss: 2.2688801288604736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16611 , TFLOPS: 97.2576111811268, Tokens per sec: 79471.69772974002, Loss: 2.2405951023101807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16612 , TFLOPS: 96.97239409176012, Tokens per sec: 79238.63950387742, Loss: 2.2532787322998047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16613 , TFLOPS: 97.45810977989187, Tokens per sec: 79635.53029608403, Loss: 2.2443642616271973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16614 , TFLOPS: 95.81624270494207, Tokens per sec: 78293.91844372402, Loss: 2.24910306930542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16615 , TFLOPS: 97.708187506954, Tokens per sec: 79839.8752444399, Loss: 2.262880563735962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16616 , TFLOPS: 95.92425771042129, Tokens per sec: 78382.18028525468, Loss: 2.2628955841064453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16617 , TFLOPS: 96.3859157602455, Tokens per sec: 78759.41296190149, Loss: 2.245384931564331 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16618 , TFLOPS: 97.77017786832144, Tokens per sec: 79890.5291644871, Loss: 2.2674384117126465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16619 , TFLOPS: 97.21569483724174, Tokens per sec: 79437.44680612884, Loss: 2.2440268993377686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16620 , TFLOPS: 97.81181988949093, Tokens per sec: 79924.55593194568, Loss: 2.2596347332000732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16621 , TFLOPS: 97.0439184111177, Tokens per sec: 79297.08386642429, Loss: 2.2646889686584473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16622 , TFLOPS: 96.03212168135416, Tokens per sec: 78470.31871256957, Loss: 2.24489688873291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16623 , TFLOPS: 98.4667190161215, Tokens per sec: 80459.69086691883, Loss: 2.2456138134002686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16624 , TFLOPS: 97.86910514604887, Tokens per sec: 79971.36518973285, Loss: 2.2389681339263916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16625 , TFLOPS: 97.18112875142117, Tokens per sec: 79409.20196759464, Loss: 2.263181686401367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16626 , TFLOPS: 97.71201604397918, Tokens per sec: 79843.00364059847, Loss: 2.2669901847839355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16627 , TFLOPS: 96.62723680304845, Tokens per sec: 78956.60259813207, Loss: 2.2373342514038086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16628 , TFLOPS: 96.50297374696127, Tokens per sec: 78855.0640561876, Loss: 2.2530572414398193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16629 , TFLOPS: 97.23056898638946, Tokens per sec: 79449.60085629209, Loss: 2.246472120285034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16630 , TFLOPS: 96.78504199504832, Tokens per sec: 79085.5492827822, Loss: 2.2416200637817383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16631 , TFLOPS: 98.34653449551247, Tokens per sec: 80361.48500130439, Loss: 2.2561590671539307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16632 , TFLOPS: 93.46139969654267, Tokens per sec: 76369.715602509, Loss: 2.24674391746521 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16633 , TFLOPS: 96.74922895327083, Tokens per sec: 79056.2855244361, Loss: 2.2356162071228027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16634 , TFLOPS: 97.23894886122481, Tokens per sec: 79456.44826773724, Loss: 2.246206760406494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16635 , TFLOPS: 96.07866606314319, Tokens per sec: 78508.35132508836, Loss: 2.264207363128662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16636 , TFLOPS: 96.05443270310366, Tokens per sec: 78488.54962277786, Loss: 2.2608697414398193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16637 , TFLOPS: 97.28478717225182, Tokens per sec: 79493.9039316605, Loss: 2.2478485107421875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16638 , TFLOPS: 96.17863850895006, Tokens per sec: 78590.04138407685, Loss: 2.2612409591674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16639 , TFLOPS: 95.25245839813566, Tokens per sec: 77833.2357735333, Loss: 2.272136926651001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16640 , TFLOPS: 96.1421117185269, Tokens per sec: 78560.19440333886, Loss: 2.2509829998016357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16641 , TFLOPS: 96.1812887012593, Tokens per sec: 78592.20692443477, Loss: 2.23503041267395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16642 , TFLOPS: 97.25323108044661, Tokens per sec: 79468.11863672039, Loss: 2.28678560256958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16643 , TFLOPS: 96.3553910188531, Tokens per sec: 78734.47040993301, Loss: 2.2544991970062256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16644 , TFLOPS: 97.20374021447309, Tokens per sec: 79427.6783761252, Loss: 2.266331911087036 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16645 , TFLOPS: 96.37041891011188, Tokens per sec: 78746.75008673289, Loss: 2.254800319671631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16646 , TFLOPS: 96.18698837543599, Tokens per sec: 78596.86427492718, Loss: 2.2627201080322266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16647 , TFLOPS: 96.5614812259485, Tokens per sec: 78902.87202338455, Loss: 2.2669548988342285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16648 , TFLOPS: 98.444024731611, Tokens per sec: 80441.14678284242, Loss: 2.251750946044922 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16649 , TFLOPS: 95.68939341285319, Tokens per sec: 78190.2666217669, Loss: 2.2511045932769775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16650 , TFLOPS: 97.27383668200183, Tokens per sec: 79484.95600418653, Loss: 2.234158515930176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16651 , TFLOPS: 97.3471238212747, Tokens per sec: 79544.8408122652, Loss: 2.26191782951355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16652 , TFLOPS: 96.530287676382, Tokens per sec: 78877.38297104037, Loss: 2.2418508529663086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16653 , TFLOPS: 97.79159312892813, Tokens per sec: 79908.02812520668, Loss: 2.2491140365600586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16654 , TFLOPS: 96.0103063325929, Tokens per sec: 78452.4928295197, Loss: 2.2637457847595215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16655 , TFLOPS: 97.74107774862095, Tokens per sec: 79866.75070757611, Loss: 2.2392499446868896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16656 , TFLOPS: 98.2762951319524, Tokens per sec: 80304.09060921738, Loss: 2.281219959259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16657 , TFLOPS: 97.74731622492939, Tokens per sec: 79871.8483271604, Loss: 2.2388687133789062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16658 , TFLOPS: 98.36653136095076, Tokens per sec: 80377.82495481912, Loss: 2.2394683361053467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16659 , TFLOPS: 96.61921382075994, Tokens per sec: 78950.04681277419, Loss: 2.239398956298828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16660 , TFLOPS: 96.0098236052357, Tokens per sec: 78452.09838056962, Loss: 2.2560501098632812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16661 , TFLOPS: 98.3783353558335, Tokens per sec: 80387.47030289967, Loss: 2.2311320304870605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16662 , TFLOPS: 97.30637321498872, Tokens per sec: 79511.54244285499, Loss: 2.2840633392333984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16663 , TFLOPS: 97.1725293901249, Tokens per sec: 79402.1752081122, Loss: 2.253312826156616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16664 , TFLOPS: 96.14222703346607, Tokens per sec: 78560.28863014412, Loss: 2.233368158340454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16665 , TFLOPS: 96.7033361012855, Tokens per sec: 79018.78529369178, Loss: 2.2712583541870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16666 , TFLOPS: 98.43405201153463, Tokens per sec: 80432.99781654739, Loss: 2.2590978145599365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16667 , TFLOPS: 96.80244621451178, Tokens per sec: 79099.77071853026, Loss: 2.249758243560791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16668 , TFLOPS: 96.07939128111686, Tokens per sec: 78508.94391936342, Loss: 2.261092185974121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16669 , TFLOPS: 97.1704617589593, Tokens per sec: 79400.48569346123, Loss: 2.2537734508514404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16670 , TFLOPS: 96.06176248216838, Tokens per sec: 78494.53897393694, Loss: 2.253939628601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16671 , TFLOPS: 96.57433780887176, Tokens per sec: 78913.37746824905, Loss: 2.2410452365875244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16672 , TFLOPS: 97.28747435952843, Tokens per sec: 79496.09970155857, Loss: 2.250250816345215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16673 , TFLOPS: 95.62596304646277, Tokens per sec: 78138.4360365464, Loss: 2.2274954319000244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16674 , TFLOPS: 97.18563757977606, Tokens per sec: 79412.88624731099, Loss: 2.2460861206054688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16675 , TFLOPS: 96.73214534010931, Tokens per sec: 79042.32606435049, Loss: 2.2614946365356445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16676 , TFLOPS: 96.77514553289477, Tokens per sec: 79077.46262879892, Loss: 2.2476813793182373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16677 , TFLOPS: 96.93327564882078, Tokens per sec: 79206.6748171534, Loss: 2.2513082027435303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16678 , TFLOPS: 94.9054257584709, Tokens per sec: 77549.66647024835, Loss: 2.2351231575012207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16679 , TFLOPS: 96.14739447482776, Tokens per sec: 78564.51107950252, Loss: 2.2394943237304688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16680 , TFLOPS: 97.76374882767682, Tokens per sec: 79885.27582987816, Loss: 2.239828109741211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16681 , TFLOPS: 95.08813805799215, Tokens per sec: 77698.9653936202, Loss: 2.266550302505493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16682 , TFLOPS: 97.49916537034184, Tokens per sec: 79669.07787590563, Loss: 2.243997573852539 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16683 , TFLOPS: 96.30853798504295, Tokens per sec: 78696.18558990234, Loss: 2.274672269821167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16684 , TFLOPS: 95.69384821501957, Tokens per sec: 78193.90675529386, Loss: 2.2465124130249023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16685 , TFLOPS: 98.40860571817505, Tokens per sec: 80412.2049951974, Loss: 2.258378505706787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16686 , TFLOPS: 97.21156497578251, Tokens per sec: 79434.0721900184, Loss: 2.2628395557403564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16687 , TFLOPS: 97.72030888350926, Tokens per sec: 79849.7799332552, Loss: 2.2516980171203613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16688 , TFLOPS: 97.84852966932084, Tokens per sec: 79954.5524380385, Loss: 2.2593584060668945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16689 , TFLOPS: 97.69517446298936, Tokens per sec: 79829.24195123083, Loss: 2.24600887298584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16690 , TFLOPS: 97.7811524005705, Tokens per sec: 79899.4967372974, Loss: 2.26082444190979 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16691 , TFLOPS: 97.22203315265428, Tokens per sec: 79442.62600681516, Loss: 2.2687273025512695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16692 , TFLOPS: 94.71136994036172, Tokens per sec: 77391.09846583017, Loss: 2.2322170734405518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16693 , TFLOPS: 98.40087022875232, Tokens per sec: 80405.88412766087, Loss: 2.252451181411743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16694 , TFLOPS: 97.22695241705647, Tokens per sec: 79446.64566439133, Loss: 2.257418394088745 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16695 , TFLOPS: 97.16931309808966, Tokens per sec: 79399.54709309549, Loss: 2.245875597000122 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16696 , TFLOPS: 96.73260705298146, Tokens per sec: 79042.70334182387, Loss: 2.2531511783599854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16697 , TFLOPS: 97.3505183083346, Tokens per sec: 79547.61453502372, Loss: 2.2543885707855225 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16698 , TFLOPS: 97.13706209967465, Tokens per sec: 79373.19397207603, Loss: 2.2774951457977295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16699 , TFLOPS: 96.83965258749453, Tokens per sec: 79130.1730036717, Loss: 2.253103256225586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16700 , TFLOPS: 97.34409893083098, Tokens per sec: 79542.36909641603, Loss: 2.264383316040039 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16701 , TFLOPS: 97.92076890636689, Tokens per sec: 80013.58097823222, Loss: 2.236450433731079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16702 , TFLOPS: 95.58018004461269, Tokens per sec: 78101.0255671754, Loss: 2.2468910217285156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16703 , TFLOPS: 97.07318918350303, Tokens per sec: 79321.00176803705, Loss: 2.2301104068756104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16704 , TFLOPS: 97.07539972179734, Tokens per sec: 79322.80805578164, Loss: 2.262421131134033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16705 , TFLOPS: 96.64186049506752, Tokens per sec: 78968.55199332818, Loss: 2.2693381309509277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16706 , TFLOPS: 96.72310870821819, Tokens per sec: 79034.94199980865, Loss: 2.269432544708252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16707 , TFLOPS: 97.39370942078662, Tokens per sec: 79582.90710484641, Loss: 2.2449705600738525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16708 , TFLOPS: 96.75413677490064, Tokens per sec: 79060.29583182839, Loss: 2.271627187728882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16709 , TFLOPS: 95.93273618575515, Tokens per sec: 78389.10826570535, Loss: 2.257351875305176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16710 , TFLOPS: 96.54605093504674, Tokens per sec: 78890.2635354777, Loss: 2.2465999126434326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16711 , TFLOPS: 95.57326180848274, Tokens per sec: 78095.37249834237, Loss: 2.2634003162384033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16712 , TFLOPS: 97.87750410482496, Tokens per sec: 79978.22819515724, Loss: 2.248307466506958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16713 , TFLOPS: 95.10414585867377, Tokens per sec: 77712.0457796346, Loss: 2.267496347427368 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16714 , TFLOPS: 96.93042514693308, Tokens per sec: 79204.3455986825, Loss: 2.2396035194396973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16715 , TFLOPS: 96.25440240651625, Tokens per sec: 78651.95001511442, Loss: 2.2662830352783203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16716 , TFLOPS: 95.72090769401332, Tokens per sec: 78216.0177521526, Loss: 2.2505838871002197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16717 , TFLOPS: 97.16880565599615, Tokens per sec: 79399.13244909816, Loss: 2.233656406402588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16718 , TFLOPS: 97.91725304452508, Tokens per sec: 80010.70807701495, Loss: 2.2429778575897217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16719 , TFLOPS: 96.69623964948707, Tokens per sec: 79012.98660024836, Loss: 2.2524452209472656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16720 , TFLOPS: 97.9879544405807, Tokens per sec: 80068.48000774783, Loss: 2.259779691696167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16721 , TFLOPS: 96.43371878610597, Tokens per sec: 78798.47404489144, Loss: 2.254204750061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16722 , TFLOPS: 97.30179542048123, Tokens per sec: 79507.80180912, Loss: 2.2563862800598145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16723 , TFLOPS: 95.4782218497919, Tokens per sec: 78017.71290154988, Loss: 2.259423017501831 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16724 , TFLOPS: 97.79107137024076, Tokens per sec: 79907.60178275197, Loss: 2.251129150390625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16725 , TFLOPS: 97.00202581323299, Tokens per sec: 79262.85234628129, Loss: 2.2510387897491455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16726 , TFLOPS: 97.83195573859724, Tokens per sec: 79941.00945259331, Loss: 2.2719738483428955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16727 , TFLOPS: 97.26862316022341, Tokens per sec: 79480.69590133341, Loss: 2.252056121826172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16728 , TFLOPS: 96.74026885363381, Tokens per sec: 79048.9639963692, Loss: 2.2419393062591553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16729 , TFLOPS: 97.25959592470033, Tokens per sec: 79473.31951480579, Loss: 2.2495481967926025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16730 , TFLOPS: 96.0237958311568, Tokens per sec: 78463.51544604685, Loss: 2.2505104541778564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16731 , TFLOPS: 96.81521439706964, Tokens per sec: 79110.20392918054, Loss: 2.2370967864990234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16732 , TFLOPS: 97.85586697989069, Tokens per sec: 79960.5479433843, Loss: 2.2436423301696777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16733 , TFLOPS: 97.88609140884343, Tokens per sec: 79985.2451023276, Loss: 2.2585151195526123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16734 , TFLOPS: 94.96097882226144, Tokens per sec: 77595.06030873468, Loss: 2.2513747215270996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16735 , TFLOPS: 97.82090115746738, Tokens per sec: 79931.97646978186, Loss: 2.2675225734710693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16736 , TFLOPS: 97.76712883852285, Tokens per sec: 79888.03772374966, Loss: 2.2715070247650146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16737 , TFLOPS: 96.63636230759525, Tokens per sec: 78964.05928280864, Loss: 2.2528600692749023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16738 , TFLOPS: 96.73684650088805, Tokens per sec: 79046.16750384145, Loss: 2.24627423286438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16739 , TFLOPS: 96.72982531533022, Tokens per sec: 79040.43031238073, Loss: 2.2625060081481934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16740 , TFLOPS: 97.16462829851623, Tokens per sec: 79395.71902276657, Loss: 2.2733519077301025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16741 , TFLOPS: 96.12337116114102, Tokens per sec: 78544.88101147399, Loss: 2.2693212032318115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16742 , TFLOPS: 97.86378132915597, Tokens per sec: 79967.01496190243, Loss: 2.230485439300537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16743 , TFLOPS: 94.8722738956926, Tokens per sec: 77522.57722976734, Loss: 2.2553083896636963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16744 , TFLOPS: 97.36478197438424, Tokens per sec: 79559.26974373231, Loss: 2.257795810699463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16745 , TFLOPS: 96.17166144017307, Tokens per sec: 78584.34024157345, Loss: 2.260615825653076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16746 , TFLOPS: 95.56834947880041, Tokens per sec: 78091.35850730432, Loss: 2.272622585296631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16747 , TFLOPS: 96.45187506723268, Tokens per sec: 78813.31000958433, Loss: 2.2482693195343018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16748 , TFLOPS: 96.81029470974126, Tokens per sec: 79106.1839260205, Loss: 2.2424187660217285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16749 , TFLOPS: 96.68920538721865, Tokens per sec: 79007.23872347081, Loss: 2.2640340328216553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16750 , TFLOPS: 97.67250711130595, Tokens per sec: 79810.71987466078, Loss: 2.2408299446105957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16751 , TFLOPS: 96.88672855498288, Tokens per sec: 79168.63998854965, Loss: 2.240525722503662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16752 , TFLOPS: 97.41175521054318, Tokens per sec: 79597.65278419646, Loss: 2.258700370788574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16753 , TFLOPS: 97.25560152337106, Tokens per sec: 79470.05558665459, Loss: 2.26462721824646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16754 , TFLOPS: 96.26664046387805, Tokens per sec: 78661.95004682036, Loss: 2.2687344551086426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16755 , TFLOPS: 97.77833677810078, Tokens per sec: 79897.19601969681, Loss: 2.260505199432373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16756 , TFLOPS: 96.63258220862346, Tokens per sec: 78960.97046663039, Loss: 2.2590081691741943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16757 , TFLOPS: 97.65979065450496, Tokens per sec: 79800.32893045797, Loss: 2.2485361099243164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16758 , TFLOPS: 98.33011405434969, Tokens per sec: 80348.06743613053, Loss: 2.269939661026001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16759 , TFLOPS: 97.22283558073788, Tokens per sec: 79443.28169145861, Loss: 2.2627522945404053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16760 , TFLOPS: 97.2326616572408, Tokens per sec: 79451.31083151487, Loss: 2.260113000869751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16761 , TFLOPS: 97.17153679639792, Tokens per sec: 79401.36413422624, Loss: 2.2451975345611572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16762 , TFLOPS: 95.97388981943078, Tokens per sec: 78422.73595917049, Loss: 2.2682197093963623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16763 , TFLOPS: 97.24689542876739, Tokens per sec: 79462.94161263912, Loss: 2.21870493888855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16764 , TFLOPS: 97.93282376021796, Tokens per sec: 80023.43130963348, Loss: 2.2543320655822754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16765 , TFLOPS: 97.71270681923188, Tokens per sec: 79843.5680908396, Loss: 2.2292754650115967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16766 , TFLOPS: 94.43492171331737, Tokens per sec: 77165.20550310177, Loss: 2.261357307434082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16767 , TFLOPS: 97.32808740568923, Tokens per sec: 79529.28566704938, Loss: 2.2406351566314697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16768 , TFLOPS: 97.1646997632476, Tokens per sec: 79395.77741843823, Loss: 2.2656655311584473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16769 , TFLOPS: 96.16212928470435, Tokens per sec: 78576.55127196044, Loss: 2.244919776916504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16770 , TFLOPS: 96.24657937338338, Tokens per sec: 78645.55761335876, Loss: 2.2564611434936523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16771 , TFLOPS: 97.38557827442358, Tokens per sec: 79576.26293583884, Loss: 2.2430145740509033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16772 , TFLOPS: 95.44918178896886, Tokens per sec: 77993.98352029371, Loss: 2.277836799621582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16773 , TFLOPS: 96.4756769282527, Tokens per sec: 78832.7591229382, Loss: 2.2484920024871826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16774 , TFLOPS: 97.80088658268726, Tokens per sec: 79915.62204550803, Loss: 2.2504239082336426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16775 , TFLOPS: 95.02415652935319, Tokens per sec: 77646.68443953822, Loss: 2.2618484497070312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16776 , TFLOPS: 97.31065263127643, Tokens per sec: 79515.03926406565, Loss: 2.2580976486206055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16777 , TFLOPS: 95.68682557771545, Tokens per sec: 78188.16837757391, Loss: 2.235346794128418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16778 , TFLOPS: 97.97955250399265, Tokens per sec: 80061.61456907664, Loss: 2.252676248550415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16779 , TFLOPS: 95.02479162512847, Tokens per sec: 77647.20339264502, Loss: 2.2498607635498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16780 , TFLOPS: 97.77248686284717, Tokens per sec: 79892.41590335288, Loss: 2.2650227546691895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16781 , TFLOPS: 95.49125844683586, Tokens per sec: 78028.36544058636, Loss: 2.268017530441284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16782 , TFLOPS: 97.44731883619203, Tokens per sec: 79626.71273820328, Loss: 2.2676796913146973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16783 , TFLOPS: 96.73737726012618, Tokens per sec: 79046.60120087788, Loss: 2.2520461082458496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16784 , TFLOPS: 95.98471084658362, Tokens per sec: 78431.57809901508, Loss: 2.238905429840088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16785 , TFLOPS: 97.3916924568368, Tokens per sec: 79581.25899168174, Loss: 2.2643866539001465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16786 , TFLOPS: 96.78856637939461, Tokens per sec: 79088.42914795729, Loss: 2.238724708557129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16787 , TFLOPS: 96.5044478854843, Tokens per sec: 78856.26861272246, Loss: 2.2464375495910645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16788 , TFLOPS: 97.82364517330095, Tokens per sec: 79934.21867575673, Loss: 2.2593488693237305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16789 , TFLOPS: 98.43868636319276, Tokens per sec: 80436.78466458606, Loss: 2.260129451751709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16790 , TFLOPS: 98.3834594728917, Tokens per sec: 80391.65735085444, Loss: 2.2629647254943848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16791 , TFLOPS: 97.8932195914518, Tokens per sec: 79991.06972383264, Loss: 2.2492575645446777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16792 , TFLOPS: 97.66781372072215, Tokens per sec: 79806.88478439614, Loss: 2.2525038719177246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16793 , TFLOPS: 97.19710556003182, Tokens per sec: 79422.25702919005, Loss: 2.253192901611328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16794 , TFLOPS: 95.92553821925398, Tokens per sec: 78383.2266219851, Loss: 2.2442736625671387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16795 , TFLOPS: 97.86201832624491, Tokens per sec: 79965.57436684007, Loss: 2.2376163005828857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16796 , TFLOPS: 97.17794887317086, Tokens per sec: 79406.60360721896, Loss: 2.270057201385498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16797 , TFLOPS: 97.81099678759644, Tokens per sec: 79923.88335419916, Loss: 2.2460947036743164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16798 , TFLOPS: 95.03050938707915, Tokens per sec: 77651.8755231232, Loss: 2.2510569095611572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16799 , TFLOPS: 97.76799730953441, Tokens per sec: 79888.74737376964, Loss: 2.266244888305664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16800 , TFLOPS: 97.82227285413366, Tokens per sec: 79933.0973184385, Loss: 2.2691712379455566 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/model.pt +[rank0]:[2024-08-30 09:51:01,505] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007765596004901454, 'preprocessing_with_comm': 0.0016229290049523115, 'state_converting': 2.59540625102818, : 2.6063951709948014}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800, took 14.64s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0014800 + eval ppl=7.783336162567139, eval loss=2.051985025405884 +------------------------------------------------------------------ +iteration: 16801 , TFLOPS: 95.01489800592391, Tokens per sec: 77639.11906170855, Loss: 2.2455122470855713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16802 , TFLOPS: 96.17913079376942, Tokens per sec: 78590.44364267528, Loss: 2.2571113109588623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16803 , TFLOPS: 97.19480760530551, Tokens per sec: 79420.37931122856, Loss: 2.261080026626587 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16804 , TFLOPS: 97.74866011545652, Tokens per sec: 79872.94645470493, Loss: 2.2743589878082275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16805 , TFLOPS: 98.44558843548946, Tokens per sec: 80442.42452553484, Loss: 2.2461438179016113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16806 , TFLOPS: 98.25894118164291, Tokens per sec: 80289.91024968908, Loss: 2.238490104675293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16807 , TFLOPS: 97.22988174339521, Tokens per sec: 79449.0392923503, Loss: 2.250006914138794 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16808 , TFLOPS: 98.41769426493148, Tokens per sec: 80419.63148071208, Loss: 2.2629406452178955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16809 , TFLOPS: 97.79704801057468, Tokens per sec: 79912.48544942122, Loss: 2.2533442974090576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16810 , TFLOPS: 97.82642446117734, Tokens per sec: 79936.48970342577, Loss: 2.2606284618377686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16811 , TFLOPS: 95.31950093952268, Tokens per sec: 77888.0179599292, Loss: 2.251814365386963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16812 , TFLOPS: 97.12335146006706, Tokens per sec: 79361.9906555094, Loss: 2.2510907649993896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16813 , TFLOPS: 98.32854757368031, Tokens per sec: 80346.78742445077, Loss: 2.2604217529296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16814 , TFLOPS: 97.35415595291926, Tokens per sec: 79550.58694805515, Loss: 2.27502179145813 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16815 , TFLOPS: 97.26928635539561, Tokens per sec: 79481.23781518076, Loss: 2.2347025871276855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16816 , TFLOPS: 96.4936573564566, Tokens per sec: 78847.451393681, Loss: 2.2545509338378906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16817 , TFLOPS: 97.141111858001, Tokens per sec: 79376.50313385454, Loss: 2.2612781524658203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16818 , TFLOPS: 97.77157376118713, Tokens per sec: 79891.66978448113, Loss: 2.2520675659179688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16819 , TFLOPS: 98.45739092164607, Tokens per sec: 80452.06864079642, Loss: 2.237072706222534 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16820 , TFLOPS: 97.1237097494355, Tokens per sec: 79362.28342297554, Loss: 2.2438254356384277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16821 , TFLOPS: 95.97541080594893, Tokens per sec: 78423.97879640813, Loss: 2.264564275741577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16822 , TFLOPS: 97.81978787183694, Tokens per sec: 79931.06677543471, Loss: 2.2417383193969727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16823 , TFLOPS: 97.82441416814437, Tokens per sec: 79934.84704123899, Loss: 2.2676334381103516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16824 , TFLOPS: 95.06615746540864, Tokens per sec: 77681.00448558941, Loss: 2.2672433853149414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16825 , TFLOPS: 98.46557288498272, Tokens per sec: 80458.75433366129, Loss: 2.2475810050964355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16826 , TFLOPS: 97.16630700366183, Tokens per sec: 79397.09073595474, Loss: 2.252943992614746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16827 , TFLOPS: 96.03749085431564, Tokens per sec: 78474.7060020112, Loss: 2.2560150623321533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16828 , TFLOPS: 97.19035090284972, Tokens per sec: 79416.7376249262, Loss: 2.2609682083129883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16829 , TFLOPS: 96.71390369460275, Tokens per sec: 79027.42034622535, Loss: 2.254255533218384 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16830 , TFLOPS: 96.63007307125983, Tokens per sec: 78958.92018589962, Loss: 2.2629151344299316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16831 , TFLOPS: 97.2831950247416, Tokens per sec: 79492.60294694437, Loss: 2.2601120471954346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16832 , TFLOPS: 96.22519872101118, Tokens per sec: 78628.08693191849, Loss: 2.2503349781036377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16833 , TFLOPS: 97.67997077901548, Tokens per sec: 79816.81862967814, Loss: 2.2401206493377686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16834 , TFLOPS: 94.94427397878685, Tokens per sec: 77581.41035111068, Loss: 2.26912784576416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16835 , TFLOPS: 97.76621906344326, Tokens per sec: 79887.29432311244, Loss: 2.2623178958892822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16836 , TFLOPS: 96.11862803937822, Tokens per sec: 78541.00528458279, Loss: 2.2444887161254883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16837 , TFLOPS: 96.13443387858561, Tokens per sec: 78553.92064267764, Loss: 2.251781702041626 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16838 , TFLOPS: 97.29191113389213, Tokens per sec: 79499.72510410388, Loss: 2.247807025909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16839 , TFLOPS: 95.86291635978512, Tokens per sec: 78332.05668858302, Loss: 2.2457540035247803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16840 , TFLOPS: 96.1250728585273, Tokens per sec: 78546.2715122139, Loss: 2.2450923919677734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16841 , TFLOPS: 97.92232089234854, Tokens per sec: 80014.8491459295, Loss: 2.261265993118286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16842 , TFLOPS: 97.85976674266144, Tokens per sec: 79963.7345399331, Loss: 2.2928011417388916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16843 , TFLOPS: 98.37863750864499, Tokens per sec: 80387.71719974003, Loss: 2.2581934928894043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16844 , TFLOPS: 98.35093243016497, Tokens per sec: 80365.07866692187, Loss: 2.241812229156494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16845 , TFLOPS: 97.19623689352152, Tokens per sec: 79421.54721942301, Loss: 2.229618549346924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16846 , TFLOPS: 98.34648732289055, Tokens per sec: 80361.44645534069, Loss: 2.284546375274658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16847 , TFLOPS: 97.15296267293878, Tokens per sec: 79386.18673980732, Loss: 2.2523560523986816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16848 , TFLOPS: 97.72640039575784, Tokens per sec: 79854.75746472298, Loss: 2.264481544494629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16849 , TFLOPS: 95.45105527112601, Tokens per sec: 77995.51439079204, Loss: 2.24101185798645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16850 , TFLOPS: 97.92558656435253, Tokens per sec: 80017.51761058984, Loss: 2.2679264545440674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16851 , TFLOPS: 98.41835519052562, Tokens per sec: 80420.1715400288, Loss: 2.2551496028900146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16852 , TFLOPS: 97.84872213689577, Tokens per sec: 79954.70970824853, Loss: 2.2566683292388916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16853 , TFLOPS: 96.41441655481191, Tokens per sec: 78782.70169481743, Loss: 2.2564303874969482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16854 , TFLOPS: 97.14036595141602, Tokens per sec: 79375.8936343826, Loss: 2.269575357437134 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16855 , TFLOPS: 97.21408500892397, Tokens per sec: 79436.13137396972, Loss: 2.2516064643859863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16856 , TFLOPS: 97.77459069055394, Tokens per sec: 79894.13499512941, Loss: 2.258897304534912 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16857 , TFLOPS: 98.41706843341585, Tokens per sec: 80419.12009767033, Loss: 2.239837408065796 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16858 , TFLOPS: 97.10114251733378, Tokens per sec: 79343.843156693, Loss: 2.2659287452697754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16859 , TFLOPS: 96.1536025169137, Tokens per sec: 78569.58383050017, Loss: 2.261659622192383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16860 , TFLOPS: 97.92528095877114, Tokens per sec: 80017.2678924023, Loss: 2.2665352821350098 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16861 , TFLOPS: 98.40701390748141, Tokens per sec: 80410.90428570278, Loss: 2.2519092559814453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16862 , TFLOPS: 94.32293910346212, Tokens per sec: 77073.70162990023, Loss: 2.2400221824645996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16863 , TFLOPS: 98.35463431928375, Tokens per sec: 80368.10357581591, Loss: 2.2550017833709717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16864 , TFLOPS: 97.69621819941344, Tokens per sec: 79830.09481513112, Loss: 2.24483585357666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16865 , TFLOPS: 95.24637869047257, Tokens per sec: 77828.26788789619, Loss: 2.2488598823547363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16866 , TFLOPS: 97.92463246810148, Tokens per sec: 80016.73799398248, Loss: 2.2483174800872803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16867 , TFLOPS: 96.511577194839, Tokens per sec: 78862.09415492094, Loss: 2.2650439739227295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16868 , TFLOPS: 93.68128756260818, Tokens per sec: 76549.39163828836, Loss: 2.2666726112365723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16869 , TFLOPS: 97.95823096250746, Tokens per sec: 80044.1921886638, Loss: 2.2605741024017334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16870 , TFLOPS: 95.95312249460771, Tokens per sec: 78405.76644345906, Loss: 2.2396464347839355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16871 , TFLOPS: 98.44840097930481, Tokens per sec: 80444.72272748768, Loss: 2.259244918823242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16872 , TFLOPS: 94.94035160820735, Tokens per sec: 77578.20528113937, Loss: 2.257526159286499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16873 , TFLOPS: 97.83609483190813, Tokens per sec: 79944.39161228736, Loss: 2.254948854446411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16874 , TFLOPS: 95.28355435128077, Tokens per sec: 77858.64507732923, Loss: 2.2626030445098877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16875 , TFLOPS: 96.17218595221787, Tokens per sec: 78584.76883386739, Loss: 2.2546660900115967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16876 , TFLOPS: 97.28881983635674, Tokens per sec: 79497.19912531064, Loss: 2.2358057498931885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16877 , TFLOPS: 95.78290978953562, Tokens per sec: 78266.68126047982, Loss: 2.2311174869537354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16878 , TFLOPS: 95.65414683225738, Tokens per sec: 78161.46573343438, Loss: 2.239302158355713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16879 , TFLOPS: 97.41117517820506, Tokens per sec: 79597.17882484161, Loss: 2.2482340335845947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16880 , TFLOPS: 97.83509261777269, Tokens per sec: 79943.57267731804, Loss: 2.2329213619232178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16881 , TFLOPS: 98.39976862051266, Tokens per sec: 80404.98397521033, Loss: 2.265514373779297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16882 , TFLOPS: 98.40438741135165, Tokens per sec: 80408.75810810312, Loss: 2.2434773445129395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16883 , TFLOPS: 97.32621264642728, Tokens per sec: 79527.75375299584, Loss: 2.2586135864257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16884 , TFLOPS: 98.42433711362395, Tokens per sec: 80425.05952338084, Loss: 2.269352912902832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16885 , TFLOPS: 97.71155504886691, Tokens per sec: 79842.62694962518, Loss: 2.2620742321014404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16886 , TFLOPS: 97.82149959608287, Tokens per sec: 79932.46546937976, Loss: 2.2484524250030518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16887 , TFLOPS: 95.38796876462125, Tokens per sec: 77943.96478233585, Loss: 2.2461886405944824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16888 , TFLOPS: 97.19009670930912, Tokens per sec: 79416.52991684065, Loss: 2.2767138481140137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16889 , TFLOPS: 98.42664088358832, Tokens per sec: 80426.94199312304, Loss: 2.267559051513672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16890 , TFLOPS: 97.88217655758089, Tokens per sec: 79982.04617658375, Loss: 2.284231185913086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16891 , TFLOPS: 97.31884970975592, Tokens per sec: 79521.73730790283, Loss: 2.2523715496063232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16892 , TFLOPS: 97.10225163745872, Tokens per sec: 79344.74944729837, Loss: 2.2427542209625244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16893 , TFLOPS: 97.18967893542198, Tokens per sec: 79416.18854304303, Loss: 2.2464864253997803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16894 , TFLOPS: 97.84076449440579, Tokens per sec: 79948.20731372201, Loss: 2.253204822540283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16895 , TFLOPS: 97.70400220678923, Tokens per sec: 79836.45532793606, Loss: 2.2600746154785156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16896 , TFLOPS: 97.65474027419482, Tokens per sec: 79796.2021346982, Loss: 2.2465567588806152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16897 , TFLOPS: 96.11494260853043, Tokens per sec: 78537.99382416562, Loss: 2.2550442218780518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16898 , TFLOPS: 97.94975263652469, Tokens per sec: 80037.26433025175, Loss: 2.2610208988189697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16899 , TFLOPS: 97.7316810011645, Tokens per sec: 79859.07238333565, Loss: 2.248340368270874 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16900 , TFLOPS: 95.09478514894354, Tokens per sec: 77704.3969027479, Loss: 2.2837257385253906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16901 , TFLOPS: 98.39582563498867, Tokens per sec: 80401.7620602372, Loss: 2.26413893699646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16902 , TFLOPS: 97.87794520537787, Tokens per sec: 79978.58862977396, Loss: 2.253474235534668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16903 , TFLOPS: 95.33838768759213, Tokens per sec: 77903.4508079649, Loss: 2.2699809074401855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16904 , TFLOPS: 97.92969029867159, Tokens per sec: 80020.87087754146, Loss: 2.2528371810913086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16905 , TFLOPS: 96.58198607195928, Tokens per sec: 78919.62706090166, Loss: 2.2416434288024902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16906 , TFLOPS: 95.87769567980887, Tokens per sec: 78344.13325142798, Loss: 2.246499538421631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16907 , TFLOPS: 97.8161287916163, Tokens per sec: 79928.07684679332, Loss: 2.244513511657715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16908 , TFLOPS: 96.02710734475727, Tokens per sec: 78466.22136905573, Loss: 2.257174491882324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16909 , TFLOPS: 97.67900285726073, Tokens per sec: 79816.0277158958, Loss: 2.265838623046875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16910 , TFLOPS: 95.65632483640685, Tokens per sec: 78163.24543669171, Loss: 2.246562957763672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16911 , TFLOPS: 97.18108164319077, Tokens per sec: 79409.1634742469, Loss: 2.24698805809021 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16912 , TFLOPS: 96.62187999370637, Tokens per sec: 78952.22541132211, Loss: 2.261734962463379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16913 , TFLOPS: 96.19486676092681, Tokens per sec: 78603.30190651942, Loss: 2.249574899673462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16914 , TFLOPS: 97.13397193725801, Tokens per sec: 79370.66892081754, Loss: 2.265925168991089 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16915 , TFLOPS: 95.83868589141758, Tokens per sec: 78312.25734913215, Loss: 2.2600202560424805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16916 , TFLOPS: 95.6835990221535, Tokens per sec: 78185.5318759653, Loss: 2.2489802837371826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16917 , TFLOPS: 97.21427181615526, Tokens per sec: 79436.28401896726, Loss: 2.2790985107421875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16918 , TFLOPS: 97.83958403697766, Tokens per sec: 79947.24273156983, Loss: 2.2494263648986816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16919 , TFLOPS: 97.27064494129024, Tokens per sec: 79482.34795068839, Loss: 2.2530901432037354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16920 , TFLOPS: 98.33111141722665, Tokens per sec: 80348.8824070117, Loss: 2.2662057876586914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16921 , TFLOPS: 97.18070680787443, Tokens per sec: 79408.8571866608, Loss: 2.2564537525177 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16922 , TFLOPS: 97.84097940837316, Tokens per sec: 79948.38292545715, Loss: 2.2412121295928955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16923 , TFLOPS: 97.88650959949449, Tokens per sec: 79985.58681667376, Loss: 2.275183916091919 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16924 , TFLOPS: 97.83740187162658, Tokens per sec: 79945.45962808753, Loss: 2.2460575103759766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16925 , TFLOPS: 95.362327600155, Tokens per sec: 77923.01272678819, Loss: 2.25064754486084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16926 , TFLOPS: 97.83086849476246, Tokens per sec: 79940.12103766776, Loss: 2.2475500106811523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16927 , TFLOPS: 98.36186278327379, Tokens per sec: 80374.0101398194, Loss: 2.2475757598876953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16928 , TFLOPS: 97.30776790217078, Tokens per sec: 79512.6820776539, Loss: 2.273625373840332 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16929 , TFLOPS: 97.79888707865081, Tokens per sec: 79913.98819928805, Loss: 2.2687952518463135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16930 , TFLOPS: 96.02738966821876, Tokens per sec: 78466.45206282377, Loss: 2.252815008163452 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16931 , TFLOPS: 97.09547759339357, Tokens per sec: 79339.21420151333, Loss: 2.255702495574951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16932 , TFLOPS: 97.81794005026407, Tokens per sec: 79929.55687286037, Loss: 2.251781702041626 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16933 , TFLOPS: 98.4279872455315, Tokens per sec: 80428.04214012525, Loss: 2.2614452838897705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16934 , TFLOPS: 97.78129434456169, Tokens per sec: 79899.61272338656, Loss: 2.2488393783569336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16935 , TFLOPS: 96.09558478441348, Tokens per sec: 78522.17604776494, Loss: 2.2698965072631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16936 , TFLOPS: 97.24071665845213, Tokens per sec: 79457.89278035883, Loss: 2.257427930831909 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16937 , TFLOPS: 97.90839341781646, Tokens per sec: 80003.46864796424, Loss: 2.2549562454223633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16938 , TFLOPS: 95.5861219931711, Tokens per sec: 78105.88088630183, Loss: 2.2513763904571533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16939 , TFLOPS: 97.9847648837302, Tokens per sec: 80065.8737387388, Loss: 2.2439253330230713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16940 , TFLOPS: 98.40631286446485, Tokens per sec: 80410.3314454075, Loss: 2.248821496963501 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16941 , TFLOPS: 95.3140423765479, Tokens per sec: 77883.55762760658, Loss: 2.2097251415252686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16942 , TFLOPS: 98.43876746331645, Tokens per sec: 80436.85093358484, Loss: 2.2467381954193115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16943 , TFLOPS: 97.22092939467214, Tokens per sec: 79441.7240977547, Loss: 2.2490415573120117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16944 , TFLOPS: 96.63101968812977, Tokens per sec: 78959.69369091214, Loss: 2.2572014331817627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16945 , TFLOPS: 97.81921268997101, Tokens per sec: 79930.59677952556, Loss: 2.2818763256073 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16946 , TFLOPS: 96.12625845496177, Tokens per sec: 78547.24029358049, Loss: 2.2435286045074463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16947 , TFLOPS: 97.7894291902133, Tokens per sec: 79906.25991517806, Loss: 2.2688491344451904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16948 , TFLOPS: 94.90104364910938, Tokens per sec: 77546.08573588352, Loss: 2.2419016361236572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16949 , TFLOPS: 97.81349185808564, Tokens per sec: 79925.92214052471, Loss: 2.248086452484131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16950 , TFLOPS: 96.7477231866093, Tokens per sec: 79055.05512373509, Loss: 2.2495455741882324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16951 , TFLOPS: 95.5360781019082, Tokens per sec: 78064.98873450654, Loss: 2.2417242527008057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16952 , TFLOPS: 97.87886363605142, Tokens per sec: 79979.33910311983, Loss: 2.2523646354675293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16953 , TFLOPS: 95.36481838259076, Tokens per sec: 77925.04800923489, Loss: 2.249610662460327 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16954 , TFLOPS: 96.24311035750729, Tokens per sec: 78642.72299118599, Loss: 2.2546424865722656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16955 , TFLOPS: 97.11796831718141, Tokens per sec: 79357.5919508831, Loss: 2.254347085952759 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16956 , TFLOPS: 97.85405944510654, Tokens per sec: 79959.07096018209, Loss: 2.255568265914917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16957 , TFLOPS: 97.15742759968656, Tokens per sec: 79389.83514639035, Loss: 2.2459545135498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16958 , TFLOPS: 98.44913829132132, Tokens per sec: 80445.32520411647, Loss: 2.2490758895874023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16959 , TFLOPS: 97.33123103169362, Tokens per sec: 79531.8544047817, Loss: 2.2618649005889893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16960 , TFLOPS: 97.83541711879417, Tokens per sec: 79943.83783545608, Loss: 2.2572972774505615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16961 , TFLOPS: 97.26456961557825, Tokens per sec: 79477.38364565633, Loss: 2.2609469890594482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16962 , TFLOPS: 97.25270676232931, Tokens per sec: 79467.6902028896, Loss: 2.249962091445923 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16963 , TFLOPS: 95.35636048604798, Tokens per sec: 77918.13684424415, Loss: 2.249934196472168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16964 , TFLOPS: 97.90460551547478, Tokens per sec: 80000.37345545158, Loss: 2.2510435581207275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16965 , TFLOPS: 98.36072262898693, Tokens per sec: 80373.07849040143, Loss: 2.2525253295898438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16966 , TFLOPS: 97.23268978489595, Tokens per sec: 79451.33381534596, Loss: 2.2528202533721924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16967 , TFLOPS: 97.81333015085714, Tokens per sec: 79925.7900053856, Loss: 2.262777090072632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16968 , TFLOPS: 96.73094123134005, Tokens per sec: 79041.34215607862, Loss: 2.250702142715454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16969 , TFLOPS: 97.0918705792321, Tokens per sec: 79336.26681738917, Loss: 2.2709033489227295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16970 , TFLOPS: 97.96361931413131, Tokens per sec: 80048.59514948417, Loss: 2.2585318088531494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16971 , TFLOPS: 97.69913682576517, Tokens per sec: 79832.47969985523, Loss: 2.247429370880127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16972 , TFLOPS: 97.80310166683525, Tokens per sec: 79917.4320477867, Loss: 2.2550034523010254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16973 , TFLOPS: 96.14059379100337, Tokens per sec: 78558.95406568445, Loss: 2.266897678375244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16974 , TFLOPS: 97.9605941329457, Tokens per sec: 80046.12319606231, Loss: 2.242067575454712 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16975 , TFLOPS: 97.94109943517886, Tokens per sec: 80030.19357667885, Loss: 2.2523200511932373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16976 , TFLOPS: 95.00859668512874, Tokens per sec: 77633.97009027639, Loss: 2.2685210704803467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16977 , TFLOPS: 97.90371408007564, Tokens per sec: 79999.64504063936, Loss: 2.2317957878112793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16978 , TFLOPS: 98.37113758190249, Tokens per sec: 80381.58881653346, Loss: 2.2797982692718506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16979 , TFLOPS: 95.83947879301043, Tokens per sec: 78312.90524943477, Loss: 2.2585835456848145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16980 , TFLOPS: 97.74969935960988, Tokens per sec: 79873.79564785516, Loss: 2.2571284770965576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16981 , TFLOPS: 97.1926925657094, Tokens per sec: 79418.6510579286, Loss: 2.2389416694641113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16982 , TFLOPS: 96.55587635859717, Tokens per sec: 78898.29214198967, Loss: 2.259230613708496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16983 , TFLOPS: 97.67145623483864, Tokens per sec: 79809.86117644738, Loss: 2.2539889812469482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16984 , TFLOPS: 96.1748998657487, Tokens per sec: 78586.98644247577, Loss: 2.240705966949463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16985 , TFLOPS: 97.7239622198259, Tokens per sec: 79852.76516840477, Loss: 2.2569384574890137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16986 , TFLOPS: 94.99882985152554, Tokens per sec: 77625.98935911855, Loss: 2.2689425945281982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16987 , TFLOPS: 97.81834703325605, Tokens per sec: 79929.88942913979, Loss: 2.2670767307281494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16988 , TFLOPS: 95.9476449376408, Tokens per sec: 78401.29059065653, Loss: 2.2512757778167725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16989 , TFLOPS: 95.82404487639141, Tokens per sec: 78300.29379886102, Loss: 2.2602615356445312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16990 , TFLOPS: 97.30931040738406, Tokens per sec: 79513.94249837128, Loss: 2.241384744644165 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16991 , TFLOPS: 96.40271007611908, Tokens per sec: 78773.13602972594, Loss: 2.269348621368408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16992 , TFLOPS: 95.14640822894278, Tokens per sec: 77746.57945031159, Loss: 2.255843162536621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16993 , TFLOPS: 97.96059571078659, Tokens per sec: 80046.12448535672, Loss: 2.250502347946167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16994 , TFLOPS: 97.2021851604887, Tokens per sec: 79426.40770148391, Loss: 2.249025821685791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16995 , TFLOPS: 97.76864165540037, Tokens per sec: 79889.27388536365, Loss: 2.2803406715393066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16996 , TFLOPS: 97.7785937364871, Tokens per sec: 79897.40598700898, Loss: 2.259119749069214 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16997 , TFLOPS: 97.90526106574464, Tokens per sec: 80000.90912245245, Loss: 2.2396399974823 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16998 , TFLOPS: 97.27374048228494, Tokens per sec: 79484.87739692147, Loss: 2.244189977645874 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 16999 , TFLOPS: 97.79235752808385, Tokens per sec: 79908.65273543421, Loss: 2.266730546951294 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17000 , TFLOPS: 97.42656026776368, Tokens per sec: 79609.7503775674, Loss: 2.253953695297241 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/model.pt +[rank0]:[2024-08-30 10:46:29,412] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007807054003933445, 'preprocessing_with_comm': 0.0017308520036749542, 'state_converting': 2.647278971999185, : 2.6585015149903484}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000, took 14.99s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015000 + eval ppl=7.653107166290283, eval loss=2.035111665725708 +------------------------------------------------------------------ +iteration: 17001 , TFLOPS: 97.18962966585707, Tokens per sec: 79416.14828361322, Loss: 2.2586264610290527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17002 , TFLOPS: 95.96585109807906, Tokens per sec: 78416.1673130189, Loss: 2.2533257007598877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17003 , TFLOPS: 97.72261299537158, Tokens per sec: 79851.662682371, Loss: 2.254558563232422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17004 , TFLOPS: 96.69355175066681, Tokens per sec: 79010.79024892965, Loss: 2.254936933517456 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17005 , TFLOPS: 96.65917108344341, Tokens per sec: 78982.69692070431, Loss: 2.2394914627075195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17006 , TFLOPS: 97.75747382203117, Tokens per sec: 79880.14836123207, Loss: 2.26011323928833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17007 , TFLOPS: 96.75087572878269, Tokens per sec: 79057.63114709861, Loss: 2.2685132026672363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17008 , TFLOPS: 97.73802342383797, Tokens per sec: 79864.25494017062, Loss: 2.2400145530700684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17009 , TFLOPS: 97.39542677787406, Tokens per sec: 79584.31040153127, Loss: 2.2645888328552246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17010 , TFLOPS: 97.23987050340146, Tokens per sec: 79457.20136528496, Loss: 2.255411148071289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17011 , TFLOPS: 97.72879185556428, Tokens per sec: 79856.71158809248, Loss: 2.2666618824005127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17012 , TFLOPS: 97.13292308608467, Tokens per sec: 79369.81187752397, Loss: 2.2586190700531006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17013 , TFLOPS: 97.79630725761368, Tokens per sec: 79911.8801611081, Loss: 2.256974458694458 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17014 , TFLOPS: 96.58394604324886, Tokens per sec: 78921.22860389654, Loss: 2.2657535076141357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17015 , TFLOPS: 96.29738911592857, Tokens per sec: 78687.07556195155, Loss: 2.2692434787750244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17016 , TFLOPS: 98.37140658500088, Tokens per sec: 80381.80862588991, Loss: 2.2616400718688965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17017 , TFLOPS: 97.22314412351844, Tokens per sec: 79443.53380970721, Loss: 2.2677717208862305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17018 , TFLOPS: 98.34363954298756, Tokens per sec: 80359.11946107357, Loss: 2.263272762298584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17019 , TFLOPS: 97.13537360412036, Tokens per sec: 79371.81425889287, Loss: 2.259197473526001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17020 , TFLOPS: 97.81835503675599, Tokens per sec: 79929.8959690056, Loss: 2.2529120445251465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17021 , TFLOPS: 97.90631066930854, Tokens per sec: 80001.76678054371, Loss: 2.2520875930786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17022 , TFLOPS: 97.12088414203242, Tokens per sec: 79359.97454642889, Loss: 2.253105401992798 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17023 , TFLOPS: 97.81751314879195, Tokens per sec: 79929.20804067807, Loss: 2.2473580837249756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17024 , TFLOPS: 97.28106535032329, Tokens per sec: 79490.86273515447, Loss: 2.252124547958374 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17025 , TFLOPS: 97.799750609567, Tokens per sec: 79914.69381262893, Loss: 2.264575958251953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17026 , TFLOPS: 97.78448165002094, Tokens per sec: 79902.21715272586, Loss: 2.2661685943603516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17027 , TFLOPS: 97.05083084880286, Tokens per sec: 79302.73219719881, Loss: 2.293849468231201 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17028 , TFLOPS: 97.74134614915819, Tokens per sec: 79866.97002456433, Loss: 2.2384862899780273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17029 , TFLOPS: 98.40527287390735, Tokens per sec: 80409.48164235122, Loss: 2.234076976776123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17030 , TFLOPS: 97.67396426249603, Tokens per sec: 79811.9105504086, Loss: 2.256316661834717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17031 , TFLOPS: 96.61289422071881, Tokens per sec: 78944.88290489961, Loss: 2.2382946014404297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17032 , TFLOPS: 97.81323746946903, Tokens per sec: 79925.71427303753, Loss: 2.2604782581329346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17033 , TFLOPS: 96.89273412398424, Tokens per sec: 79173.54729357777, Loss: 2.2747962474823 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17034 , TFLOPS: 97.20551877234946, Tokens per sec: 79429.13168155015, Loss: 2.2445907592773438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17035 , TFLOPS: 97.16106787838378, Tokens per sec: 79392.80971182296, Loss: 2.2469804286956787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17036 , TFLOPS: 97.89735440360803, Tokens per sec: 79994.44838528502, Loss: 2.269139528274536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17037 , TFLOPS: 97.00691288074658, Tokens per sec: 79266.84569495094, Loss: 2.2623283863067627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17038 , TFLOPS: 97.3472869182034, Tokens per sec: 79544.97408296412, Loss: 2.24363112449646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17039 , TFLOPS: 95.96228174420492, Tokens per sec: 78413.25070208499, Loss: 2.23699688911438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17040 , TFLOPS: 97.75090435140363, Tokens per sec: 79874.7802776691, Loss: 2.257885456085205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17041 , TFLOPS: 97.75826190048953, Tokens per sec: 79880.79232042734, Loss: 2.2563610076904297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17042 , TFLOPS: 96.59268273946682, Tokens per sec: 78928.3675832788, Loss: 2.2617502212524414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17043 , TFLOPS: 97.74155567627955, Tokens per sec: 79867.1412345689, Loss: 2.2262420654296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17044 , TFLOPS: 97.02154641256656, Tokens per sec: 79278.80313050168, Loss: 2.2596802711486816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17045 , TFLOPS: 97.22059071404173, Tokens per sec: 79441.44735309319, Loss: 2.2481675148010254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17046 , TFLOPS: 97.83185224440415, Tokens per sec: 79940.92488482395, Loss: 2.290797233581543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17047 , TFLOPS: 97.22485065492256, Tokens per sec: 79444.92826044754, Loss: 2.245438814163208 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17048 , TFLOPS: 97.18620879402476, Tokens per sec: 79413.35300117797, Loss: 2.251889705657959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17049 , TFLOPS: 97.80030390270481, Tokens per sec: 79915.14592269486, Loss: 2.2619552612304688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17050 , TFLOPS: 97.87326439225279, Tokens per sec: 79974.7638168746, Loss: 2.2521870136260986 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17051 , TFLOPS: 97.77654841504572, Tokens per sec: 79895.73470220796, Loss: 2.244262218475342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17052 , TFLOPS: 96.70164188779538, Tokens per sec: 79017.40090823606, Loss: 2.2599124908447266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17053 , TFLOPS: 97.34801014282846, Tokens per sec: 79545.56504842272, Loss: 2.2462639808654785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17054 , TFLOPS: 98.41274574805178, Tokens per sec: 80415.58792018352, Loss: 2.2404532432556152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17055 , TFLOPS: 96.80813908815668, Tokens per sec: 79104.4225121331, Loss: 2.273623466491699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17056 , TFLOPS: 98.40942989747074, Tokens per sec: 80412.87845331618, Loss: 2.239567995071411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17057 , TFLOPS: 97.10772060504235, Tokens per sec: 79349.2182814947, Loss: 2.2542922496795654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17058 , TFLOPS: 97.15502134692838, Tokens per sec: 79387.86893531919, Loss: 2.27163028717041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17059 , TFLOPS: 97.87923104787787, Tokens per sec: 79979.63932478124, Loss: 2.255004644393921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17060 , TFLOPS: 97.84485055690017, Tokens per sec: 79951.54614057079, Loss: 2.244971513748169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17061 , TFLOPS: 97.7237903343392, Tokens per sec: 79852.62471634884, Loss: 2.243453025817871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17062 , TFLOPS: 97.25478193024712, Tokens per sec: 79469.38587601483, Loss: 2.239893674850464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17063 , TFLOPS: 97.27257000220591, Tokens per sec: 79483.92096751805, Loss: 2.260939121246338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17064 , TFLOPS: 97.73019181349989, Tokens per sec: 79857.85552975978, Loss: 2.2545042037963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17065 , TFLOPS: 97.15534873008865, Tokens per sec: 79388.13644852705, Loss: 2.256345272064209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17066 , TFLOPS: 98.4618034381903, Tokens per sec: 80455.67422165301, Loss: 2.25760555267334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17067 , TFLOPS: 97.79031355468132, Tokens per sec: 79906.98255215044, Loss: 2.254443645477295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17068 , TFLOPS: 97.23657234009436, Tokens per sec: 79454.50635114443, Loss: 2.2372634410858154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17069 , TFLOPS: 97.09801555080179, Tokens per sec: 79341.28803184442, Loss: 2.251828908920288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17070 , TFLOPS: 97.32622170795383, Tokens per sec: 79527.76115740246, Loss: 2.2476487159729004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17071 , TFLOPS: 97.06679227918788, Tokens per sec: 79315.77469285017, Loss: 2.2548017501831055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17072 , TFLOPS: 97.67266387939678, Tokens per sec: 79810.84797390336, Loss: 2.257075786590576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17073 , TFLOPS: 97.17300122100389, Tokens per sec: 79402.56075326943, Loss: 2.2553977966308594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17074 , TFLOPS: 98.41970274341367, Tokens per sec: 80421.27266018272, Loss: 2.248643398284912 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17075 , TFLOPS: 97.61232031854094, Tokens per sec: 79761.53969694661, Loss: 2.2442777156829834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17076 , TFLOPS: 97.00103803004646, Tokens per sec: 79262.04520321176, Loss: 2.2645788192749023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17077 , TFLOPS: 97.07283274560749, Tokens per sec: 79320.71051345709, Loss: 2.2463531494140625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17078 , TFLOPS: 96.98618314486366, Tokens per sec: 79249.90689413014, Loss: 2.2372262477874756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17079 , TFLOPS: 97.30102446064238, Tokens per sec: 79507.17183798942, Loss: 2.2423970699310303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17080 , TFLOPS: 97.28019595452803, Tokens per sec: 79490.15232946993, Loss: 2.259988307952881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17081 , TFLOPS: 97.11946538144157, Tokens per sec: 79358.81524062796, Loss: 2.26168155670166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17082 , TFLOPS: 97.76964056173914, Tokens per sec: 79890.0901174472, Loss: 2.258838176727295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17083 , TFLOPS: 97.16851808026382, Tokens per sec: 79398.89746356441, Loss: 2.255614757537842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17084 , TFLOPS: 97.4416964627693, Tokens per sec: 79622.11855214681, Loss: 2.256542682647705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17085 , TFLOPS: 97.71576713826165, Tokens per sec: 79846.06875629845, Loss: 2.254255533218384 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17086 , TFLOPS: 97.26154698995897, Tokens per sec: 79474.9137804535, Loss: 2.249387741088867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17087 , TFLOPS: 97.09505152003456, Tokens per sec: 79338.86604600358, Loss: 2.245189905166626 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17088 , TFLOPS: 97.82735355416402, Tokens per sec: 79937.24888922216, Loss: 2.259505271911621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17089 , TFLOPS: 97.10468944997405, Tokens per sec: 79346.74144665952, Loss: 2.266059398651123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17090 , TFLOPS: 95.73934575900405, Tokens per sec: 78231.08396969456, Loss: 2.2380592823028564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17091 , TFLOPS: 97.34927121544092, Tokens per sec: 79546.59550331726, Loss: 2.2433619499206543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17092 , TFLOPS: 98.42815577029346, Tokens per sec: 80428.17984604648, Loss: 2.248598575592041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17093 , TFLOPS: 96.74844056150896, Tokens per sec: 79055.64130923247, Loss: 2.2324140071868896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17094 , TFLOPS: 98.37714172240995, Tokens per sec: 80386.49495430243, Loss: 2.2598369121551514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17095 , TFLOPS: 96.55565552738359, Tokens per sec: 78898.11169512034, Loss: 2.256288528442383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17096 , TFLOPS: 98.35631128057321, Tokens per sec: 80369.47386405435, Loss: 2.294917345046997 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17097 , TFLOPS: 97.31624769417058, Tokens per sec: 79519.61113398621, Loss: 2.244741201400757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17098 , TFLOPS: 97.9663568895437, Tokens per sec: 80050.83209282509, Loss: 2.259298801422119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17099 , TFLOPS: 97.1679664086555, Tokens per sec: 79398.44667849196, Loss: 2.2513961791992188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17100 , TFLOPS: 96.66905457130127, Tokens per sec: 78990.77297305656, Loss: 2.249467611312866 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17101 , TFLOPS: 97.80367291808611, Tokens per sec: 79917.89883188909, Loss: 2.223677158355713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17102 , TFLOPS: 97.9394463342492, Tokens per sec: 80028.84278535463, Loss: 2.2738091945648193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17103 , TFLOPS: 97.256396352631, Tokens per sec: 79470.70506210362, Loss: 2.2593700885772705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17104 , TFLOPS: 98.36434598270206, Tokens per sec: 80376.03922599548, Loss: 2.2509570121765137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17105 , TFLOPS: 95.41180876874789, Tokens per sec: 77963.44506340403, Loss: 2.2667694091796875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17106 , TFLOPS: 97.68673003771046, Tokens per sec: 79822.3417939566, Loss: 2.2439322471618652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17107 , TFLOPS: 97.79893674162155, Tokens per sec: 79914.02878017987, Loss: 2.2689414024353027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17108 , TFLOPS: 97.73526846757521, Tokens per sec: 79862.00379448944, Loss: 2.2458016872406006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17109 , TFLOPS: 97.56110905786076, Tokens per sec: 79719.69365754971, Loss: 2.24668288230896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17110 , TFLOPS: 96.90252896708064, Tokens per sec: 79181.55091201352, Loss: 2.2332699298858643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17111 , TFLOPS: 96.54148145744036, Tokens per sec: 78886.52969769672, Loss: 2.2445578575134277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17112 , TFLOPS: 98.36771350356275, Tokens per sec: 80378.79091397845, Loss: 2.2406551837921143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17113 , TFLOPS: 97.68616595244677, Tokens per sec: 79821.8808653668, Loss: 2.2602365016937256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17114 , TFLOPS: 97.10752700660773, Tokens per sec: 79349.0600872301, Loss: 2.2598447799682617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17115 , TFLOPS: 97.14515757540629, Tokens per sec: 79379.80899369242, Loss: 2.234435796737671 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17116 , TFLOPS: 97.15504943549878, Tokens per sec: 79387.89188721313, Loss: 2.2634410858154297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17117 , TFLOPS: 97.07610680981935, Tokens per sec: 79323.3858356065, Loss: 2.2509613037109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17118 , TFLOPS: 96.87676362036704, Tokens per sec: 79160.49738395482, Loss: 2.2480363845825195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17119 , TFLOPS: 97.15220172360975, Tokens per sec: 79385.56494852199, Loss: 2.259674072265625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17120 , TFLOPS: 97.5216766700852, Tokens per sec: 79687.47243841832, Loss: 2.2845754623413086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17121 , TFLOPS: 97.74153009146572, Tokens per sec: 79867.12032855891, Loss: 2.2542030811309814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17122 , TFLOPS: 96.23675500468929, Tokens per sec: 78637.52986879706, Loss: 2.2756853103637695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17123 , TFLOPS: 98.44352699921463, Tokens per sec: 80440.74007288858, Loss: 2.238830327987671 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17124 , TFLOPS: 97.27716058244684, Tokens per sec: 79487.67204880496, Loss: 2.255452871322632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17125 , TFLOPS: 97.18782438771456, Tokens per sec: 79414.67314437099, Loss: 2.2503280639648438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17126 , TFLOPS: 97.7991075810518, Tokens per sec: 79914.16837747613, Loss: 2.2477598190307617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17127 , TFLOPS: 96.55951871693843, Tokens per sec: 78901.26840675295, Loss: 2.2545864582061768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17128 , TFLOPS: 96.62056127339407, Tokens per sec: 78951.14785100806, Loss: 2.2402827739715576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17129 , TFLOPS: 96.73682651571148, Tokens per sec: 79046.15117343921, Loss: 2.256784439086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17130 , TFLOPS: 97.7574842377483, Tokens per sec: 79880.15687218268, Loss: 2.275336503982544 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17131 , TFLOPS: 96.69490442779839, Tokens per sec: 79011.89555623487, Loss: 2.2480218410491943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17132 , TFLOPS: 97.32861034200062, Tokens per sec: 79529.71297177099, Loss: 2.2359344959259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17133 , TFLOPS: 97.19214300216547, Tokens per sec: 79418.20199540985, Loss: 2.2492589950561523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17134 , TFLOPS: 97.93846852587538, Tokens per sec: 80028.04379296135, Loss: 2.239548683166504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17135 , TFLOPS: 97.38191466816473, Tokens per sec: 79573.26930885512, Loss: 2.2514796257019043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17136 , TFLOPS: 97.99926083798633, Tokens per sec: 80077.71875612054, Loss: 2.2660648822784424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17137 , TFLOPS: 98.43583777938633, Tokens per sec: 80434.45701342876, Loss: 2.2324485778808594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17138 , TFLOPS: 97.1839799299105, Tokens per sec: 79411.53173893414, Loss: 2.259429454803467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17139 , TFLOPS: 97.6983107267934, Tokens per sec: 79831.80467311972, Loss: 2.253384828567505 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17140 , TFLOPS: 98.42900385196461, Tokens per sec: 80428.87283541153, Loss: 2.257091522216797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17141 , TFLOPS: 97.03599375599812, Tokens per sec: 79290.60842672731, Loss: 2.26352596282959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17142 , TFLOPS: 98.39371989659926, Tokens per sec: 80400.0414071927, Loss: 2.2503271102905273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17143 , TFLOPS: 97.87366116883227, Tokens per sec: 79975.08803323164, Loss: 2.2453551292419434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17144 , TFLOPS: 97.59234656212783, Tokens per sec: 79745.21862641122, Loss: 2.2608633041381836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17145 , TFLOPS: 97.80818459984069, Tokens per sec: 79921.58544319178, Loss: 2.2559409141540527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17146 , TFLOPS: 97.76015159167866, Tokens per sec: 79882.33643574297, Loss: 2.257119655609131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17147 , TFLOPS: 97.00360765110973, Tokens per sec: 79264.1449067305, Loss: 2.236234426498413 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17148 , TFLOPS: 97.9337056965092, Tokens per sec: 80024.15196248011, Loss: 2.2174072265625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17149 , TFLOPS: 97.21853449379563, Tokens per sec: 79439.76716259828, Loss: 2.2631237506866455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17150 , TFLOPS: 98.4268667177951, Tokens per sec: 80427.12652806689, Loss: 2.270679235458374 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17151 , TFLOPS: 97.16081773616041, Tokens per sec: 79392.60531417302, Loss: 2.256722927093506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17152 , TFLOPS: 97.1122617597905, Tokens per sec: 79352.92897593914, Loss: 2.2576441764831543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17153 , TFLOPS: 97.23255187872425, Tokens per sec: 79451.22112866305, Loss: 2.2472870349884033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17154 , TFLOPS: 97.12203203435294, Tokens per sec: 79360.91251879334, Loss: 2.255232810974121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17155 , TFLOPS: 97.11176514478618, Tokens per sec: 79352.5231790351, Loss: 2.242258071899414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17156 , TFLOPS: 97.18398664166494, Tokens per sec: 79411.53722328147, Loss: 2.273991584777832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17157 , TFLOPS: 96.84991919825103, Tokens per sec: 79138.56211560691, Loss: 2.2352652549743652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17158 , TFLOPS: 97.80760544933639, Tokens per sec: 79921.11220440606, Loss: 2.2847466468811035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17159 , TFLOPS: 96.6346286582558, Tokens per sec: 78962.64267330528, Loss: 2.2543246746063232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17160 , TFLOPS: 97.92961281861962, Tokens per sec: 80020.80756659649, Loss: 2.2495198249816895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17161 , TFLOPS: 97.15346910336687, Tokens per sec: 79386.60055714678, Loss: 2.285071849822998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17162 , TFLOPS: 97.17723667755995, Tokens per sec: 79406.02165385174, Loss: 2.2448713779449463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17163 , TFLOPS: 97.94288839209362, Tokens per sec: 80031.65537942575, Loss: 2.254206418991089 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17164 , TFLOPS: 96.62719100946474, Tokens per sec: 78956.56517901599, Loss: 2.2698771953582764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17165 , TFLOPS: 97.75172225851588, Tokens per sec: 79875.44861062533, Loss: 2.255995750427246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17166 , TFLOPS: 95.98315466798626, Tokens per sec: 78430.30650542359, Loss: 2.2584404945373535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17167 , TFLOPS: 96.90268083450111, Tokens per sec: 79181.6750067922, Loss: 2.253075361251831 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17168 , TFLOPS: 97.76408366564752, Tokens per sec: 79885.54943460351, Loss: 2.2570841312408447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17169 , TFLOPS: 97.27476874475198, Tokens per sec: 79485.71761665311, Loss: 2.263859748840332 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17170 , TFLOPS: 97.63801036343733, Tokens per sec: 79782.53169395204, Loss: 2.2440943717956543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17171 , TFLOPS: 97.12794106312056, Tokens per sec: 79365.74093831131, Loss: 2.2651357650756836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17172 , TFLOPS: 97.77139396845095, Tokens per sec: 79891.52287120813, Loss: 2.2534308433532715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17173 , TFLOPS: 97.22741138855099, Tokens per sec: 79447.02070181449, Loss: 2.265920639038086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17174 , TFLOPS: 97.8980693620728, Tokens per sec: 79995.03259625127, Loss: 2.250065326690674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17175 , TFLOPS: 97.81668749023835, Tokens per sec: 79928.53337381959, Loss: 2.2526700496673584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17176 , TFLOPS: 97.17158741222106, Tokens per sec: 79401.40549371831, Loss: 2.2527334690093994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17177 , TFLOPS: 97.7708166527156, Tokens per sec: 79891.05113165933, Loss: 2.276191473007202 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17178 , TFLOPS: 98.41579333437717, Tokens per sec: 80418.07818142185, Loss: 2.2441444396972656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17179 , TFLOPS: 97.76277183412908, Tokens per sec: 79884.47750330024, Loss: 2.2630975246429443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17180 , TFLOPS: 97.86662901033084, Tokens per sec: 79969.34187549626, Loss: 2.2535147666931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17181 , TFLOPS: 98.42693495866706, Tokens per sec: 80427.18228944005, Loss: 2.248340129852295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17182 , TFLOPS: 96.96190556013714, Tokens per sec: 79230.06905468948, Loss: 2.2430002689361572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17183 , TFLOPS: 97.26247473443387, Tokens per sec: 79475.67186434621, Loss: 2.249082088470459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17184 , TFLOPS: 97.7666972668026, Tokens per sec: 79887.6850753874, Loss: 2.2586727142333984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17185 , TFLOPS: 97.678274064436, Tokens per sec: 79815.43220051793, Loss: 2.2594046592712402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17186 , TFLOPS: 97.79253593262531, Tokens per sec: 79908.79851437757, Loss: 2.2516796588897705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17187 , TFLOPS: 97.21220346787742, Tokens per sec: 79434.5939183454, Loss: 2.2474095821380615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17188 , TFLOPS: 97.69610278840497, Tokens per sec: 79830.00050982516, Loss: 2.2577321529388428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17189 , TFLOPS: 96.7869433729425, Tokens per sec: 79087.10294760535, Loss: 2.2692761421203613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17190 , TFLOPS: 97.72721347321607, Tokens per sec: 79855.42185124458, Loss: 2.2627384662628174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17191 , TFLOPS: 96.42603610660912, Tokens per sec: 78792.1963297048, Loss: 2.258150577545166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17192 , TFLOPS: 97.69945966442793, Tokens per sec: 79832.74349963719, Loss: 2.247615337371826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17193 , TFLOPS: 96.64178299184663, Tokens per sec: 78968.48866345127, Loss: 2.2478132247924805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17194 , TFLOPS: 97.29039971738311, Tokens per sec: 79498.49008676705, Loss: 2.254971981048584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17195 , TFLOPS: 96.98442512900641, Tokens per sec: 79248.47037411782, Loss: 2.268061637878418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17196 , TFLOPS: 97.3918306555938, Tokens per sec: 79581.37191744364, Loss: 2.2410218715667725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17197 , TFLOPS: 97.17246559449218, Tokens per sec: 79402.12307905847, Loss: 2.242018222808838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17198 , TFLOPS: 97.36891497411423, Tokens per sec: 79562.64692420457, Loss: 2.2705864906311035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17199 , TFLOPS: 97.90840219416509, Tokens per sec: 80003.47581934463, Loss: 2.2557108402252197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17200 , TFLOPS: 96.58380481770055, Tokens per sec: 78921.11320486554, Loss: 2.257399797439575 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/optimizer.pt +[rank0]:[2024-08-30 11:41:53,017] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007752464996883646, 'preprocessing_with_comm': 0.0017261770262848586, 'state_converting': 2.663602145999903, : 2.6746823970170226}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200, took 14.86s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015200 + eval ppl=7.789254188537598, eval loss=2.0527451038360596 +------------------------------------------------------------------ +iteration: 17201 , TFLOPS: 97.44269991748874, Tokens per sec: 79622.93850082929, Loss: 2.2416367530822754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17202 , TFLOPS: 97.00584490541306, Tokens per sec: 79265.9730248138, Loss: 2.247431755065918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17203 , TFLOPS: 97.21723766807003, Tokens per sec: 79438.70749291463, Loss: 2.2186217308044434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17204 , TFLOPS: 96.0024002533648, Tokens per sec: 78446.03256866183, Loss: 2.2573087215423584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17205 , TFLOPS: 98.39508658266213, Tokens per sec: 80401.15816155629, Loss: 2.265613555908203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17206 , TFLOPS: 96.6401286553317, Tokens per sec: 78967.13686249852, Loss: 2.240161180496216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17207 , TFLOPS: 97.89335162526923, Tokens per sec: 79991.17761206372, Loss: 2.2642600536346436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17208 , TFLOPS: 97.76516843161781, Tokens per sec: 79886.43582480219, Loss: 2.252930164337158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17209 , TFLOPS: 97.24949682318558, Tokens per sec: 79465.06727898408, Loss: 2.2561349868774414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17210 , TFLOPS: 98.3692053511519, Tokens per sec: 80380.00994104712, Loss: 2.251674175262451 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17211 , TFLOPS: 97.91999965594471, Tokens per sec: 80012.95240390999, Loss: 2.2556629180908203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17212 , TFLOPS: 97.63469563393463, Tokens per sec: 79779.82314314666, Loss: 2.2591824531555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17213 , TFLOPS: 96.77659775997445, Tokens per sec: 79078.64928092938, Loss: 2.238663673400879 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17214 , TFLOPS: 97.08529062933026, Tokens per sec: 79330.8901709414, Loss: 2.239713668823242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17215 , TFLOPS: 97.87722468160503, Tokens per sec: 79977.99987125125, Loss: 2.2304739952087402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17216 , TFLOPS: 97.23564715651881, Tokens per sec: 79453.75035982853, Loss: 2.255547046661377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17217 , TFLOPS: 97.72512131349819, Tokens per sec: 79853.71229368211, Loss: 2.253279685974121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17218 , TFLOPS: 96.68475475815269, Tokens per sec: 79003.60199989163, Loss: 2.241482973098755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17219 , TFLOPS: 97.0692601532259, Tokens per sec: 79317.79125625537, Loss: 2.2454326152801514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17220 , TFLOPS: 97.80284862500902, Tokens per sec: 79917.22528079661, Loss: 2.2664952278137207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17221 , TFLOPS: 97.1466051175749, Tokens per sec: 79380.99181766144, Loss: 2.2731006145477295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17222 , TFLOPS: 98.39209808862057, Tokens per sec: 80398.71618614424, Loss: 2.231215715408325 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17223 , TFLOPS: 96.93927189560698, Tokens per sec: 79211.57450475951, Loss: 2.242526054382324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17224 , TFLOPS: 96.16015856814144, Tokens per sec: 78574.94094872613, Loss: 2.24330997467041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17225 , TFLOPS: 97.86209850864348, Tokens per sec: 79965.63988594203, Loss: 2.2611708641052246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17226 , TFLOPS: 98.43534217555147, Tokens per sec: 80434.05204277732, Loss: 2.2628793716430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17227 , TFLOPS: 95.6837189819037, Tokens per sec: 78185.62989816524, Loss: 2.2750866413116455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17228 , TFLOPS: 97.24062896925653, Tokens per sec: 79457.82112725978, Loss: 2.276644468307495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17229 , TFLOPS: 97.19683568617327, Tokens per sec: 79422.03650831306, Loss: 2.2314236164093018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17230 , TFLOPS: 97.9564142417723, Tokens per sec: 80042.70769938454, Loss: 2.2505083084106445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17231 , TFLOPS: 95.81419717366157, Tokens per sec: 78292.24698745806, Loss: 2.2730906009674072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17232 , TFLOPS: 97.84823001827746, Tokens per sec: 79954.30758545725, Loss: 2.257181406021118 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17233 , TFLOPS: 95.84261976827734, Tokens per sec: 78315.47182118129, Loss: 2.2716333866119385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17234 , TFLOPS: 97.7871564626367, Tokens per sec: 79904.40281097, Loss: 2.2562952041625977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17235 , TFLOPS: 97.14905721953835, Tokens per sec: 79382.9954932985, Loss: 2.2660558223724365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17236 , TFLOPS: 96.52105282561556, Tokens per sec: 78869.8369367529, Loss: 2.24941349029541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17237 , TFLOPS: 97.75120306145226, Tokens per sec: 79875.02436133931, Loss: 2.250220775604248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17238 , TFLOPS: 95.97677494990138, Tokens per sec: 78425.09347354788, Loss: 2.282465934753418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17239 , TFLOPS: 97.9566833422116, Tokens per sec: 80042.92758828077, Loss: 2.2491281032562256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17240 , TFLOPS: 96.54746570545332, Tokens per sec: 78891.41958079636, Loss: 2.264742374420166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17241 , TFLOPS: 97.27334494270444, Tokens per sec: 79484.55419134814, Loss: 2.2539761066436768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17242 , TFLOPS: 96.7359880523801, Tokens per sec: 79045.46604346717, Loss: 2.2560739517211914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17243 , TFLOPS: 97.77819018605206, Tokens per sec: 79897.07623556019, Loss: 2.2556307315826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17244 , TFLOPS: 96.7259836461783, Tokens per sec: 79037.29118562335, Loss: 2.262805700302124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17245 , TFLOPS: 97.30306856949555, Tokens per sec: 79508.8421319534, Loss: 2.2608139514923096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17246 , TFLOPS: 97.03511695387998, Tokens per sec: 79289.89196914568, Loss: 2.249734401702881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17247 , TFLOPS: 97.36501429789017, Tokens per sec: 79559.45958124954, Loss: 2.2285404205322266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17248 , TFLOPS: 98.43075778165705, Tokens per sec: 80430.30601651338, Loss: 2.2644200325012207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17249 , TFLOPS: 97.79113849417418, Tokens per sec: 79907.65663144589, Loss: 2.2711076736450195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17250 , TFLOPS: 97.76658160588107, Tokens per sec: 79887.59056587104, Loss: 2.2413458824157715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17251 , TFLOPS: 97.8027311984516, Tokens per sec: 79917.12932853372, Loss: 2.2693140506744385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17252 , TFLOPS: 97.01033770006026, Tokens per sec: 79269.64420297484, Loss: 2.2595300674438477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17253 , TFLOPS: 97.80957138963132, Tokens per sec: 79922.71862482885, Loss: 2.2443578243255615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17254 , TFLOPS: 96.62404025406961, Tokens per sec: 78953.99061567496, Loss: 2.2465901374816895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17255 , TFLOPS: 97.6936850799629, Tokens per sec: 79828.0249380198, Loss: 2.2361650466918945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17256 , TFLOPS: 96.5802203299676, Tokens per sec: 78918.18422766603, Loss: 2.2475006580352783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17257 , TFLOPS: 97.48164578372958, Tokens per sec: 79654.76217069046, Loss: 2.2715275287628174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17258 , TFLOPS: 97.80523323480253, Tokens per sec: 79919.1738068442, Loss: 2.2474920749664307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17259 , TFLOPS: 97.13074715640667, Tokens per sec: 79368.03386937066, Loss: 2.271552085876465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17260 , TFLOPS: 97.68648427784497, Tokens per sec: 79822.1409772441, Loss: 2.2485287189483643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17261 , TFLOPS: 97.28913827716988, Tokens per sec: 79497.45933149646, Loss: 2.2720515727996826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17262 , TFLOPS: 97.02924420415326, Tokens per sec: 79285.09319416595, Loss: 2.2815041542053223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17263 , TFLOPS: 97.8614875486642, Tokens per sec: 79965.14065481545, Loss: 2.262263059616089 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17264 , TFLOPS: 98.34212123948126, Tokens per sec: 80357.87881619396, Loss: 2.2678680419921875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17265 , TFLOPS: 95.95931548676835, Tokens per sec: 78410.82689677531, Loss: 2.2654473781585693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17266 , TFLOPS: 97.2319661944211, Tokens per sec: 79450.74255094209, Loss: 2.2550735473632812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17267 , TFLOPS: 97.20559864164153, Tokens per sec: 79429.1969448047, Loss: 2.2484817504882812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17268 , TFLOPS: 97.94419772515296, Tokens per sec: 80032.72526917377, Loss: 2.2565503120422363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17269 , TFLOPS: 95.92653133315328, Tokens per sec: 78384.03812091725, Loss: 2.2619025707244873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17270 , TFLOPS: 97.79368759995522, Tokens per sec: 79909.73957139939, Loss: 2.2568604946136475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17271 , TFLOPS: 96.48502716233361, Tokens per sec: 78840.39943990215, Loss: 2.2654495239257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17272 , TFLOPS: 96.98102322101272, Tokens per sec: 79245.69058752329, Loss: 2.242108106613159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17273 , TFLOPS: 97.23988010845682, Tokens per sec: 79457.20921382295, Loss: 2.2404470443725586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17274 , TFLOPS: 97.08724378354822, Tokens per sec: 79332.48614353157, Loss: 2.272764205932617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17275 , TFLOPS: 97.14504015953527, Tokens per sec: 79379.71305016168, Loss: 2.230844020843506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17276 , TFLOPS: 96.66874537981292, Tokens per sec: 78990.52032473213, Loss: 2.2544801235198975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17277 , TFLOPS: 96.59530621189785, Tokens per sec: 78930.51129013638, Loss: 2.2524068355560303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17278 , TFLOPS: 97.04589045982506, Tokens per sec: 79298.69527818813, Loss: 2.237406015396118 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17279 , TFLOPS: 96.6009697668456, Tokens per sec: 78935.13912667705, Loss: 2.2400963306427 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17280 , TFLOPS: 97.3842005106427, Tokens per sec: 79575.1371295867, Loss: 2.2787890434265137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17281 , TFLOPS: 97.68483410873158, Tokens per sec: 79820.7925815824, Loss: 2.2360146045684814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17282 , TFLOPS: 96.78167506701064, Tokens per sec: 79082.79807921017, Loss: 2.254495620727539 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17283 , TFLOPS: 97.2471842339983, Tokens per sec: 79463.17760282781, Loss: 2.246119737625122 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17284 , TFLOPS: 96.55560797839252, Tokens per sec: 78898.07284161572, Loss: 2.2463598251342773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17285 , TFLOPS: 96.68008826691498, Tokens per sec: 78999.78888977514, Loss: 2.2687718868255615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17286 , TFLOPS: 98.42249841770688, Tokens per sec: 80423.55707761477, Loss: 2.2798588275909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17287 , TFLOPS: 97.21715374727836, Tokens per sec: 79438.63891907544, Loss: 2.2460036277770996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17288 , TFLOPS: 97.24556453981184, Tokens per sec: 79461.85410901341, Loss: 2.231386423110962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17289 , TFLOPS: 97.65277938298519, Tokens per sec: 79794.59984001298, Loss: 2.2546870708465576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17290 , TFLOPS: 97.16219700643093, Tokens per sec: 79393.73235141663, Loss: 2.2279975414276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17291 , TFLOPS: 97.28295169766544, Tokens per sec: 79492.40411812667, Loss: 2.263449192047119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17292 , TFLOPS: 97.31986778336257, Tokens per sec: 79522.56920205436, Loss: 2.246635913848877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17293 , TFLOPS: 97.86064608130478, Tokens per sec: 79964.45307017476, Loss: 2.252504348754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17294 , TFLOPS: 95.43588831411854, Tokens per sec: 77983.12107978904, Loss: 2.247718572616577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17295 , TFLOPS: 97.88886198816148, Tokens per sec: 79987.50901400943, Loss: 2.252984046936035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17296 , TFLOPS: 97.67537317331536, Tokens per sec: 79813.06180770767, Loss: 2.27603816986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17297 , TFLOPS: 96.62825034941007, Tokens per sec: 78957.43079295514, Loss: 2.258559465408325 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17298 , TFLOPS: 97.89241262411139, Tokens per sec: 79990.4103300456, Loss: 2.270273447036743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17299 , TFLOPS: 96.6546186397751, Tokens per sec: 78978.97700179259, Loss: 2.2619054317474365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17300 , TFLOPS: 97.06189204752897, Tokens per sec: 79311.7705874185, Loss: 2.231978178024292 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17301 , TFLOPS: 96.57031037322234, Tokens per sec: 78910.08654690448, Loss: 2.269026756286621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17302 , TFLOPS: 98.37059865767742, Tokens per sec: 80381.14844767601, Loss: 2.2561981678009033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17303 , TFLOPS: 95.4921269478928, Tokens per sec: 78029.0751151572, Loss: 2.2414040565490723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17304 , TFLOPS: 96.52716715731, Tokens per sec: 78874.83311457733, Loss: 2.251563310623169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17305 , TFLOPS: 96.71563263372214, Tokens per sec: 79028.83310688667, Loss: 2.2504894733428955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17306 , TFLOPS: 97.36196617404273, Tokens per sec: 79556.96888078812, Loss: 2.2592780590057373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17307 , TFLOPS: 95.50909078292281, Tokens per sec: 78042.9367014482, Loss: 2.2561726570129395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17308 , TFLOPS: 97.80471477391195, Tokens per sec: 79918.75015910415, Loss: 2.2696340084075928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17309 , TFLOPS: 96.63508174609481, Tokens per sec: 78963.01290304205, Loss: 2.267749309539795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17310 , TFLOPS: 97.04117409385643, Tokens per sec: 79294.84141414509, Loss: 2.2277936935424805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17311 , TFLOPS: 97.33722124831945, Tokens per sec: 79536.74916498856, Loss: 2.2606613636016846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17312 , TFLOPS: 96.86235401899228, Tokens per sec: 79148.72292773535, Loss: 2.2515456676483154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17313 , TFLOPS: 97.59885944401526, Tokens per sec: 79750.54046985794, Loss: 2.2568440437316895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17314 , TFLOPS: 96.20569115945426, Tokens per sec: 78612.14680120102, Loss: 2.244873285293579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17315 , TFLOPS: 97.2105197805061, Tokens per sec: 79433.21813405234, Loss: 2.2548975944519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17316 , TFLOPS: 97.34475789725944, Tokens per sec: 79542.90755484803, Loss: 2.231323719024658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17317 , TFLOPS: 97.11594235154729, Tokens per sec: 79355.93648221047, Loss: 2.270709753036499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17318 , TFLOPS: 97.80490077801345, Tokens per sec: 79918.90214784365, Loss: 2.2830114364624023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17319 , TFLOPS: 97.77128045763524, Tokens per sec: 79891.43011859858, Loss: 2.2506604194641113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17320 , TFLOPS: 95.93414672614912, Tokens per sec: 78390.26085457181, Loss: 2.2522966861724854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17321 , TFLOPS: 97.90776593911741, Tokens per sec: 80002.95591896652, Loss: 2.264946937561035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17322 , TFLOPS: 96.07596277510613, Tokens per sec: 78506.14239884446, Loss: 2.2551381587982178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17323 , TFLOPS: 96.69080675824416, Tokens per sec: 79008.54724495873, Loss: 2.2303390502929688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17324 , TFLOPS: 98.39482135729986, Tokens per sec: 80400.94143908526, Loss: 2.251851797103882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17325 , TFLOPS: 97.83594053737205, Tokens per sec: 79944.26553425007, Loss: 2.251958131790161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17326 , TFLOPS: 97.10521514471792, Tokens per sec: 79347.17100536732, Loss: 2.262481212615967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17327 , TFLOPS: 97.75871131324297, Tokens per sec: 79881.15954715709, Loss: 2.2642250061035156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17328 , TFLOPS: 97.13924968980177, Tokens per sec: 79374.98150828248, Loss: 2.2580530643463135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17329 , TFLOPS: 96.92090021361363, Tokens per sec: 79196.56253047359, Loss: 2.251283645629883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17330 , TFLOPS: 97.24649117854938, Tokens per sec: 79462.61128937965, Loss: 2.264977216720581 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17331 , TFLOPS: 96.28893109936898, Tokens per sec: 78680.164298892, Loss: 2.2304182052612305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17332 , TFLOPS: 96.28173202542463, Tokens per sec: 78674.28175025144, Loss: 2.2751972675323486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17333 , TFLOPS: 97.2757573901846, Tokens per sec: 79486.52546428611, Loss: 2.25520396232605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17334 , TFLOPS: 98.36986948848052, Tokens per sec: 80380.55262475474, Loss: 2.258772850036621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17335 , TFLOPS: 96.69645521350351, Tokens per sec: 79013.1627431553, Loss: 2.2495858669281006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17336 , TFLOPS: 97.20805087445267, Tokens per sec: 79431.20072736077, Loss: 2.26885724067688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17337 , TFLOPS: 96.46170936187669, Tokens per sec: 78821.34586489522, Loss: 2.2474637031555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17338 , TFLOPS: 97.255397208433, Tokens per sec: 79469.88863565915, Loss: 2.2543411254882812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17339 , TFLOPS: 96.4579980332553, Tokens per sec: 78818.31324274058, Loss: 2.2279465198516846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17340 , TFLOPS: 98.38716010321637, Tokens per sec: 80394.68123115542, Loss: 2.2595880031585693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17341 , TFLOPS: 96.1410792330636, Tokens per sec: 78559.35073288826, Loss: 2.2432260513305664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17342 , TFLOPS: 94.71289304713524, Tokens per sec: 77392.34303558306, Loss: 2.246856927871704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17343 , TFLOPS: 96.78001758262106, Tokens per sec: 79081.443706048, Loss: 2.2537734508514404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17344 , TFLOPS: 97.7449081317806, Tokens per sec: 79869.8806122587, Loss: 2.2657601833343506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17345 , TFLOPS: 96.69582876594222, Tokens per sec: 79012.65085672679, Loss: 2.2459497451782227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17346 , TFLOPS: 96.5501578488298, Tokens per sec: 78893.6194004512, Loss: 2.2588231563568115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17347 , TFLOPS: 96.09554740707178, Tokens per sec: 78522.14550577686, Loss: 2.273751974105835 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17348 , TFLOPS: 97.74894132210453, Tokens per sec: 79873.17623589594, Loss: 2.265530586242676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17349 , TFLOPS: 96.63813980475044, Tokens per sec: 78965.511721491, Loss: 2.235668659210205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17350 , TFLOPS: 96.92578884616022, Tokens per sec: 79200.557157972, Loss: 2.2497305870056152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17351 , TFLOPS: 98.43898587555132, Tokens per sec: 80437.02940384438, Loss: 2.2596633434295654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17352 , TFLOPS: 94.74788035233232, Tokens per sec: 77420.93206331297, Loss: 2.2613651752471924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17353 , TFLOPS: 98.4383442870233, Tokens per sec: 80436.50514534216, Loss: 2.2522199153900146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17354 , TFLOPS: 96.5053731450811, Tokens per sec: 78857.0246661573, Loss: 2.269887685775757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17355 , TFLOPS: 97.24686119092624, Tokens per sec: 79462.91363601777, Loss: 2.265932083129883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17356 , TFLOPS: 97.09907095888309, Tokens per sec: 79342.15043295627, Loss: 2.2448108196258545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17357 , TFLOPS: 98.44241841831301, Tokens per sec: 80439.83422289646, Loss: 2.252779483795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17358 , TFLOPS: 95.32133386403059, Tokens per sec: 77889.51568972829, Loss: 2.2929444313049316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17359 , TFLOPS: 97.85032831735629, Tokens per sec: 79956.02215964958, Loss: 2.2681376934051514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17360 , TFLOPS: 96.47636629818115, Tokens per sec: 78833.3224248527, Loss: 2.2582461833953857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17361 , TFLOPS: 96.73325784786654, Tokens per sec: 79043.23512307742, Loss: 2.2533318996429443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17362 , TFLOPS: 97.14139685584728, Tokens per sec: 79376.73601293123, Loss: 2.259357213973999 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17363 , TFLOPS: 96.65508269074422, Tokens per sec: 78979.3561897852, Loss: 2.2495229244232178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17364 , TFLOPS: 97.20867231759598, Tokens per sec: 79431.70852455056, Loss: 2.2476308345794678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17365 , TFLOPS: 97.2068424151893, Tokens per sec: 79430.21326418818, Loss: 2.2494843006134033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17366 , TFLOPS: 97.79380561222136, Tokens per sec: 79909.83600225997, Loss: 2.2527098655700684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17367 , TFLOPS: 96.56785186687033, Tokens per sec: 78908.07763807666, Loss: 2.2534093856811523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17368 , TFLOPS: 96.72747866109597, Tokens per sec: 79038.51280079769, Loss: 2.2353696823120117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17369 , TFLOPS: 97.30850131429409, Tokens per sec: 79513.28136757956, Loss: 2.235450029373169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17370 , TFLOPS: 96.79200928482403, Tokens per sec: 79091.24243461194, Loss: 2.266469955444336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17371 , TFLOPS: 96.08236638681028, Tokens per sec: 78511.37495481154, Loss: 2.259791851043701 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17372 , TFLOPS: 97.72691658724503, Tokens per sec: 79855.17925807512, Loss: 2.2424182891845703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17373 , TFLOPS: 97.19299808131709, Tokens per sec: 79418.90070259632, Loss: 2.254594564437866 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17374 , TFLOPS: 97.81351423309566, Tokens per sec: 79925.94042372139, Loss: 2.2456204891204834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17375 , TFLOPS: 96.71454767826263, Tokens per sec: 79027.94656185145, Loss: 2.2563018798828125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17376 , TFLOPS: 97.90498935262889, Tokens per sec: 80000.68709867106, Loss: 2.2389473915100098 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17377 , TFLOPS: 97.11609804313969, Tokens per sec: 79356.06370181848, Loss: 2.2416441440582275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17378 , TFLOPS: 97.98993679938323, Tokens per sec: 80070.09984415585, Loss: 2.2602293491363525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17379 , TFLOPS: 95.38429286166613, Tokens per sec: 77940.96110740506, Loss: 2.264967679977417 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17380 , TFLOPS: 97.19441581389691, Tokens per sec: 79420.05916838298, Loss: 2.271247625350952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17381 , TFLOPS: 97.38063231454608, Tokens per sec: 79572.2214647026, Loss: 2.253957509994507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17382 , TFLOPS: 97.75183112954386, Tokens per sec: 79875.53757194486, Loss: 2.2601301670074463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17383 , TFLOPS: 96.61383170551092, Tokens per sec: 78945.64894785624, Loss: 2.2662267684936523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17384 , TFLOPS: 97.40076949631525, Tokens per sec: 79588.67607430335, Loss: 2.2503504753112793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17385 , TFLOPS: 96.63192606796734, Tokens per sec: 78960.43431720971, Loss: 2.2551960945129395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17386 , TFLOPS: 97.16022290323416, Tokens per sec: 79392.11926087663, Loss: 2.25663423538208 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17387 , TFLOPS: 96.74183886701664, Tokens per sec: 79050.24689472005, Loss: 2.2711222171783447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17388 , TFLOPS: 97.27241118049567, Tokens per sec: 79483.79119021018, Loss: 2.2261416912078857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17389 , TFLOPS: 98.37951847514435, Tokens per sec: 80388.43706014477, Loss: 2.2590246200561523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17390 , TFLOPS: 94.87354304310965, Tokens per sec: 77523.61428279257, Loss: 2.2532591819763184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17391 , TFLOPS: 98.32711690872046, Tokens per sec: 80345.61839128348, Loss: 2.254181146621704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17392 , TFLOPS: 96.56474063257777, Tokens per sec: 78905.53536844593, Loss: 2.2667183876037598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17393 , TFLOPS: 96.02068808112438, Tokens per sec: 78460.97602349475, Loss: 2.2367160320281982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17394 , TFLOPS: 97.86116934613314, Tokens per sec: 79964.88064333606, Loss: 2.2707722187042236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17395 , TFLOPS: 97.89498985437487, Tokens per sec: 79992.5162512377, Loss: 2.271411895751953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17396 , TFLOPS: 95.44705733319816, Tokens per sec: 77992.24757279511, Loss: 2.2507872581481934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17397 , TFLOPS: 97.29307333074205, Tokens per sec: 79500.67476506755, Loss: 2.262317657470703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17398 , TFLOPS: 96.5038036051605, Tokens per sec: 78855.74215468463, Loss: 2.2624378204345703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17399 , TFLOPS: 96.09087698785, Tokens per sec: 78518.32918600345, Loss: 2.2307701110839844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17400 , TFLOPS: 96.60936323810182, Tokens per sec: 78941.99764810769, Loss: 2.2594048976898193 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/optimizer.pt +[rank0]:[2024-08-30 12:37:20,974] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.00769919398589991, 'preprocessing_with_comm': 0.0018888760241679847, 'state_converting': 2.642803834984079, : 2.6540184920013417}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400, took 14.85s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015400 + eval ppl=7.528168201446533, eval loss=2.0186517238616943 +------------------------------------------------------------------ +iteration: 17401 , TFLOPS: 96.59867906130512, Tokens per sec: 78933.26733221192, Loss: 2.2450144290924072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17402 , TFLOPS: 96.68980364896933, Tokens per sec: 79007.72757854791, Loss: 2.2475218772888184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17403 , TFLOPS: 97.25391983171396, Tokens per sec: 79468.68143311098, Loss: 2.2472617626190186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17404 , TFLOPS: 96.42097230462899, Tokens per sec: 78788.05856675298, Loss: 2.230776309967041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17405 , TFLOPS: 97.83020525537609, Tokens per sec: 79939.57908769186, Loss: 2.2604260444641113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17406 , TFLOPS: 96.68463291591354, Tokens per sec: 79003.50243946153, Loss: 2.255836009979248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17407 , TFLOPS: 97.33579656749464, Tokens per sec: 79535.58502161206, Loss: 2.2511801719665527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17408 , TFLOPS: 98.23725589564293, Tokens per sec: 80272.19064426958, Loss: 2.2556371688842773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17409 , TFLOPS: 96.02424964111718, Tokens per sec: 78463.88626584758, Loss: 2.2608466148376465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17410 , TFLOPS: 97.07204542098556, Tokens per sec: 79320.06717024097, Loss: 2.2448933124542236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17411 , TFLOPS: 97.8421012134784, Tokens per sec: 79949.29958128647, Loss: 2.247103452682495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17412 , TFLOPS: 96.64041922490838, Tokens per sec: 78967.37429437955, Loss: 2.2562599182128906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17413 , TFLOPS: 96.97638349072481, Tokens per sec: 79241.89934446775, Loss: 2.2642624378204346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17414 , TFLOPS: 96.96238707639584, Tokens per sec: 79230.46251401979, Loss: 2.2580227851867676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17415 , TFLOPS: 97.23701801138246, Tokens per sec: 79454.87052062656, Loss: 2.263101100921631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17416 , TFLOPS: 97.6089487484036, Tokens per sec: 79758.78470019545, Loss: 2.2343358993530273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17417 , TFLOPS: 96.65170700107062, Tokens per sec: 78976.59782685492, Loss: 2.2372634410858154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17418 , TFLOPS: 97.22410828923861, Tokens per sec: 79444.321654337, Loss: 2.2616686820983887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17419 , TFLOPS: 96.5081291800524, Tokens per sec: 78859.27669327903, Loss: 2.264003276824951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17420 , TFLOPS: 98.43606104883264, Tokens per sec: 80434.63945264081, Loss: 2.2770068645477295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17421 , TFLOPS: 95.4433488988834, Tokens per sec: 77989.21731565293, Loss: 2.259552240371704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17422 , TFLOPS: 97.25009911146516, Tokens per sec: 79465.5594242416, Loss: 2.2766029834747314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17423 , TFLOPS: 96.00598340170879, Tokens per sec: 78448.96045141215, Loss: 2.2678849697113037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17424 , TFLOPS: 96.85453904714937, Tokens per sec: 79142.33711306725, Loss: 2.2477025985717773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17425 , TFLOPS: 96.73403994865555, Tokens per sec: 79043.8741977652, Loss: 2.2659952640533447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17426 , TFLOPS: 97.22659416026544, Tokens per sec: 79446.35292354503, Loss: 2.272580146789551 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17427 , TFLOPS: 96.57648467446154, Tokens per sec: 78915.13172738769, Loss: 2.2495429515838623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17428 , TFLOPS: 97.7568486273907, Tokens per sec: 79879.6374985974, Loss: 2.249083995819092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17429 , TFLOPS: 96.64414298188356, Tokens per sec: 78970.417072062, Loss: 2.2477831840515137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17430 , TFLOPS: 97.17612446195207, Tokens per sec: 79405.11283384757, Loss: 2.261728048324585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17431 , TFLOPS: 97.23612266276767, Tokens per sec: 79454.13890822508, Loss: 2.2643377780914307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17432 , TFLOPS: 96.81849051836254, Tokens per sec: 79112.88093222384, Loss: 2.237614154815674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17433 , TFLOPS: 98.40192257337105, Tokens per sec: 80406.7440255386, Loss: 2.254239797592163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17434 , TFLOPS: 95.65889099495057, Tokens per sec: 78165.3423108966, Loss: 2.2576937675476074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17435 , TFLOPS: 97.8001899313652, Tokens per sec: 79915.05279377938, Loss: 2.2771811485290527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17436 , TFLOPS: 97.19213791301217, Tokens per sec: 79418.19783693168, Loss: 2.265091896057129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17437 , TFLOPS: 97.823147496128, Tokens per sec: 79933.81201092737, Loss: 2.258511781692505 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17438 , TFLOPS: 97.26388045435662, Tokens per sec: 79476.82051428154, Loss: 2.2573118209838867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17439 , TFLOPS: 96.01806621429775, Tokens per sec: 78458.83362861785, Loss: 2.263991594314575 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17440 , TFLOPS: 97.80053911962901, Tokens per sec: 79915.33812449865, Loss: 2.2527685165405273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17441 , TFLOPS: 96.74409857708912, Tokens per sec: 79052.0933619904, Loss: 2.2608585357666016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17442 , TFLOPS: 97.23506463246693, Tokens per sec: 79453.27436443022, Loss: 2.248786687850952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17443 , TFLOPS: 97.79906366045039, Tokens per sec: 79914.1324888221, Loss: 2.255692958831787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17444 , TFLOPS: 97.1911509607232, Tokens per sec: 79417.39137280996, Loss: 2.2799932956695557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17445 , TFLOPS: 97.8132796450101, Tokens per sec: 79925.74873575784, Loss: 2.2391867637634277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17446 , TFLOPS: 97.1689520080581, Tokens per sec: 79399.2520371354, Loss: 2.2537572383880615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17447 , TFLOPS: 97.23277153042918, Tokens per sec: 79451.40061172548, Loss: 2.253079414367676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17448 , TFLOPS: 97.86003051095031, Tokens per sec: 79963.95007179194, Loss: 2.2575159072875977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17449 , TFLOPS: 97.28974405697028, Tokens per sec: 79497.95432976552, Loss: 2.252255916595459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17450 , TFLOPS: 95.57325652353273, Tokens per sec: 78095.36817987367, Loss: 2.261992931365967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17451 , TFLOPS: 97.81292390068681, Tokens per sec: 79925.4580479136, Loss: 2.242295265197754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17452 , TFLOPS: 97.79396347270144, Tokens per sec: 79909.96499412192, Loss: 2.242708921432495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17453 , TFLOPS: 96.50118576754915, Tokens per sec: 78853.60305218327, Loss: 2.2581334114074707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17454 , TFLOPS: 97.76266295622673, Tokens per sec: 79884.38853636346, Loss: 2.2547171115875244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17455 , TFLOPS: 96.161990908179, Tokens per sec: 78576.43820093946, Loss: 2.2844784259796143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17456 , TFLOPS: 97.76432151221022, Tokens per sec: 79885.74378515259, Loss: 2.2606894969940186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17457 , TFLOPS: 96.05891899638058, Tokens per sec: 78492.21548850191, Loss: 2.2554409503936768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17458 , TFLOPS: 98.39621315255732, Tokens per sec: 80402.07871081812, Loss: 2.2648143768310547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17459 , TFLOPS: 96.05328244701653, Tokens per sec: 78487.60971891884, Loss: 2.2761898040771484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17460 , TFLOPS: 96.57473873235384, Tokens per sec: 78913.70507314694, Loss: 2.2542691230773926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17461 , TFLOPS: 96.67516421687593, Tokens per sec: 78995.76532173189, Loss: 2.2689316272735596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17462 , TFLOPS: 96.2218381317778, Tokens per sec: 78625.34090794662, Loss: 2.260399341583252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17463 , TFLOPS: 97.42131494077182, Tokens per sec: 79605.46428585562, Loss: 2.286135196685791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17464 , TFLOPS: 96.8511110584042, Tokens per sec: 79139.53601521927, Loss: 2.247572422027588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17465 , TFLOPS: 97.0836166361542, Tokens per sec: 79329.52230802484, Loss: 2.245504856109619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17466 , TFLOPS: 97.36298444752401, Tokens per sec: 79557.80093826236, Loss: 2.2360987663269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17467 , TFLOPS: 97.05963803788977, Tokens per sec: 79309.92877811895, Loss: 2.2687175273895264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17468 , TFLOPS: 97.19410814777675, Tokens per sec: 79419.80776647617, Loss: 2.2759580612182617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17469 , TFLOPS: 97.80153326862066, Tokens per sec: 79916.1504692314, Loss: 2.255711793899536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17470 , TFLOPS: 96.73674794497397, Tokens per sec: 79046.086971267, Loss: 2.2640581130981445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17471 , TFLOPS: 98.34978042038479, Tokens per sec: 80364.13733007509, Loss: 2.275416374206543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17472 , TFLOPS: 94.94213122086062, Tokens per sec: 77579.65944844965, Loss: 2.240079641342163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17473 , TFLOPS: 97.8314414709679, Tokens per sec: 79940.58923127495, Loss: 2.265676498413086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17474 , TFLOPS: 96.57577462819525, Tokens per sec: 78914.55153030528, Loss: 2.2545816898345947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17475 , TFLOPS: 97.7406135637013, Tokens per sec: 79866.37141012908, Loss: 2.249551296234131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17476 , TFLOPS: 97.33833979857326, Tokens per sec: 79537.66316119499, Loss: 2.244791269302368 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17477 , TFLOPS: 96.80700594053928, Tokens per sec: 79103.49658804512, Loss: 2.2485578060150146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17478 , TFLOPS: 97.85156477932347, Tokens per sec: 79957.03250455234, Loss: 2.251349687576294 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17479 , TFLOPS: 96.83561803163347, Tokens per sec: 79126.87626421892, Loss: 2.2671191692352295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17480 , TFLOPS: 97.2498536469502, Tokens per sec: 79465.3588488676, Loss: 2.251333236694336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17481 , TFLOPS: 97.11200541964443, Tokens per sec: 79352.71951380701, Loss: 2.2567310333251953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17482 , TFLOPS: 97.16244105257208, Tokens per sec: 79393.93176780088, Loss: 2.2608885765075684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17483 , TFLOPS: 97.95406940365802, Tokens per sec: 80040.79167179987, Loss: 2.286853313446045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17484 , TFLOPS: 97.86511182939722, Tokens per sec: 79968.10214789968, Loss: 2.24665904045105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17485 , TFLOPS: 96.55051469699448, Tokens per sec: 78893.91099027266, Loss: 2.2364249229431152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17486 , TFLOPS: 97.82771004721015, Tokens per sec: 79937.5401888671, Loss: 2.267972469329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17487 , TFLOPS: 97.33749510660402, Tokens per sec: 79536.97294164263, Loss: 2.227107524871826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17488 , TFLOPS: 95.700565369023, Tokens per sec: 78199.39551474499, Loss: 2.248239755630493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17489 , TFLOPS: 97.18411200449997, Tokens per sec: 79411.63966048109, Loss: 2.2638275623321533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17490 , TFLOPS: 97.97539655883156, Tokens per sec: 80058.21863930198, Loss: 2.259622097015381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17491 , TFLOPS: 96.57664332897932, Tokens per sec: 78915.2613680783, Loss: 2.2619881629943848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17492 , TFLOPS: 97.08702774754893, Tokens per sec: 79332.30961495529, Loss: 2.270921468734741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17493 , TFLOPS: 96.05411804701629, Tokens per sec: 78488.2925091889, Loss: 2.2507705688476562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17494 , TFLOPS: 97.76065200925365, Tokens per sec: 79882.74533982539, Loss: 2.230980634689331 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17495 , TFLOPS: 96.19774730853476, Tokens per sec: 78605.65567612178, Loss: 2.2564656734466553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17496 , TFLOPS: 97.84348580511269, Tokens per sec: 79950.43096675341, Loss: 2.242142915725708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17497 , TFLOPS: 96.20886628502416, Tokens per sec: 78614.74127803918, Loss: 2.280003786087036 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17498 , TFLOPS: 96.54178898204255, Tokens per sec: 78886.78098396555, Loss: 2.2374134063720703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17499 , TFLOPS: 95.99927443031726, Tokens per sec: 78443.47837818385, Loss: 2.2608871459960938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17500 , TFLOPS: 96.18416639591896, Tokens per sec: 78594.55836281962, Loss: 2.258758068084717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17501 , TFLOPS: 96.84103796378113, Tokens per sec: 79131.30503029824, Loss: 2.2741293907165527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17502 , TFLOPS: 96.22343322763948, Tokens per sec: 78626.64430183661, Loss: 2.2732620239257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17503 , TFLOPS: 97.32940368055401, Tokens per sec: 79530.36122912532, Loss: 2.2580177783966064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17504 , TFLOPS: 96.70474431378528, Tokens per sec: 79019.93598037597, Loss: 2.2693867683410645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17505 , TFLOPS: 96.69832329518542, Tokens per sec: 79014.68920078629, Loss: 2.256263494491577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17506 , TFLOPS: 97.88616927369418, Tokens per sec: 79985.30872770157, Loss: 2.235351800918579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17507 , TFLOPS: 96.60578501377427, Tokens per sec: 78939.07378889795, Loss: 2.248338460922241 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17508 , TFLOPS: 97.31460752667566, Tokens per sec: 79518.27091090444, Loss: 2.25933575630188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17509 , TFLOPS: 97.88425315345873, Tokens per sec: 79983.74301653185, Loss: 2.263593912124634 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17510 , TFLOPS: 95.21500593595961, Tokens per sec: 77802.63240257712, Loss: 2.269221305847168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17511 , TFLOPS: 97.88689688737864, Tokens per sec: 79985.90327957358, Loss: 2.2573914527893066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17512 , TFLOPS: 96.25906360382972, Tokens per sec: 78655.75879942914, Loss: 2.2578577995300293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17513 , TFLOPS: 97.75732397134098, Tokens per sec: 79880.02591437561, Loss: 2.2490756511688232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17514 , TFLOPS: 97.24186498815448, Tokens per sec: 79458.83111011925, Loss: 2.2662980556488037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17515 , TFLOPS: 96.7714868540785, Tokens per sec: 79074.47302815538, Loss: 2.2727785110473633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17516 , TFLOPS: 97.07839872066808, Tokens per sec: 79325.25861496002, Loss: 2.239865779876709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17517 , TFLOPS: 96.25739614541494, Tokens per sec: 78654.39627623423, Loss: 2.2405731678009033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17518 , TFLOPS: 97.81437874343723, Tokens per sec: 79926.64683737597, Loss: 2.2815706729888916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17519 , TFLOPS: 97.86232734263405, Tokens per sec: 79965.82687208644, Loss: 2.252375841140747 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17520 , TFLOPS: 96.18851217668812, Tokens per sec: 78598.10941215644, Loss: 2.288466215133667 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17521 , TFLOPS: 97.89078941943005, Tokens per sec: 79989.08396771549, Loss: 2.2682909965515137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17522 , TFLOPS: 97.72413280347752, Tokens per sec: 79852.90455669771, Loss: 2.2473833560943604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17523 , TFLOPS: 96.63768785150712, Tokens per sec: 78965.14241886156, Loss: 2.2639000415802 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17524 , TFLOPS: 97.87573820876217, Tokens per sec: 79976.78523602613, Loss: 2.2419629096984863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17525 , TFLOPS: 96.57804169063793, Tokens per sec: 78916.40400538658, Loss: 2.250526189804077 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17526 , TFLOPS: 95.51035231705674, Tokens per sec: 78043.9675334638, Loss: 2.2844579219818115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17527 , TFLOPS: 97.80140135111967, Tokens per sec: 79916.04267604547, Loss: 2.2437832355499268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17528 , TFLOPS: 97.38791429723548, Tokens per sec: 79578.17176021302, Loss: 2.2329282760620117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17529 , TFLOPS: 97.20202698623976, Tokens per sec: 79426.27845323333, Loss: 2.2597665786743164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17530 , TFLOPS: 96.78578625872576, Tokens per sec: 79086.15743979209, Loss: 2.247469902038574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17531 , TFLOPS: 95.36562319106358, Tokens per sec: 77925.70563895564, Loss: 2.2352254390716553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17532 , TFLOPS: 98.46121097839423, Tokens per sec: 80455.19010750238, Loss: 2.2613840103149414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17533 , TFLOPS: 96.62419605850472, Tokens per sec: 78954.11792748966, Loss: 2.2662718296051025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17534 , TFLOPS: 96.57951939001151, Tokens per sec: 78917.6114715841, Loss: 2.2539024353027344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17535 , TFLOPS: 96.4128588644812, Tokens per sec: 78781.42886594968, Loss: 2.2348837852478027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17536 , TFLOPS: 96.64865180120772, Tokens per sec: 78974.10134439911, Loss: 2.2426509857177734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17537 , TFLOPS: 95.9777662219996, Tokens per sec: 78425.90346749694, Loss: 2.274768829345703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17538 , TFLOPS: 96.68309472946508, Tokens per sec: 79002.24554771805, Loss: 2.2358953952789307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17539 , TFLOPS: 95.9452859489136, Tokens per sec: 78399.36300024175, Loss: 2.2560412883758545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17540 , TFLOPS: 97.91236284360481, Tokens per sec: 80006.71216795802, Loss: 2.2583184242248535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17541 , TFLOPS: 96.18536690478156, Tokens per sec: 78595.53932951509, Loss: 2.2593584060668945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17542 , TFLOPS: 97.75681163077093, Tokens per sec: 79879.607267707, Loss: 2.2449591159820557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17543 , TFLOPS: 96.5924399607593, Tokens per sec: 78928.1692025472, Loss: 2.270535707473755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17544 , TFLOPS: 97.10335482816065, Tokens per sec: 79345.65089281957, Loss: 2.253180980682373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17545 , TFLOPS: 96.67339310473966, Tokens per sec: 78994.31810041287, Loss: 2.269695520401001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17546 , TFLOPS: 97.1659965399809, Tokens per sec: 79396.83704808902, Loss: 2.2780821323394775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17547 , TFLOPS: 96.79048333447008, Tokens per sec: 79089.99554129626, Loss: 2.2513203620910645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17548 , TFLOPS: 95.07763813809682, Tokens per sec: 77690.38563878172, Loss: 2.231900691986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17549 , TFLOPS: 97.25808030401835, Tokens per sec: 79472.08106213098, Loss: 2.23508358001709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17550 , TFLOPS: 96.69951024502944, Tokens per sec: 79015.65908805864, Loss: 2.2430191040039062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17551 , TFLOPS: 98.38586614979121, Tokens per sec: 80393.6239085024, Loss: 2.262850761413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17552 , TFLOPS: 96.76744992201799, Tokens per sec: 79071.17434704886, Loss: 2.2547173500061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17553 , TFLOPS: 97.31930262415032, Tokens per sec: 79522.10739591354, Loss: 2.2380881309509277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17554 , TFLOPS: 97.69378040010874, Tokens per sec: 79828.1028265647, Loss: 2.253323554992676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17555 , TFLOPS: 96.648188899829, Tokens per sec: 78973.72309576641, Loss: 2.2285385131835938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17556 , TFLOPS: 98.45507879331157, Tokens per sec: 80450.17934121483, Loss: 2.2623677253723145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17557 , TFLOPS: 97.07959754938916, Tokens per sec: 79326.23820876863, Loss: 2.248253107070923 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17558 , TFLOPS: 95.41974798743422, Tokens per sec: 77969.9324033663, Loss: 2.261167049407959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17559 , TFLOPS: 97.84088721717306, Tokens per sec: 79948.30759365433, Loss: 2.244722604751587 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17560 , TFLOPS: 97.76955647012386, Tokens per sec: 79890.02140402363, Loss: 2.2655441761016846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17561 , TFLOPS: 97.28878738449802, Tokens per sec: 79497.1726080615, Loss: 2.2507543563842773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17562 , TFLOPS: 97.34076077032766, Tokens per sec: 79539.64139953688, Loss: 2.2407939434051514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17563 , TFLOPS: 97.33975668188684, Tokens per sec: 79538.82093302436, Loss: 2.2510972023010254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17564 , TFLOPS: 94.22518730533773, Tokens per sec: 76993.82611929764, Loss: 2.267732620239258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17565 , TFLOPS: 97.85046672948856, Tokens per sec: 79956.1352597659, Loss: 2.247472047805786 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17566 , TFLOPS: 97.91778623045747, Tokens per sec: 80011.14375696573, Loss: 2.26387882232666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17567 , TFLOPS: 96.74529467765264, Tokens per sec: 79053.0707265512, Loss: 2.2751286029815674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17568 , TFLOPS: 96.52136416572782, Tokens per sec: 78870.09134077324, Loss: 2.2712485790252686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17569 , TFLOPS: 96.69705505593502, Tokens per sec: 79013.65288984742, Loss: 2.2681996822357178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17570 , TFLOPS: 98.394012068389, Tokens per sec: 80400.28014828324, Loss: 2.2301506996154785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17571 , TFLOPS: 97.1640363470705, Tokens per sec: 79395.23532400209, Loss: 2.2390780448913574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17572 , TFLOPS: 96.58556665278508, Tokens per sec: 78922.55284566677, Loss: 2.2596094608306885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17573 , TFLOPS: 96.85308339346741, Tokens per sec: 79141.14766097185, Loss: 2.247783660888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17574 , TFLOPS: 97.13865540878577, Tokens per sec: 79374.49590596615, Loss: 2.2669711112976074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17575 , TFLOPS: 95.92516609553601, Tokens per sec: 78382.92255011584, Loss: 2.265401840209961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17576 , TFLOPS: 96.8677757231125, Tokens per sec: 79153.15314173907, Loss: 2.2486772537231445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17577 , TFLOPS: 96.00190536773684, Tokens per sec: 78445.62818487574, Loss: 2.2700862884521484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17578 , TFLOPS: 95.00414131397642, Tokens per sec: 77630.32949181637, Loss: 2.246406078338623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17579 , TFLOPS: 96.67265252433326, Tokens per sec: 78993.71295309855, Loss: 2.2438955307006836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17580 , TFLOPS: 97.25551667051778, Tokens per sec: 79469.98625120387, Loss: 2.2551655769348145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17581 , TFLOPS: 95.78297385353599, Tokens per sec: 78266.73360882368, Loss: 2.2414450645446777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17582 , TFLOPS: 96.90026415407111, Tokens per sec: 79179.70027500196, Loss: 2.2519454956054688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17583 , TFLOPS: 96.5627485887678, Tokens per sec: 78903.90761816903, Loss: 2.2286124229431152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17584 , TFLOPS: 97.22242140498435, Tokens per sec: 79442.94325778857, Loss: 2.228055953979492 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17585 , TFLOPS: 96.70029318617708, Tokens per sec: 79016.29884942509, Loss: 2.246028423309326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17586 , TFLOPS: 95.75551090708372, Tokens per sec: 78244.29292831839, Loss: 2.240813732147217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17587 , TFLOPS: 96.61689719226331, Tokens per sec: 78948.15383599426, Loss: 2.260448932647705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17588 , TFLOPS: 96.04995468338633, Tokens per sec: 78484.89051759236, Loss: 2.2555899620056152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17589 , TFLOPS: 98.37229102612022, Tokens per sec: 80382.53132549606, Loss: 2.2592804431915283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17590 , TFLOPS: 96.10825349266251, Tokens per sec: 78532.52797539679, Loss: 2.264347553253174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17591 , TFLOPS: 97.33622174690468, Tokens per sec: 79535.9324466531, Loss: 2.2481985092163086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17592 , TFLOPS: 97.73518546476286, Tokens per sec: 79861.93597075478, Loss: 2.246455192565918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17593 , TFLOPS: 96.67510231253004, Tokens per sec: 78995.71473809732, Loss: 2.233142852783203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17594 , TFLOPS: 98.36952012219011, Tokens per sec: 80380.26714856536, Loss: 2.2431812286376953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17595 , TFLOPS: 97.88328091638941, Tokens per sec: 79982.9485765949, Loss: 2.2682676315307617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17596 , TFLOPS: 95.48216508465642, Tokens per sec: 78020.93502026558, Loss: 2.2686781883239746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17597 , TFLOPS: 97.91699338642483, Tokens per sec: 80010.49590369704, Loss: 2.28121018409729 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17598 , TFLOPS: 97.85906195205517, Tokens per sec: 79963.1586373858, Loss: 2.239611864089966 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17599 , TFLOPS: 96.56812332394043, Tokens per sec: 78908.29945263657, Loss: 2.2455904483795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17600 , TFLOPS: 96.02124122708632, Tokens per sec: 78461.42801329949, Loss: 2.26435923576355 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/optimizer.pt +[rank0]:[2024-08-30 13:32:53,116] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007727959018666297, 'preprocessing_with_comm': 0.0014901010144967586, 'state_converting': 2.6811186140112113, : 2.6919993790215813}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600, took 14.92s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015600 + eval ppl=7.625924110412598, eval loss=2.0315535068511963 +------------------------------------------------------------------ +iteration: 17601 , TFLOPS: 96.36643076043275, Tokens per sec: 78743.49126696597, Loss: 2.267338275909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17602 , TFLOPS: 95.40919646252014, Tokens per sec: 77961.3104807386, Loss: 2.272554397583008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17603 , TFLOPS: 96.24270994436507, Tokens per sec: 78642.39580329994, Loss: 2.2316133975982666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17604 , TFLOPS: 97.07994750971945, Tokens per sec: 79326.52417036328, Loss: 2.2520391941070557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17605 , TFLOPS: 97.3712244285079, Tokens per sec: 79564.53403884028, Loss: 2.2166953086853027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17606 , TFLOPS: 97.01991514360469, Tokens per sec: 79277.47017864035, Loss: 2.2593319416046143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17607 , TFLOPS: 97.80870632093418, Tokens per sec: 79922.01175492757, Loss: 2.257601737976074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17608 , TFLOPS: 97.11206178211376, Tokens per sec: 79352.76556903159, Loss: 2.2534353733062744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17609 , TFLOPS: 97.0957194784026, Tokens per sec: 79339.41185198088, Loss: 2.268630027770996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17610 , TFLOPS: 96.64625028779595, Tokens per sec: 78972.13900596989, Loss: 2.258674383163452 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17611 , TFLOPS: 97.91861009153932, Tokens per sec: 80011.81695506378, Loss: 2.253326177597046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17612 , TFLOPS: 96.04472458558442, Tokens per sec: 78480.61687004373, Loss: 2.2613155841827393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17613 , TFLOPS: 98.29747558532364, Tokens per sec: 80321.39770290039, Loss: 2.2434773445129395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17614 , TFLOPS: 96.4223842860882, Tokens per sec: 78789.21233315094, Loss: 2.238778591156006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17615 , TFLOPS: 97.70865856065578, Tokens per sec: 79840.26015454561, Loss: 2.2285449504852295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17616 , TFLOPS: 97.13564833152645, Tokens per sec: 79372.03874572851, Loss: 2.233485460281372 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17617 , TFLOPS: 97.18820242514457, Tokens per sec: 79414.98204848659, Loss: 2.2631521224975586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17618 , TFLOPS: 97.01395015338616, Tokens per sec: 79272.59603158027, Loss: 2.234370708465576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17619 , TFLOPS: 97.19029379626895, Tokens per sec: 79416.690961669, Loss: 2.262160301208496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17620 , TFLOPS: 97.26615257438822, Tokens per sec: 79478.67712204893, Loss: 2.2674920558929443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17621 , TFLOPS: 98.4196786785671, Tokens per sec: 80421.25299617709, Loss: 2.2378451824188232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17622 , TFLOPS: 96.70925050678672, Tokens per sec: 79023.61810667717, Loss: 2.2708821296691895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17623 , TFLOPS: 97.24898850195683, Tokens per sec: 79464.65191662271, Loss: 2.240542411804199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17624 , TFLOPS: 97.03445946389708, Tokens per sec: 79289.35471715535, Loss: 2.2490780353546143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17625 , TFLOPS: 97.1949750109742, Tokens per sec: 79420.51610270985, Loss: 2.2445943355560303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17626 , TFLOPS: 97.15156618877705, Tokens per sec: 79385.04563665006, Loss: 2.254615306854248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17627 , TFLOPS: 97.26687485925719, Tokens per sec: 79479.26731960863, Loss: 2.242664337158203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17628 , TFLOPS: 96.58156135040075, Tokens per sec: 78919.28000998287, Loss: 2.2647950649261475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17629 , TFLOPS: 96.58540917746276, Tokens per sec: 78922.42416852714, Loss: 2.269012928009033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17630 , TFLOPS: 97.71541954228941, Tokens per sec: 79845.78472668161, Loss: 2.2602968215942383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17631 , TFLOPS: 96.06058660839135, Tokens per sec: 78493.57813720405, Loss: 2.2544705867767334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17632 , TFLOPS: 97.74254895966443, Tokens per sec: 79867.95287199208, Loss: 2.2433977127075195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17633 , TFLOPS: 94.29355925317937, Tokens per sec: 77049.69459792957, Loss: 2.2343297004699707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17634 , TFLOPS: 98.39403577111725, Tokens per sec: 80400.29951639267, Loss: 2.242011785507202 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17635 , TFLOPS: 95.58707675937264, Tokens per sec: 78106.66105034297, Loss: 2.2615232467651367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17636 , TFLOPS: 96.5892149121819, Tokens per sec: 78925.53393233447, Loss: 2.268174171447754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17637 , TFLOPS: 95.72678498106436, Tokens per sec: 78220.82023469685, Loss: 2.2419002056121826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17638 , TFLOPS: 96.17516430757611, Tokens per sec: 78587.2025247003, Loss: 2.241391181945801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17639 , TFLOPS: 95.98717660685206, Tokens per sec: 78433.59293520475, Loss: 2.245654821395874 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17640 , TFLOPS: 96.30259991850535, Tokens per sec: 78691.33344287504, Loss: 2.2541892528533936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17641 , TFLOPS: 95.44835356020266, Tokens per sec: 77993.30675324834, Loss: 2.2369885444641113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17642 , TFLOPS: 97.93157151755389, Tokens per sec: 80022.40806991706, Loss: 2.2675135135650635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17643 , TFLOPS: 97.39013390587195, Tokens per sec: 79579.98545956769, Loss: 2.267780065536499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17644 , TFLOPS: 97.0292937394053, Tokens per sec: 79285.13367069555, Loss: 2.2293596267700195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17645 , TFLOPS: 97.8248427865869, Tokens per sec: 79935.19727640199, Loss: 2.2516396045684814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17646 , TFLOPS: 97.2060539453051, Tokens per sec: 79429.56898514877, Loss: 2.255795478820801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17647 , TFLOPS: 97.124938203509, Tokens per sec: 79363.28722442222, Loss: 2.2603883743286133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17648 , TFLOPS: 96.64152999030026, Tokens per sec: 78968.28192937489, Loss: 2.248082399368286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17649 , TFLOPS: 97.38148413414528, Tokens per sec: 79572.91750842489, Loss: 2.238405704498291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17650 , TFLOPS: 96.09236242920608, Tokens per sec: 78519.54297837417, Loss: 2.255068778991699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17651 , TFLOPS: 97.70683112524883, Tokens per sec: 79838.76691003222, Loss: 2.229846954345703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17652 , TFLOPS: 96.81715681066785, Tokens per sec: 79111.79112533381, Loss: 2.2481272220611572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17653 , TFLOPS: 97.8394369395304, Tokens per sec: 79947.12253445905, Loss: 2.2567367553710938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17654 , TFLOPS: 97.22015642514404, Tokens per sec: 79441.0924844553, Loss: 2.258218765258789 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17655 , TFLOPS: 97.3439352427361, Tokens per sec: 79542.23534266, Loss: 2.2401998043060303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17656 , TFLOPS: 97.82149677717655, Tokens per sec: 79932.46316597884, Loss: 2.2515764236450195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17657 , TFLOPS: 97.25513975272939, Tokens per sec: 79469.67826197624, Loss: 2.2576582431793213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17658 , TFLOPS: 97.2503651109698, Tokens per sec: 79465.77677928426, Loss: 2.234128952026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17659 , TFLOPS: 97.68699285032953, Tokens per sec: 79822.55654491305, Loss: 2.2843832969665527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17660 , TFLOPS: 96.70927970136889, Tokens per sec: 79023.64196232178, Loss: 2.2442450523376465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17661 , TFLOPS: 97.27181318885437, Tokens per sec: 79483.3025558464, Loss: 2.2611122131347656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17662 , TFLOPS: 97.83311255492723, Tokens per sec: 79941.95471699565, Loss: 2.2590432167053223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17663 , TFLOPS: 97.34879493257954, Tokens per sec: 79546.20632033062, Loss: 2.244924306869507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17664 , TFLOPS: 96.46468747311688, Tokens per sec: 78823.77935625298, Loss: 2.247987747192383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17665 , TFLOPS: 97.23708912315975, Tokens per sec: 79454.92862789036, Loss: 2.244361162185669 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17666 , TFLOPS: 96.56870747903255, Tokens per sec: 78908.77678079983, Loss: 2.244051456451416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17667 , TFLOPS: 97.09813526639216, Tokens per sec: 79341.38585453507, Loss: 2.2662787437438965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17668 , TFLOPS: 96.97625032253896, Tokens per sec: 79241.79052931503, Loss: 2.260866165161133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17669 , TFLOPS: 96.1829641137087, Tokens per sec: 78593.57594707611, Loss: 2.246105670928955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17670 , TFLOPS: 97.76713024346435, Tokens per sec: 79888.03887176352, Loss: 2.2446839809417725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17671 , TFLOPS: 94.35699449539581, Tokens per sec: 77101.52916731303, Loss: 2.2330002784729004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17672 , TFLOPS: 98.43554995145703, Tokens per sec: 80434.22182181838, Loss: 2.2561850547790527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17673 , TFLOPS: 95.54046129546292, Tokens per sec: 78068.57035479356, Loss: 2.256948232650757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17674 , TFLOPS: 96.59885468899058, Tokens per sec: 78933.41084211478, Loss: 2.2683165073394775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17675 , TFLOPS: 95.66328797326457, Tokens per sec: 78168.93519506525, Loss: 2.2707109451293945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17676 , TFLOPS: 95.60928874800284, Tokens per sec: 78124.81103804016, Loss: 2.266049861907959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17677 , TFLOPS: 96.74753861110416, Tokens per sec: 79054.90430233843, Loss: 2.2863929271698 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17678 , TFLOPS: 95.8481105725112, Tokens per sec: 78319.95849866663, Loss: 2.2466940879821777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17679 , TFLOPS: 96.24708569854796, Tokens per sec: 78645.97134468472, Loss: 2.258434295654297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17680 , TFLOPS: 97.11260990004801, Tokens per sec: 79353.21345030548, Loss: 2.239628314971924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17681 , TFLOPS: 97.31687978211012, Tokens per sec: 79520.127629313, Loss: 2.258789539337158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17682 , TFLOPS: 97.07816349559027, Tokens per sec: 79325.06640649372, Loss: 2.2400426864624023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17683 , TFLOPS: 97.89443566273448, Tokens per sec: 79992.06340698218, Loss: 2.243539333343506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17684 , TFLOPS: 97.11102676040761, Tokens per sec: 79351.91982615167, Loss: 2.2527573108673096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17685 , TFLOPS: 96.42175537429841, Tokens per sec: 78788.69843313786, Loss: 2.2420976161956787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17686 , TFLOPS: 97.29417778425262, Tokens per sec: 79501.57724246223, Loss: 2.2543444633483887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17687 , TFLOPS: 97.91375478053678, Tokens per sec: 80007.84955545678, Loss: 2.2530252933502197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17688 , TFLOPS: 96.06387625614133, Tokens per sec: 78496.26619306364, Loss: 2.253706216812134 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17689 , TFLOPS: 97.82985639771007, Tokens per sec: 79939.29402711257, Loss: 2.2485733032226562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17690 , TFLOPS: 96.26194174155476, Tokens per sec: 78658.11059985413, Loss: 2.26122784614563 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17691 , TFLOPS: 98.42225996410305, Tokens per sec: 80423.36223103674, Loss: 2.264019012451172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17692 , TFLOPS: 97.73989920173088, Tokens per sec: 79865.78768657373, Loss: 2.2475907802581787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17693 , TFLOPS: 96.11078763389169, Tokens per sec: 78534.59868742975, Loss: 2.241548538208008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17694 , TFLOPS: 97.59515816267363, Tokens per sec: 79747.51605759423, Loss: 2.2501635551452637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17695 , TFLOPS: 96.53719388169986, Tokens per sec: 78883.02620918714, Loss: 2.2596895694732666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17696 , TFLOPS: 97.16289037789961, Tokens per sec: 79394.29892309268, Loss: 2.2691688537597656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17697 , TFLOPS: 98.42466604202279, Tokens per sec: 80425.32829924292, Loss: 2.259166717529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17698 , TFLOPS: 96.05838817377689, Tokens per sec: 78491.78173968784, Loss: 2.2374556064605713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17699 , TFLOPS: 97.77857725431953, Tokens per sec: 79897.39251900557, Loss: 2.258970022201538 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17700 , TFLOPS: 97.88169645402661, Tokens per sec: 79981.65387161057, Loss: 2.2549514770507812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17701 , TFLOPS: 95.58844795421236, Tokens per sec: 78107.78148894421, Loss: 2.238823413848877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17702 , TFLOPS: 97.87626321029889, Tokens per sec: 79977.21422829652, Loss: 2.253713369369507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17703 , TFLOPS: 96.09176215257575, Tokens per sec: 78519.05247688704, Loss: 2.2680532932281494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17704 , TFLOPS: 96.5398473803117, Tokens per sec: 78885.19445121, Loss: 2.249983787536621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17705 , TFLOPS: 97.87286149818814, Tokens per sec: 79974.43460176296, Loss: 2.2585840225219727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17706 , TFLOPS: 95.45584794177573, Tokens per sec: 77999.43060535424, Loss: 2.263172149658203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17707 , TFLOPS: 97.19604045919361, Tokens per sec: 79421.38670787701, Loss: 2.221750020980835 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17708 , TFLOPS: 96.74938218842306, Tokens per sec: 79056.41073682361, Loss: 2.238487958908081 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17709 , TFLOPS: 94.57644698370237, Tokens per sec: 77280.84944471793, Loss: 2.2692296504974365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17710 , TFLOPS: 98.39784823585988, Tokens per sec: 80403.41477947545, Loss: 2.2627646923065186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17711 , TFLOPS: 96.11468726041733, Tokens per sec: 78537.78517264916, Loss: 2.2417335510253906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17712 , TFLOPS: 98.42248153091819, Tokens per sec: 80423.54327898502, Loss: 2.243983268737793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17713 , TFLOPS: 94.6059572540764, Tokens per sec: 77304.9630463025, Loss: 2.2728307247161865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17714 , TFLOPS: 97.42157163978057, Tokens per sec: 79605.67404122368, Loss: 2.2479305267333984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17715 , TFLOPS: 96.68347801964156, Tokens per sec: 79002.55874398802, Loss: 2.2534830570220947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17716 , TFLOPS: 96.74474641976423, Tokens per sec: 79052.62273091718, Loss: 2.2686922550201416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17717 , TFLOPS: 95.0843284754368, Tokens per sec: 77695.85248564683, Loss: 2.2625160217285156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17718 , TFLOPS: 97.24558148471675, Tokens per sec: 79461.86795513141, Loss: 2.2532198429107666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17719 , TFLOPS: 97.34797931613865, Tokens per sec: 79545.53985914093, Loss: 2.2432258129119873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17720 , TFLOPS: 96.5931519487252, Tokens per sec: 78928.75098624233, Loss: 2.2474424839019775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17721 , TFLOPS: 97.81535726941777, Tokens per sec: 79927.44641614416, Loss: 2.2428791522979736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17722 , TFLOPS: 96.65542713034512, Tokens per sec: 78979.63764024976, Loss: 2.232788562774658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17723 , TFLOPS: 97.7923378073249, Tokens per sec: 79908.63662109441, Loss: 2.2488205432891846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17724 , TFLOPS: 97.09994337382787, Tokens per sec: 79342.86330566561, Loss: 2.244464874267578 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17725 , TFLOPS: 97.17147972373388, Tokens per sec: 79401.31749868329, Loss: 2.2622909545898438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17726 , TFLOPS: 96.55013134545811, Tokens per sec: 78893.59774386395, Loss: 2.2317416667938232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17727 , TFLOPS: 97.1946782258063, Tokens per sec: 79420.27359190922, Loss: 2.2401061058044434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17728 , TFLOPS: 96.61951865466118, Tokens per sec: 78950.29590040201, Loss: 2.2568793296813965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17729 , TFLOPS: 98.46921840090468, Tokens per sec: 80461.73317856505, Loss: 2.2567830085754395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17730 , TFLOPS: 96.74356296425482, Tokens per sec: 79051.65569895558, Loss: 2.2468626499176025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17731 , TFLOPS: 96.65149663991994, Tokens per sec: 78976.42593534352, Loss: 2.230001926422119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17732 , TFLOPS: 97.77381728109968, Tokens per sec: 79893.50302235501, Loss: 2.272040605545044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17733 , TFLOPS: 96.77413076889297, Tokens per sec: 79076.63343901072, Loss: 2.2669785022735596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17734 , TFLOPS: 97.69258854949435, Tokens per sec: 79827.12893474672, Loss: 2.243278741836548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17735 , TFLOPS: 97.45198715492796, Tokens per sec: 79630.52734161564, Loss: 2.241699695587158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17736 , TFLOPS: 94.90529752729095, Tokens per sec: 77549.56168925026, Loss: 2.2496213912963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17737 , TFLOPS: 98.43582097250048, Tokens per sec: 80434.44328008969, Loss: 2.2551052570343018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17738 , TFLOPS: 97.33971974662691, Tokens per sec: 79538.79075227266, Loss: 2.276333808898926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17739 , TFLOPS: 96.70247366385607, Tokens per sec: 79018.08057386708, Loss: 2.2675061225891113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17740 , TFLOPS: 97.34169814689659, Tokens per sec: 79540.40735406156, Loss: 2.269320011138916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17741 , TFLOPS: 95.42457213149112, Tokens per sec: 77973.87433565971, Loss: 2.2585549354553223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17742 , TFLOPS: 96.56865403948775, Tokens per sec: 78908.73311397214, Loss: 2.237114906311035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17743 , TFLOPS: 98.37542352858927, Tokens per sec: 80385.0909739013, Loss: 2.2658534049987793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17744 , TFLOPS: 95.97522973352979, Tokens per sec: 78423.83083747324, Loss: 2.2802181243896484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17745 , TFLOPS: 97.02705213179289, Tokens per sec: 79283.30199541133, Loss: 2.2337658405303955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17746 , TFLOPS: 97.24812391439393, Tokens per sec: 79463.9454398686, Loss: 2.2293028831481934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17747 , TFLOPS: 95.50853525000751, Tokens per sec: 78042.48276120241, Loss: 2.246978282928467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17748 , TFLOPS: 97.63071795010575, Tokens per sec: 79776.57287529539, Loss: 2.244643211364746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17749 , TFLOPS: 95.61190324351666, Tokens per sec: 78126.94740962761, Loss: 2.2634449005126953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17750 , TFLOPS: 97.6653696692192, Tokens per sec: 79804.88768699758, Loss: 2.25637149810791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17751 , TFLOPS: 95.6809432123262, Tokens per sec: 78183.36174538879, Loss: 2.267413854598999 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17752 , TFLOPS: 97.3554650289378, Tokens per sec: 79551.6566277685, Loss: 2.2639431953430176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17753 , TFLOPS: 96.63924752228877, Tokens per sec: 78966.41686600675, Loss: 2.247265338897705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17754 , TFLOPS: 96.81800043547148, Tokens per sec: 79112.4804728777, Loss: 2.2646028995513916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17755 , TFLOPS: 94.40098568713428, Tokens per sec: 77137.47550251654, Loss: 2.2513527870178223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17756 , TFLOPS: 97.90564598857995, Tokens per sec: 80001.22365280997, Loss: 2.248229503631592 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17757 , TFLOPS: 96.62647941487681, Tokens per sec: 78955.98371676012, Loss: 2.2359390258789062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17758 , TFLOPS: 97.36902520271028, Tokens per sec: 79562.73699482792, Loss: 2.2688002586364746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17759 , TFLOPS: 97.10143237197931, Tokens per sec: 79344.08000438538, Loss: 2.2712182998657227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17760 , TFLOPS: 96.72239288594474, Tokens per sec: 79034.35708300212, Loss: 2.2578961849212646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17761 , TFLOPS: 97.73899499664485, Tokens per sec: 79865.04883732162, Loss: 2.2458722591400146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17762 , TFLOPS: 97.06184871008065, Tokens per sec: 79311.73517527388, Loss: 2.2454652786254883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17763 , TFLOPS: 97.78716340078559, Tokens per sec: 79904.40848031007, Loss: 2.257918357849121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17764 , TFLOPS: 96.61279571976436, Tokens per sec: 78944.8024172341, Loss: 2.258209466934204 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17765 , TFLOPS: 97.81525259478546, Tokens per sec: 79927.36088380753, Loss: 2.2564098834991455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17766 , TFLOPS: 96.59357558109976, Tokens per sec: 78929.09714716085, Loss: 2.2727744579315186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17767 , TFLOPS: 98.39424679748286, Tokens per sec: 80400.47195146832, Loss: 2.2386715412139893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17768 , TFLOPS: 96.60498364817512, Tokens per sec: 78938.4189724379, Loss: 2.258902072906494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17769 , TFLOPS: 96.7215044082317, Tokens per sec: 79033.63108499133, Loss: 2.231754779815674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17770 , TFLOPS: 97.77999449044495, Tokens per sec: 79898.55057912655, Loss: 2.243635416030884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17771 , TFLOPS: 95.92202666426915, Tokens per sec: 78380.35724001154, Loss: 2.2685928344726562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17772 , TFLOPS: 97.18923183309784, Tokens per sec: 79415.82320422452, Loss: 2.2502760887145996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17773 , TFLOPS: 97.87472697888425, Tokens per sec: 79975.9589340615, Loss: 2.240478754043579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17774 , TFLOPS: 96.13565220454096, Tokens per sec: 78554.91616817826, Loss: 2.2534339427948 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17775 , TFLOPS: 98.31718537829433, Tokens per sec: 80337.50308211196, Loss: 2.261967897415161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17776 , TFLOPS: 96.71419127690359, Tokens per sec: 79027.65533712639, Loss: 2.261482000350952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17777 , TFLOPS: 96.05241089693327, Tokens per sec: 78486.89755291013, Loss: 2.249882698059082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17778 , TFLOPS: 97.3421478627783, Tokens per sec: 79540.77482848523, Loss: 2.253047227859497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17779 , TFLOPS: 96.12198087656664, Tokens per sec: 78543.74497415914, Loss: 2.2498042583465576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17780 , TFLOPS: 96.59095115670591, Tokens per sec: 78926.95266242995, Loss: 2.2573165893554688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17781 , TFLOPS: 97.87356631464054, Tokens per sec: 79975.01052542981, Loss: 2.243098258972168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17782 , TFLOPS: 96.5786480233211, Tokens per sec: 78916.89945543038, Loss: 2.268589735031128 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17783 , TFLOPS: 96.56400752933665, Tokens per sec: 78904.93633091585, Loss: 2.2604105472564697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17784 , TFLOPS: 97.87588922805828, Tokens per sec: 79976.90863778057, Loss: 2.264448404312134 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17785 , TFLOPS: 94.99806538438851, Tokens per sec: 77625.36469334176, Loss: 2.277097225189209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17786 , TFLOPS: 97.66561998641883, Tokens per sec: 79805.09222762515, Loss: 2.2588560581207275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17787 , TFLOPS: 95.10063780996525, Tokens per sec: 77709.17926272964, Loss: 2.241828441619873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17788 , TFLOPS: 98.41651135507615, Tokens per sec: 80418.66489461849, Loss: 2.2715771198272705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17789 , TFLOPS: 94.88052357434248, Tokens per sec: 77529.31825455776, Loss: 2.2635960578918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17790 , TFLOPS: 96.89931862291616, Tokens per sec: 79178.9276571551, Loss: 2.2635645866394043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17791 , TFLOPS: 97.23598971669665, Tokens per sec: 79454.03027456811, Loss: 2.2349672317504883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17792 , TFLOPS: 95.7207437355699, Tokens per sec: 78215.88377748782, Loss: 2.2830581665039062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17793 , TFLOPS: 96.18473434529244, Tokens per sec: 78595.022448873, Loss: 2.264949321746826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17794 , TFLOPS: 97.27838754620001, Tokens per sec: 79488.67463247254, Loss: 2.248656988143921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17795 , TFLOPS: 97.41337984267511, Tokens per sec: 79598.98031293321, Loss: 2.254934072494507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17796 , TFLOPS: 97.09441727267563, Tokens per sec: 79338.3477861596, Loss: 2.2387993335723877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17797 , TFLOPS: 98.46455773907276, Tokens per sec: 80457.92483180607, Loss: 2.2550644874572754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17798 , TFLOPS: 95.91915124797143, Tokens per sec: 78378.00766333487, Loss: 2.2771048545837402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17799 , TFLOPS: 98.41499163913198, Tokens per sec: 80417.42309559954, Loss: 2.2433407306671143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17800 , TFLOPS: 95.92464479016878, Tokens per sec: 78382.49657808068, Loss: 2.2578933238983154 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/optimizer.pt +[rank0]:[2024-08-30 14:28:27,276] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0076612840057350695, 'preprocessing_with_comm': 0.0015057380078360438, 'state_converting': 2.5603021870192606, : 2.5710661849880125}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800, took 14.67s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0015800 + eval ppl=7.74461555480957, eval loss=2.0469977855682373 +------------------------------------------------------------------ +iteration: 17801 , TFLOPS: 95.94183135429583, Tokens per sec: 78396.54016205041, Loss: 2.232053279876709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17802 , TFLOPS: 96.82605427101281, Tokens per sec: 79119.0614692227, Loss: 2.2458479404449463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17803 , TFLOPS: 97.26436856840095, Tokens per sec: 79477.21936483233, Loss: 2.255598545074463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17804 , TFLOPS: 96.91174684744274, Tokens per sec: 79189.08307934675, Loss: 2.2606897354125977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17805 , TFLOPS: 95.81436071490414, Tokens per sec: 78292.38062121728, Loss: 2.2410664558410645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17806 , TFLOPS: 97.95896031112144, Tokens per sec: 80044.7881581914, Loss: 2.26541805267334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17807 , TFLOPS: 98.44158570720525, Tokens per sec: 80439.15379321441, Loss: 2.2427544593811035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17808 , TFLOPS: 96.58760792750773, Tokens per sec: 78924.22082378974, Loss: 2.264894485473633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17809 , TFLOPS: 97.81770053044558, Tokens per sec: 79929.36115505088, Loss: 2.2493972778320312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17810 , TFLOPS: 98.36603778646291, Tokens per sec: 80377.42164239936, Loss: 2.253599166870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17811 , TFLOPS: 95.35692571216556, Tokens per sec: 77918.59870505505, Loss: 2.275019645690918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17812 , TFLOPS: 97.70118858234102, Tokens per sec: 79834.15624297029, Loss: 2.246473789215088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17813 , TFLOPS: 96.5879196795448, Tokens per sec: 78924.47556440443, Loss: 2.2723915576934814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17814 , TFLOPS: 95.00838926101952, Tokens per sec: 77633.80059869717, Loss: 2.267515182495117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17815 , TFLOPS: 98.29118037731607, Tokens per sec: 80316.25372638433, Loss: 2.2511708736419678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17816 , TFLOPS: 96.10873035878667, Tokens per sec: 78532.91763498247, Loss: 2.222745895385742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17817 , TFLOPS: 97.72669099733241, Tokens per sec: 79854.99492275031, Loss: 2.270486354827881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17818 , TFLOPS: 97.22566631129499, Tokens per sec: 79445.59475426632, Loss: 2.2578608989715576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17819 , TFLOPS: 96.49894693715466, Tokens per sec: 78851.77364623532, Loss: 2.262676954269409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17820 , TFLOPS: 97.21932831110001, Tokens per sec: 79440.41581115237, Loss: 2.2816050052642822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17821 , TFLOPS: 96.53154399462575, Tokens per sec: 78878.40954101786, Loss: 2.244779586791992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17822 , TFLOPS: 97.16290395438541, Tokens per sec: 79394.3100167887, Loss: 2.2392661571502686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17823 , TFLOPS: 96.31140014463914, Tokens per sec: 78698.52433418694, Loss: 2.250575304031372 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17824 , TFLOPS: 95.8004847145359, Tokens per sec: 78281.04218411636, Loss: 2.2451939582824707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17825 , TFLOPS: 97.9320200463833, Tokens per sec: 80022.77457436975, Loss: 2.247471570968628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17826 , TFLOPS: 97.30805003981362, Tokens per sec: 79512.91261958473, Loss: 2.2611732482910156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17827 , TFLOPS: 96.69520153469341, Tokens per sec: 79012.138329927, Loss: 2.2629196643829346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17828 , TFLOPS: 98.38565083683271, Tokens per sec: 80393.44797074136, Loss: 2.2636704444885254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17829 , TFLOPS: 98.44269996107882, Tokens per sec: 80440.06427873796, Loss: 2.260010004043579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17830 , TFLOPS: 96.02114104029592, Tokens per sec: 78461.34614809403, Loss: 2.2535977363586426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17831 , TFLOPS: 98.42954833401348, Tokens per sec: 80429.31774570988, Loss: 2.269308567047119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17832 , TFLOPS: 97.12644907819883, Tokens per sec: 79364.52179902465, Loss: 2.263148546218872 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17833 , TFLOPS: 95.59345057664915, Tokens per sec: 78111.86926051627, Loss: 2.2571308612823486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17834 , TFLOPS: 97.31577881815369, Tokens per sec: 79519.22800332286, Loss: 2.248522996902466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17835 , TFLOPS: 96.59908534794296, Tokens per sec: 78933.59931948246, Loss: 2.2612056732177734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17836 , TFLOPS: 97.33396079610061, Tokens per sec: 79534.08496554905, Loss: 2.258139133453369 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17837 , TFLOPS: 96.66423984150634, Tokens per sec: 78986.83873339897, Loss: 2.2477002143859863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17838 , TFLOPS: 97.13070190679349, Tokens per sec: 79367.9968947469, Loss: 2.243025779724121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17839 , TFLOPS: 95.89933745890896, Tokens per sec: 78361.81730623961, Loss: 2.2547149658203125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17840 , TFLOPS: 96.76540885900906, Tokens per sec: 79069.50654192263, Loss: 2.2694733142852783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17841 , TFLOPS: 96.65297789706675, Tokens per sec: 78977.63630868912, Loss: 2.2666714191436768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17842 , TFLOPS: 97.81588863117287, Tokens per sec: 79927.88060551272, Loss: 2.267711639404297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17843 , TFLOPS: 95.64922628700448, Tokens per sec: 78157.44502924212, Loss: 2.2291371822357178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17844 , TFLOPS: 97.84728407144338, Tokens per sec: 79953.53462794829, Loss: 2.2452127933502197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17845 , TFLOPS: 98.40673963113125, Tokens per sec: 80410.68016743656, Loss: 2.24983286857605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17846 , TFLOPS: 96.71682162202346, Tokens per sec: 79029.80465983495, Loss: 2.236771583557129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17847 , TFLOPS: 97.87239030365045, Tokens per sec: 79974.0495765766, Loss: 2.2527613639831543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17848 , TFLOPS: 98.2926933654986, Tokens per sec: 80317.49002797606, Loss: 2.2655677795410156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17849 , TFLOPS: 97.0954744462099, Tokens per sec: 79339.21162986854, Loss: 2.264324903488159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17850 , TFLOPS: 97.11716048790119, Tokens per sec: 79356.93185278286, Loss: 2.24709153175354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17851 , TFLOPS: 97.17109764714817, Tokens per sec: 79401.00529406958, Loss: 2.240083694458008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17852 , TFLOPS: 97.21205118201448, Tokens per sec: 79434.4694816466, Loss: 2.287609338760376 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17853 , TFLOPS: 97.68383267363892, Tokens per sec: 79819.97428318896, Loss: 2.2486448287963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17854 , TFLOPS: 96.08764889326622, Tokens per sec: 78515.6914268205, Loss: 2.256094217300415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17855 , TFLOPS: 97.8445385743321, Tokens per sec: 79951.29121158327, Loss: 2.2813656330108643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17856 , TFLOPS: 96.6373371055022, Tokens per sec: 78964.85581527195, Loss: 2.2453062534332275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17857 , TFLOPS: 97.26250320942623, Tokens per sec: 79475.69513199547, Loss: 2.264385223388672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17858 , TFLOPS: 96.01414172627197, Tokens per sec: 78455.62682842668, Loss: 2.265148162841797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17859 , TFLOPS: 97.26534992584415, Tokens per sec: 79478.02125726156, Loss: 2.2445812225341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17860 , TFLOPS: 96.60208364750595, Tokens per sec: 78936.04930723847, Loss: 2.260495185852051 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17861 , TFLOPS: 96.83266575911625, Tokens per sec: 79124.46388634686, Loss: 2.2358970642089844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17862 , TFLOPS: 96.12044428490101, Tokens per sec: 78542.48938555378, Loss: 2.2577691078186035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17863 , TFLOPS: 97.89121183406638, Tokens per sec: 79989.42913358874, Loss: 2.2588448524475098 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17864 , TFLOPS: 96.59333210044586, Tokens per sec: 78928.8981928508, Loss: 2.2547426223754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17865 , TFLOPS: 97.1632404102389, Tokens per sec: 79394.58494352773, Loss: 2.2567009925842285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17866 , TFLOPS: 97.27388456951243, Tokens per sec: 79484.99513430419, Loss: 2.258650779724121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17867 , TFLOPS: 97.78328380393535, Tokens per sec: 79901.23836185402, Loss: 2.258249044418335 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17868 , TFLOPS: 96.12976952608086, Tokens per sec: 78550.1092801749, Loss: 2.2693400382995605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17869 , TFLOPS: 98.4011816083452, Tokens per sec: 80406.13856394184, Loss: 2.2312071323394775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17870 , TFLOPS: 97.18221714782369, Tokens per sec: 79410.09132431295, Loss: 2.25072979927063 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17871 , TFLOPS: 95.99290542456096, Tokens per sec: 78438.27409962757, Loss: 2.2622249126434326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17872 , TFLOPS: 97.31106988867575, Tokens per sec: 79515.38021582781, Loss: 2.238959789276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17873 , TFLOPS: 96.70821793153233, Tokens per sec: 79022.77436285595, Loss: 2.2578420639038086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17874 , TFLOPS: 97.82832496052222, Tokens per sec: 79938.04265036367, Loss: 2.2451746463775635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17875 , TFLOPS: 96.26892912818818, Tokens per sec: 78663.82017334363, Loss: 2.2416982650756836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17876 , TFLOPS: 97.10313926651855, Tokens per sec: 79345.47475185277, Loss: 2.263951301574707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17877 , TFLOPS: 96.07761897468708, Tokens per sec: 78507.49572215643, Loss: 2.25107479095459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17878 , TFLOPS: 97.15497027802407, Tokens per sec: 79387.82720560285, Loss: 2.226778984069824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17879 , TFLOPS: 97.83832953954682, Tokens per sec: 79946.21764942545, Loss: 2.249492645263672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17880 , TFLOPS: 97.79550058879028, Tokens per sec: 79911.221011247, Loss: 2.2501630783081055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17881 , TFLOPS: 95.40261985717714, Tokens per sec: 77955.93656721595, Loss: 2.248873710632324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17882 , TFLOPS: 98.001942849926, Tokens per sec: 80079.91029711757, Loss: 2.241527557373047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17883 , TFLOPS: 97.77073871518134, Tokens per sec: 79890.9874468938, Loss: 2.240910053253174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17884 , TFLOPS: 97.3200640460672, Tokens per sec: 79522.72957336258, Loss: 2.244135856628418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17885 , TFLOPS: 97.17906972390159, Tokens per sec: 79407.5194832043, Loss: 2.267244815826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17886 , TFLOPS: 98.48158795099472, Tokens per sec: 80471.84065636416, Loss: 2.2556068897247314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17887 , TFLOPS: 97.23809877378383, Tokens per sec: 79455.75363940602, Loss: 2.2452640533447266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17888 , TFLOPS: 97.61334880859079, Tokens per sec: 79762.38010264203, Loss: 2.2575981616973877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17889 , TFLOPS: 97.20674615387935, Tokens per sec: 79430.13460659381, Loss: 2.272291421890259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17890 , TFLOPS: 97.19472009323961, Tokens per sec: 79420.30780286675, Loss: 2.2429778575897217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17891 , TFLOPS: 97.68463151050615, Tokens per sec: 79820.62703335707, Loss: 2.2662150859832764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17892 , TFLOPS: 96.07754470794595, Tokens per sec: 78507.43503689044, Loss: 2.212031364440918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17893 , TFLOPS: 97.24673780990007, Tokens per sec: 79462.81281820516, Loss: 2.243211269378662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17894 , TFLOPS: 97.34841899150739, Tokens per sec: 79545.89912920298, Loss: 2.252040386199951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17895 , TFLOPS: 97.15546361368045, Tokens per sec: 79388.23032286728, Loss: 2.2719767093658447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17896 , TFLOPS: 96.63122292915332, Tokens per sec: 78959.85976438437, Loss: 2.2600834369659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17897 , TFLOPS: 96.56933925031555, Tokens per sec: 78909.29301737841, Loss: 2.2811930179595947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17898 , TFLOPS: 97.21733416207537, Tokens per sec: 79438.78634065037, Loss: 2.2526659965515137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17899 , TFLOPS: 96.30005933717537, Tokens per sec: 78689.25746846994, Loss: 2.2737622261047363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17900 , TFLOPS: 96.6877493422141, Tokens per sec: 79006.04895161567, Loss: 2.2439489364624023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17901 , TFLOPS: 97.22820942506941, Tokens per sec: 79447.67279799697, Loss: 2.2613725662231445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17902 , TFLOPS: 96.6655318724248, Tokens per sec: 78987.89448512226, Loss: 2.2525107860565186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17903 , TFLOPS: 97.1691328085988, Tokens per sec: 79399.39977391144, Loss: 2.2305309772491455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17904 , TFLOPS: 97.87787363035535, Tokens per sec: 79978.53014398055, Loss: 2.264533519744873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17905 , TFLOPS: 98.49228882282739, Tokens per sec: 80480.58461420359, Loss: 2.249725103378296 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17906 , TFLOPS: 96.17842617503354, Tokens per sec: 78589.86788056769, Loss: 2.2240357398986816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17907 , TFLOPS: 98.41620423405413, Tokens per sec: 80418.41393812539, Loss: 2.262418270111084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17908 , TFLOPS: 97.23599067642277, Tokens per sec: 79454.03105878504, Loss: 2.266648054122925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17909 , TFLOPS: 95.95738475285071, Tokens per sec: 78409.24924438969, Loss: 2.272857666015625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17910 , TFLOPS: 97.33456751184282, Tokens per sec: 79534.58072860025, Loss: 2.2737817764282227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17911 , TFLOPS: 96.1588012883214, Tokens per sec: 78573.8318804457, Loss: 2.2467079162597656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17912 , TFLOPS: 97.53137638141631, Tokens per sec: 79695.39832223977, Loss: 2.2585277557373047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17913 , TFLOPS: 96.14950852420505, Tokens per sec: 78566.23852366919, Loss: 2.2650771141052246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17914 , TFLOPS: 97.78289191652688, Tokens per sec: 79900.91814056448, Loss: 2.2629237174987793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17915 , TFLOPS: 96.23070607739292, Tokens per sec: 78632.58713458996, Loss: 2.243927001953125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17916 , TFLOPS: 97.32394964738552, Tokens per sec: 79525.90459822403, Loss: 2.246537208557129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17917 , TFLOPS: 97.79823558486814, Tokens per sec: 79913.45584694728, Loss: 2.2481765747070312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17918 , TFLOPS: 97.14524106044526, Tokens per sec: 79379.87721146687, Loss: 2.2652676105499268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17919 , TFLOPS: 96.18097590411962, Tokens per sec: 78591.95132983981, Loss: 2.239588737487793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17920 , TFLOPS: 98.00808802944178, Tokens per sec: 80084.93168149091, Loss: 2.2643792629241943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17921 , TFLOPS: 97.84869479658235, Tokens per sec: 79954.68736777463, Loss: 2.282400131225586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17922 , TFLOPS: 96.6271785303598, Tokens per sec: 78956.5549820181, Loss: 2.2354423999786377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17923 , TFLOPS: 97.80193730928325, Tokens per sec: 79916.48062125772, Loss: 2.2371811866760254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17924 , TFLOPS: 98.3397006808487, Tokens per sec: 80355.90091542457, Loss: 2.2240543365478516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17925 , TFLOPS: 96.57688139271511, Tokens per sec: 78915.45589608511, Loss: 2.2467823028564453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17926 , TFLOPS: 97.20292502014205, Tokens per sec: 79427.01225985233, Loss: 2.266515016555786 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17927 , TFLOPS: 97.1811527731492, Tokens per sec: 79409.22159636697, Loss: 2.2508738040924072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17928 , TFLOPS: 97.26495649324374, Tokens per sec: 79477.69977335591, Loss: 2.2647838592529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17929 , TFLOPS: 97.66354739918926, Tokens per sec: 79803.39866324671, Loss: 2.256781816482544 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17930 , TFLOPS: 96.10211319679405, Tokens per sec: 78527.51058157739, Loss: 2.2365472316741943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17931 , TFLOPS: 97.79782607649116, Tokens per sec: 79913.12122711078, Loss: 2.2706735134124756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17932 , TFLOPS: 96.00067772698216, Tokens per sec: 78444.62504801281, Loss: 2.2665984630584717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17933 , TFLOPS: 97.88420028273781, Tokens per sec: 79983.69981450478, Loss: 2.253499746322632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17934 , TFLOPS: 96.6415313979515, Tokens per sec: 78968.28307960294, Loss: 2.2371678352355957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17935 , TFLOPS: 96.59887378028918, Tokens per sec: 78933.42644210631, Loss: 2.2726778984069824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17936 , TFLOPS: 96.6796280448648, Tokens per sec: 78999.41283049075, Loss: 2.268237590789795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17937 , TFLOPS: 96.77904199826345, Tokens per sec: 79080.64653095562, Loss: 2.237318515777588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17938 , TFLOPS: 96.0681136593826, Tokens per sec: 78499.72868433254, Loss: 2.2621583938598633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17939 , TFLOPS: 97.1140496174698, Tokens per sec: 79354.38988047244, Loss: 2.2621448040008545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17940 , TFLOPS: 96.75458890255395, Tokens per sec: 79060.66527697271, Loss: 2.263291120529175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17941 , TFLOPS: 96.51092805686812, Tokens per sec: 78861.56372757466, Loss: 2.2437009811401367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17942 , TFLOPS: 97.6807595516504, Tokens per sec: 79817.46315610294, Loss: 2.2249178886413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17943 , TFLOPS: 97.35267348460395, Tokens per sec: 79549.37558503257, Loss: 2.2563421726226807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17944 , TFLOPS: 96.07401911787397, Tokens per sec: 78504.55418648587, Loss: 2.251596450805664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17945 , TFLOPS: 97.69111177481189, Tokens per sec: 79825.92222413812, Loss: 2.251007556915283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17946 , TFLOPS: 97.26111658303205, Tokens per sec: 79474.56208387385, Loss: 2.271574020385742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17947 , TFLOPS: 96.6037143279735, Tokens per sec: 78937.38177822596, Loss: 2.254647970199585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17948 , TFLOPS: 96.78525664266796, Tokens per sec: 79085.72467687773, Loss: 2.2378933429718018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17949 , TFLOPS: 97.83004295630708, Tokens per sec: 79939.44646894462, Loss: 2.274724006652832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17950 , TFLOPS: 96.73422523626756, Tokens per sec: 79044.02560104268, Loss: 2.2500832080841064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17951 , TFLOPS: 96.29569304720206, Tokens per sec: 78685.68966053445, Loss: 2.231356620788574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17952 , TFLOPS: 98.41947048699686, Tokens per sec: 80421.08287748572, Loss: 2.268272638320923 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17953 , TFLOPS: 96.05734555564398, Tokens per sec: 78490.92978957207, Loss: 2.261444091796875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17954 , TFLOPS: 97.29749772153058, Tokens per sec: 79504.29004867483, Loss: 2.263291597366333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17955 , TFLOPS: 97.208918351383, Tokens per sec: 79431.9095650914, Loss: 2.257760763168335 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17956 , TFLOPS: 98.42413276382109, Tokens per sec: 80424.89254389648, Loss: 2.25931978225708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17957 , TFLOPS: 96.0579126172589, Tokens per sec: 78491.39315021508, Loss: 2.2702231407165527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17958 , TFLOPS: 97.917316269377, Tokens per sec: 80010.759739669, Loss: 2.254939079284668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17959 , TFLOPS: 96.60021495833398, Tokens per sec: 78934.52235321171, Loss: 2.2418630123138428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17960 , TFLOPS: 96.75218521748303, Tokens per sec: 79058.7011640249, Loss: 2.251894474029541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17961 , TFLOPS: 97.76575989633966, Tokens per sec: 79886.91912585207, Loss: 2.2670159339904785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17962 , TFLOPS: 97.35448446114968, Tokens per sec: 79550.85538058679, Loss: 2.245907783508301 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17963 , TFLOPS: 96.63168917501362, Tokens per sec: 78960.24074587907, Loss: 2.2770094871520996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17964 , TFLOPS: 96.4408588846648, Tokens per sec: 78804.30840322636, Loss: 2.230724811553955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17965 , TFLOPS: 97.2636424203343, Tokens per sec: 79476.62601055436, Loss: 2.245051860809326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17966 , TFLOPS: 97.3039808192347, Tokens per sec: 79509.58755469865, Loss: 2.2489142417907715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17967 , TFLOPS: 96.557238808684, Tokens per sec: 78899.40543502821, Loss: 2.270160675048828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17968 , TFLOPS: 96.6814332320593, Tokens per sec: 79000.88789541705, Loss: 2.273249626159668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17969 , TFLOPS: 96.2454557049907, Tokens per sec: 78644.63943498931, Loss: 2.248894214630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17970 , TFLOPS: 96.67135469794543, Tokens per sec: 78992.65246574805, Loss: 2.2362091541290283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17971 , TFLOPS: 97.92863729723966, Tokens per sec: 80020.01044296478, Loss: 2.2637343406677246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17972 , TFLOPS: 96.63543876865049, Tokens per sec: 78963.30463536286, Loss: 2.255169630050659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17973 , TFLOPS: 97.23200596819423, Tokens per sec: 79450.77505111597, Loss: 2.250070095062256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17974 , TFLOPS: 96.63328062721492, Tokens per sec: 78961.54116244036, Loss: 2.2560646533966064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17975 , TFLOPS: 97.06957406016035, Tokens per sec: 79318.0477576922, Loss: 2.253336191177368 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17976 , TFLOPS: 96.67902097913547, Tokens per sec: 78998.91678145606, Loss: 2.2365236282348633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17977 , TFLOPS: 97.29086238276044, Tokens per sec: 79498.86814255692, Loss: 2.2747716903686523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17978 , TFLOPS: 96.01324089316543, Tokens per sec: 78454.89073450583, Loss: 2.2626779079437256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17979 , TFLOPS: 95.37916077183708, Tokens per sec: 77936.767544694, Loss: 2.24940824508667 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17980 , TFLOPS: 97.8344663824025, Tokens per sec: 79943.0609642763, Loss: 2.2646708488464355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17981 , TFLOPS: 97.46047600547972, Tokens per sec: 79637.46379992392, Loss: 2.2666513919830322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17982 , TFLOPS: 95.6748699595356, Tokens per sec: 78178.39913419414, Loss: 2.267906665802002 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17983 , TFLOPS: 97.31175279506263, Tokens per sec: 79515.93823621611, Loss: 2.252244710922241 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17984 , TFLOPS: 97.73530936839668, Tokens per sec: 79862.03721560354, Loss: 2.2597928047180176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17985 , TFLOPS: 96.62812948679132, Tokens per sec: 78957.33203299809, Loss: 2.2516448497772217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17986 , TFLOPS: 96.70196443466429, Tokens per sec: 79017.66446958574, Loss: 2.2319490909576416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17987 , TFLOPS: 97.77729778827957, Tokens per sec: 79896.34703436792, Loss: 2.272914171218872 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17988 , TFLOPS: 96.6788511371398, Tokens per sec: 78998.77799918927, Loss: 2.246255874633789 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17989 , TFLOPS: 95.58489316729528, Tokens per sec: 78104.8767810459, Loss: 2.2466580867767334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17990 , TFLOPS: 98.36822427646617, Tokens per sec: 80379.20827966629, Loss: 2.2498600482940674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17991 , TFLOPS: 95.49411677915492, Tokens per sec: 78030.70105750431, Loss: 2.2741098403930664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17992 , TFLOPS: 97.93799002081043, Tokens per sec: 80027.65279415497, Loss: 2.235684871673584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17993 , TFLOPS: 96.76698996432464, Tokens per sec: 79070.79850377726, Loss: 2.24607515335083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17994 , TFLOPS: 97.69140348023056, Tokens per sec: 79826.16058414488, Loss: 2.292362928390503 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17995 , TFLOPS: 96.78537609596361, Tokens per sec: 79085.8222852406, Loss: 2.265801191329956 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17996 , TFLOPS: 97.83526906211024, Tokens per sec: 79943.71685452832, Loss: 2.2516326904296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17997 , TFLOPS: 96.00157616048241, Tokens per sec: 78445.35918115359, Loss: 2.2592251300811768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17998 , TFLOPS: 97.29192364990986, Tokens per sec: 79499.73533126416, Loss: 2.2466037273406982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 17999 , TFLOPS: 97.23353915980091, Tokens per sec: 79452.02786144566, Loss: 2.2339465618133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18000 , TFLOPS: 97.42561692303946, Tokens per sec: 79608.97954630938, Loss: 2.2657217979431152 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/model.pt +[rank0]:[2024-08-30 15:23:57,293] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007684319018153474, 'preprocessing_with_comm': 0.0016953779850155115, 'state_converting': 2.659255098988069, : 2.670237824000651}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000, took 15.10s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016000 + eval ppl=7.8411688804626465, eval loss=2.0593879222869873 +------------------------------------------------------------------ +iteration: 18001 , TFLOPS: 96.25118261026257, Tokens per sec: 78649.31903671061, Loss: 2.24857759475708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18002 , TFLOPS: 95.4841695313395, Tokens per sec: 78022.57290524937, Loss: 2.255661964416504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18003 , TFLOPS: 97.08731742977135, Tokens per sec: 79332.54632175631, Loss: 2.2470717430114746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18004 , TFLOPS: 95.55729506702909, Tokens per sec: 78082.3256628801, Loss: 2.2661662101745605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18005 , TFLOPS: 96.22668988682007, Tokens per sec: 78629.30540188694, Loss: 2.256775140762329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18006 , TFLOPS: 97.80988388831693, Tokens per sec: 79922.97397554929, Loss: 2.2412490844726562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18007 , TFLOPS: 97.18034015151784, Tokens per sec: 79408.55758231325, Loss: 2.264127731323242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18008 , TFLOPS: 96.80981582935755, Tokens per sec: 79105.79262053149, Loss: 2.249176502227783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18009 , TFLOPS: 98.39841773011044, Tokens per sec: 80403.88012788766, Loss: 2.2507429122924805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18010 , TFLOPS: 97.84817310209601, Tokens per sec: 79954.26107778023, Loss: 2.257107734680176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18011 , TFLOPS: 97.27661684099067, Tokens per sec: 79487.22774366399, Loss: 2.2592735290527344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18012 , TFLOPS: 97.14826227447615, Tokens per sec: 79382.34592322448, Loss: 2.2491507530212402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18013 , TFLOPS: 97.55598799914318, Tokens per sec: 79715.50910864379, Loss: 2.245382785797119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18014 , TFLOPS: 97.65588379793247, Tokens per sec: 79797.13653738116, Loss: 2.2723798751831055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18015 , TFLOPS: 97.81487613489952, Tokens per sec: 79927.05326874378, Loss: 2.2488579750061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18016 , TFLOPS: 97.34989505610636, Tokens per sec: 79547.10525958407, Loss: 2.241624593734741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18017 , TFLOPS: 97.30430828134199, Tokens per sec: 79509.85513241614, Loss: 2.2654762268066406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18018 , TFLOPS: 97.12717058210278, Tokens per sec: 79365.11135843773, Loss: 2.2349653244018555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18019 , TFLOPS: 97.1459413314433, Tokens per sec: 79380.44942092596, Loss: 2.2513439655303955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18020 , TFLOPS: 97.7725842776984, Tokens per sec: 79892.4955035355, Loss: 2.2708592414855957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18021 , TFLOPS: 97.77682969882369, Tokens per sec: 79895.96454642383, Loss: 2.277599573135376 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18022 , TFLOPS: 95.45363083978684, Tokens per sec: 77997.61895424585, Loss: 2.252351760864258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18023 , TFLOPS: 98.36310284882437, Tokens per sec: 80375.02342930289, Loss: 2.244349241256714 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18024 , TFLOPS: 97.33052413968883, Tokens per sec: 79531.27678512757, Loss: 2.2515103816986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18025 , TFLOPS: 97.78324199360694, Tokens per sec: 79901.20419755837, Loss: 2.256495475769043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18026 , TFLOPS: 97.22230298981353, Tokens per sec: 79442.84649770422, Loss: 2.2583706378936768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18027 , TFLOPS: 97.05115101275858, Tokens per sec: 79302.99381140879, Loss: 2.2361388206481934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18028 , TFLOPS: 97.74686472112404, Tokens per sec: 79871.4793917783, Loss: 2.254164934158325 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18029 , TFLOPS: 97.73818081286112, Tokens per sec: 79864.38354679297, Loss: 2.2510440349578857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18030 , TFLOPS: 96.10351548662508, Tokens per sec: 78528.65642869649, Loss: 2.2443714141845703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18031 , TFLOPS: 97.11587053574472, Tokens per sec: 79355.87779966948, Loss: 2.245234966278076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18032 , TFLOPS: 97.81226046358665, Tokens per sec: 79924.91593638067, Loss: 2.257626533508301 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18033 , TFLOPS: 95.62842798546747, Tokens per sec: 78140.45020166032, Loss: 2.247771739959717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18034 , TFLOPS: 97.66900591150753, Tokens per sec: 79807.85895417658, Loss: 2.2661325931549072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18035 , TFLOPS: 96.575990869639, Tokens per sec: 78914.72822675553, Loss: 2.231537103652954 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18036 , TFLOPS: 96.46135740018174, Tokens per sec: 78821.05826793397, Loss: 2.2396302223205566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18037 , TFLOPS: 96.63029900590269, Tokens per sec: 78959.10480291238, Loss: 2.257606029510498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18038 , TFLOPS: 96.59349841101793, Tokens per sec: 78929.03408950046, Loss: 2.2579803466796875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18039 , TFLOPS: 96.07604818644286, Tokens per sec: 78506.21219064639, Loss: 2.2358598709106445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18040 , TFLOPS: 96.23374043587933, Tokens per sec: 78635.06658702056, Loss: 2.2564330101013184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18041 , TFLOPS: 97.11048765832506, Tokens per sec: 79351.47931196229, Loss: 2.2469751834869385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18042 , TFLOPS: 95.73006602890835, Tokens per sec: 78223.50126335186, Loss: 2.2852189540863037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18043 , TFLOPS: 96.20540971758503, Tokens per sec: 78611.9168278047, Loss: 2.242980718612671 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18044 , TFLOPS: 97.79804849240986, Tokens per sec: 79913.30296888344, Loss: 2.2253689765930176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18045 , TFLOPS: 97.30606023989675, Tokens per sec: 79511.28670285066, Loss: 2.251291036605835 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18046 , TFLOPS: 96.74122386198411, Tokens per sec: 79049.74435827634, Loss: 2.2695677280426025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18047 , TFLOPS: 96.6031568570614, Tokens per sec: 78936.9262543931, Loss: 2.246842384338379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18048 , TFLOPS: 97.38313021268984, Tokens per sec: 79574.26256157747, Loss: 2.257516860961914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18049 , TFLOPS: 97.1226193146658, Tokens per sec: 79361.39240065492, Loss: 2.2695071697235107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18050 , TFLOPS: 97.19788442944167, Tokens per sec: 79422.89346343471, Loss: 2.2445247173309326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18051 , TFLOPS: 97.75975912779279, Tokens per sec: 79882.01574339891, Loss: 2.238807439804077 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18052 , TFLOPS: 97.80280624398743, Tokens per sec: 79917.1906501729, Loss: 2.2605299949645996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18053 , TFLOPS: 97.8756735340265, Tokens per sec: 79976.73238863473, Loss: 2.271522045135498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18054 , TFLOPS: 97.75235989123425, Tokens per sec: 79875.96963673363, Loss: 2.268301010131836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18055 , TFLOPS: 97.81924193866863, Tokens per sec: 79930.62067938932, Loss: 2.245906352996826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18056 , TFLOPS: 97.14748936688972, Tokens per sec: 79381.71436053919, Loss: 2.261474847793579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18057 , TFLOPS: 94.69726078154513, Tokens per sec: 77379.56950895907, Loss: 2.2557103633880615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18058 , TFLOPS: 97.17666539263917, Tokens per sec: 79405.55484223679, Loss: 2.2273333072662354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18059 , TFLOPS: 97.72205593604126, Tokens per sec: 79851.20749485222, Loss: 2.252284049987793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18060 , TFLOPS: 96.72498746340298, Tokens per sec: 79036.47717903361, Loss: 2.2619659900665283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18061 , TFLOPS: 98.3717325259831, Tokens per sec: 80382.07496065696, Loss: 2.2634735107421875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18062 , TFLOPS: 96.76889339156752, Tokens per sec: 79072.353843176, Loss: 2.2551870346069336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18063 , TFLOPS: 96.70028118328237, Tokens per sec: 79016.28904155083, Loss: 2.232954502105713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18064 , TFLOPS: 97.32416652101259, Tokens per sec: 79526.08181124754, Loss: 2.258336067199707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18065 , TFLOPS: 97.20007178839113, Tokens per sec: 79424.68081073994, Loss: 2.2552390098571777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18066 , TFLOPS: 97.80693865855308, Tokens per sec: 79920.56735249229, Loss: 2.2697181701660156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18067 , TFLOPS: 97.17508049317023, Tokens per sec: 79404.25978008177, Loss: 2.245830535888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18068 , TFLOPS: 96.56385079801532, Tokens per sec: 78904.80826171857, Loss: 2.2388148307800293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18069 , TFLOPS: 97.04113418280222, Tokens per sec: 79294.8088017953, Loss: 2.2663519382476807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18070 , TFLOPS: 97.26925508326055, Tokens per sec: 79481.21226191419, Loss: 2.247319459915161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18071 , TFLOPS: 95.75061727747007, Tokens per sec: 78240.29421758784, Loss: 2.2452542781829834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18072 , TFLOPS: 97.173066340733, Tokens per sec: 79402.61396427642, Loss: 2.266345739364624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18073 , TFLOPS: 96.05264341846994, Tokens per sec: 78487.08755224338, Loss: 2.2678182125091553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18074 , TFLOPS: 97.8066378802165, Tokens per sec: 79920.32157877076, Loss: 2.2466671466827393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18075 , TFLOPS: 95.33269323012962, Tokens per sec: 77898.7977201839, Loss: 2.2726876735687256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18076 , TFLOPS: 97.17860260976772, Tokens per sec: 79407.13779222094, Loss: 2.253568410873413 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18077 , TFLOPS: 96.35763837238422, Tokens per sec: 78736.30678035531, Loss: 2.2747154235839844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18078 , TFLOPS: 95.51773749078903, Tokens per sec: 78050.00214903174, Loss: 2.268054723739624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18079 , TFLOPS: 97.85983168852177, Tokens per sec: 79963.78760886742, Loss: 2.26781964302063 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18080 , TFLOPS: 95.83705133314857, Tokens per sec: 78310.92170949328, Loss: 2.2458369731903076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18081 , TFLOPS: 97.16953363463394, Tokens per sec: 79399.72729918292, Loss: 2.260590076446533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18082 , TFLOPS: 97.82839513376295, Tokens per sec: 79938.0999907251, Loss: 2.2482361793518066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18083 , TFLOPS: 97.72767672777509, Tokens per sec: 79855.80038846999, Loss: 2.2363288402557373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18084 , TFLOPS: 97.2399350354379, Tokens per sec: 79457.25409607313, Loss: 2.25980281829834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18085 , TFLOPS: 97.69665776869762, Tokens per sec: 79830.45399850875, Loss: 2.267871856689453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18086 , TFLOPS: 96.71827861840328, Tokens per sec: 79030.99520908332, Loss: 2.261448383331299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18087 , TFLOPS: 97.25929755464489, Tokens per sec: 79473.07570895275, Loss: 2.2561562061309814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18088 , TFLOPS: 96.50821396364638, Tokens per sec: 78859.34597213619, Loss: 2.2481307983398438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18089 , TFLOPS: 97.86445293944367, Tokens per sec: 79967.56375195729, Loss: 2.2620420455932617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18090 , TFLOPS: 96.17301910603007, Tokens per sec: 78585.44962529463, Loss: 2.2679240703582764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18091 , TFLOPS: 97.7461752954537, Tokens per sec: 79870.91604431567, Loss: 2.2560513019561768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18092 , TFLOPS: 98.45579918044851, Tokens per sec: 80450.76798808883, Loss: 2.261370897293091 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18093 , TFLOPS: 97.92145571509839, Tokens per sec: 80014.14218732677, Loss: 2.2362043857574463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18094 , TFLOPS: 96.59935899124073, Tokens per sec: 78933.82292046527, Loss: 2.252976894378662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18095 , TFLOPS: 97.17992399084464, Tokens per sec: 79408.21752671426, Loss: 2.2581775188446045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18096 , TFLOPS: 97.7392418893565, Tokens per sec: 79865.25057971191, Loss: 2.265923500061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18097 , TFLOPS: 97.20508309378559, Tokens per sec: 79428.77567738024, Loss: 2.237928628921509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18098 , TFLOPS: 96.7583106062221, Tokens per sec: 79063.70637684512, Loss: 2.2466862201690674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18099 , TFLOPS: 97.1294048260304, Tokens per sec: 79366.93701666554, Loss: 2.268301248550415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18100 , TFLOPS: 97.21572254308506, Tokens per sec: 79437.46944528662, Loss: 2.2378225326538086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18101 , TFLOPS: 97.26952819377043, Tokens per sec: 79481.43542754231, Loss: 2.2480854988098145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18102 , TFLOPS: 97.3173483384497, Tokens per sec: 79520.51049875977, Loss: 2.276899576187134 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18103 , TFLOPS: 96.35380069806698, Tokens per sec: 78733.17091788017, Loss: 2.261775255203247 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18104 , TFLOPS: 97.75053321300774, Tokens per sec: 79874.47701093186, Loss: 2.25679874420166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18105 , TFLOPS: 97.16562971806853, Tokens per sec: 79396.53730846159, Loss: 2.2562708854675293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18106 , TFLOPS: 97.13615545096677, Tokens per sec: 79372.4531260776, Loss: 2.27095627784729 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18107 , TFLOPS: 96.036130814324, Tokens per sec: 78473.59467832348, Loss: 2.2801342010498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18108 , TFLOPS: 97.24397727456225, Tokens per sec: 79460.55711371813, Loss: 2.25278377532959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18109 , TFLOPS: 96.25111191188455, Tokens per sec: 78649.261267246, Loss: 2.2556841373443604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18110 , TFLOPS: 97.20080394276233, Tokens per sec: 79425.279072924, Loss: 2.2774155139923096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18111 , TFLOPS: 96.80388528873694, Tokens per sec: 79100.94662312469, Loss: 2.242807388305664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18112 , TFLOPS: 97.74003850336655, Tokens per sec: 79865.90151352617, Loss: 2.2417895793914795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18113 , TFLOPS: 95.74296340285409, Tokens per sec: 78234.04003961086, Loss: 2.254646062850952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18114 , TFLOPS: 97.1547227187109, Tokens per sec: 79387.62491851533, Loss: 2.2569167613983154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18115 , TFLOPS: 96.06540266544, Tokens per sec: 78497.51346139485, Loss: 2.246941566467285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18116 , TFLOPS: 96.72377309711304, Tokens per sec: 79035.48488907753, Loss: 2.240480661392212 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18117 , TFLOPS: 97.89367747989584, Tokens per sec: 79991.44387626737, Loss: 2.2500405311584473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18118 , TFLOPS: 95.11524671861098, Tokens per sec: 77721.11657805083, Loss: 2.254593849182129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18119 , TFLOPS: 97.19055196758517, Tokens per sec: 79416.9019200974, Loss: 2.2440998554229736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18120 , TFLOPS: 97.10798815771379, Tokens per sec: 79349.43690566995, Loss: 2.2533955574035645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18121 , TFLOPS: 97.74870268770331, Tokens per sec: 79872.98124158371, Loss: 2.250849485397339 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18122 , TFLOPS: 96.78394237314085, Tokens per sec: 79084.65075341481, Loss: 2.24275541305542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18123 , TFLOPS: 97.84361043657623, Tokens per sec: 79950.53280633056, Loss: 2.2668120861053467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18124 , TFLOPS: 97.93454181425868, Tokens per sec: 80024.83517581678, Loss: 2.2479605674743652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18125 , TFLOPS: 96.55491421225022, Tokens per sec: 78897.50594744174, Loss: 2.2361714839935303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18126 , TFLOPS: 96.65274891238937, Tokens per sec: 78977.44919941465, Loss: 2.257476806640625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18127 , TFLOPS: 97.82593785352941, Tokens per sec: 79936.09208379024, Loss: 2.254615306854248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18128 , TFLOPS: 97.74224506050892, Tokens per sec: 79867.70454816904, Loss: 2.2540392875671387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18129 , TFLOPS: 97.27813541461201, Tokens per sec: 79488.46860926154, Loss: 2.2393712997436523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18130 , TFLOPS: 98.39650336764436, Tokens per sec: 80402.31585303655, Loss: 2.2361695766448975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18131 , TFLOPS: 97.12360411779441, Tokens per sec: 79362.19710864242, Loss: 2.26352596282959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18132 , TFLOPS: 97.16679801405971, Tokens per sec: 79397.49195319059, Loss: 2.251075267791748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18133 , TFLOPS: 96.75837045417752, Tokens per sec: 79063.75528015012, Loss: 2.249854326248169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18134 , TFLOPS: 97.05355148962487, Tokens per sec: 79304.95530285, Loss: 2.2427923679351807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18135 , TFLOPS: 97.37589845739984, Tokens per sec: 79568.35330816808, Loss: 2.283555269241333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18136 , TFLOPS: 96.71064492899147, Tokens per sec: 79024.7575249563, Loss: 2.2637174129486084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18137 , TFLOPS: 97.86238647646566, Tokens per sec: 79965.87519186248, Loss: 2.251582622528076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18138 , TFLOPS: 97.34490594850816, Tokens per sec: 79543.02853133454, Loss: 2.247905731201172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18139 , TFLOPS: 97.24796673929826, Tokens per sec: 79463.81700805189, Loss: 2.246699094772339 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18140 , TFLOPS: 95.99696628477356, Tokens per sec: 78441.59233304314, Loss: 2.2597830295562744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18141 , TFLOPS: 97.83224670006325, Tokens per sec: 79941.24720469721, Loss: 2.2506439685821533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18142 , TFLOPS: 97.31418585073864, Tokens per sec: 79517.9263486414, Loss: 2.2541635036468506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18143 , TFLOPS: 97.73305176728881, Tokens per sec: 79860.19247162247, Loss: 2.26457142829895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18144 , TFLOPS: 97.20722932241796, Tokens per sec: 79430.52941604458, Loss: 2.231165885925293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18145 , TFLOPS: 96.51193010897104, Tokens per sec: 78862.38253014305, Loss: 2.243701457977295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18146 , TFLOPS: 97.78347151288878, Tokens per sec: 79901.39174367192, Loss: 2.274305582046509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18147 , TFLOPS: 96.09422654352052, Tokens per sec: 78521.06619416707, Loss: 2.266890525817871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18148 , TFLOPS: 98.39748093633743, Tokens per sec: 80403.11464958052, Loss: 2.2363505363464355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18149 , TFLOPS: 96.15337198825151, Tokens per sec: 78569.39545959595, Loss: 2.2489521503448486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18150 , TFLOPS: 97.19865916884258, Tokens per sec: 79423.52652294276, Loss: 2.243051528930664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18151 , TFLOPS: 96.61254099823006, Tokens per sec: 78944.5942777113, Loss: 2.2487826347351074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18152 , TFLOPS: 96.40917260237, Tokens per sec: 78778.4167262858, Loss: 2.2666358947753906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18153 , TFLOPS: 96.74876003919361, Tokens per sec: 79055.90236267268, Loss: 2.2587080001831055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18154 , TFLOPS: 96.17927033718104, Tokens per sec: 78590.55766718909, Loss: 2.2574570178985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18155 , TFLOPS: 97.21868785940661, Tokens per sec: 79439.89248158698, Loss: 2.2463057041168213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18156 , TFLOPS: 96.28152404529875, Tokens per sec: 78674.11180433669, Loss: 2.235384941101074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18157 , TFLOPS: 96.48289334725115, Tokens per sec: 78838.65584466899, Loss: 2.2530758380889893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18158 , TFLOPS: 97.26615249207492, Tokens per sec: 79478.67705478863, Loss: 2.2549009323120117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18159 , TFLOPS: 97.74276669711713, Tokens per sec: 79868.13079086975, Loss: 2.256737470626831 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18160 , TFLOPS: 97.30492607333417, Tokens per sec: 79510.35994615633, Loss: 2.2780942916870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18161 , TFLOPS: 97.1307271198131, Tokens per sec: 79368.01749695427, Loss: 2.271165370941162 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18162 , TFLOPS: 97.97287577135423, Tokens per sec: 80056.15883896341, Loss: 2.2334909439086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18163 , TFLOPS: 96.10692380602211, Tokens per sec: 78531.44145421372, Loss: 2.267306327819824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18164 , TFLOPS: 97.18214112832172, Tokens per sec: 79410.02920682097, Loss: 2.2638864517211914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18165 , TFLOPS: 96.6619802293411, Tokens per sec: 78984.99234613114, Loss: 2.259943723678589 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18166 , TFLOPS: 97.13549666732095, Tokens per sec: 79371.914817002, Loss: 2.2562460899353027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18167 , TFLOPS: 97.25876029875705, Tokens per sec: 79472.6367033366, Loss: 2.2231640815734863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18168 , TFLOPS: 98.39567872938437, Tokens per sec: 80401.64201988623, Loss: 2.253906011581421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18169 , TFLOPS: 97.23991173842946, Tokens per sec: 79457.23505948782, Loss: 2.263617992401123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18170 , TFLOPS: 97.12572233832016, Tokens per sec: 79363.92796116184, Loss: 2.2592222690582275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18171 , TFLOPS: 96.67801358549166, Tokens per sec: 78998.09361417707, Loss: 2.2401070594787598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18172 , TFLOPS: 97.30059044919935, Tokens per sec: 79506.81719606687, Loss: 2.280251979827881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18173 , TFLOPS: 97.82136693351845, Tokens per sec: 79932.35706738335, Loss: 2.2322030067443848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18174 , TFLOPS: 96.31859821924176, Tokens per sec: 78704.40606623967, Loss: 2.2353901863098145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18175 , TFLOPS: 97.614241195185, Tokens per sec: 79763.10929470025, Loss: 2.2651379108428955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18176 , TFLOPS: 97.2649457106416, Tokens per sec: 79477.69096261413, Loss: 2.2477705478668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18177 , TFLOPS: 97.83028046798668, Tokens per sec: 79939.64054585212, Loss: 2.2550861835479736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18178 , TFLOPS: 97.2982511728102, Tokens per sec: 79504.90571311096, Loss: 2.269622325897217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18179 , TFLOPS: 97.87572232640616, Tokens per sec: 79976.77225814418, Loss: 2.225173234939575 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18180 , TFLOPS: 97.80308926949274, Tokens per sec: 79917.421917599, Loss: 2.2494935989379883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18181 , TFLOPS: 97.7536833998106, Tokens per sec: 79877.0511096615, Loss: 2.2741212844848633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18182 , TFLOPS: 95.93996282915617, Tokens per sec: 78395.01334205868, Loss: 2.2402541637420654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18183 , TFLOPS: 97.16264995128213, Tokens per sec: 79394.1024643144, Loss: 2.246788740158081 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18184 , TFLOPS: 97.81337441221248, Tokens per sec: 79925.82617247844, Loss: 2.250187635421753 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18185 , TFLOPS: 95.97566696935041, Tokens per sec: 78424.18811411773, Loss: 2.2409815788269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18186 , TFLOPS: 97.60885682764236, Tokens per sec: 79758.7095893752, Loss: 2.2585158348083496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18187 , TFLOPS: 96.81148668635058, Tokens per sec: 79107.15792079219, Loss: 2.2604219913482666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18188 , TFLOPS: 97.20845360445209, Tokens per sec: 79431.5298084105, Loss: 2.2631590366363525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18189 , TFLOPS: 96.43737071827971, Tokens per sec: 78801.4581326797, Loss: 2.2750020027160645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18190 , TFLOPS: 96.50947802588936, Tokens per sec: 78860.37886993478, Loss: 2.2636752128601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18191 , TFLOPS: 96.36967286517707, Tokens per sec: 78746.14047421176, Loss: 2.265012741088867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18192 , TFLOPS: 96.20126980615444, Tokens per sec: 78608.53399960398, Loss: 2.256598949432373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18193 , TFLOPS: 97.1446857269912, Tokens per sec: 79379.42343420601, Loss: 2.2393364906311035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18194 , TFLOPS: 96.3618529448574, Tokens per sec: 78739.75061601591, Loss: 2.247098207473755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18195 , TFLOPS: 97.31633783776704, Tokens per sec: 79519.68479263938, Loss: 2.264742374420166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18196 , TFLOPS: 96.5652474013907, Tokens per sec: 78905.94946228836, Loss: 2.250319004058838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18197 , TFLOPS: 98.4215684110264, Tokens per sec: 80422.79714521563, Loss: 2.2506041526794434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18198 , TFLOPS: 96.60586515644488, Tokens per sec: 78939.13927553716, Loss: 2.2687947750091553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18199 , TFLOPS: 97.83251614404091, Tokens per sec: 79941.46737430747, Loss: 2.2585105895996094 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18200 , TFLOPS: 97.96582700012219, Tokens per sec: 80050.3991065382, Loss: 2.2816758155822754 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/model.pt +[rank0]:[2024-08-30 16:19:26,104] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.00771340899518691, 'preprocessing_with_comm': 0.0015151039988268167, 'state_converting': 2.6284999450144824, : 2.6394016979902517}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200, took 14.89s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016200 + eval ppl=7.388164043426514, eval loss=1.999879240989685 +------------------------------------------------------------------ +iteration: 18201 , TFLOPS: 93.8185693543788, Tokens per sec: 76661.56812429184, Loss: 2.2735469341278076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18202 , TFLOPS: 97.42813654985054, Tokens per sec: 79611.03839823768, Loss: 2.2659006118774414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18203 , TFLOPS: 96.14451691309911, Tokens per sec: 78562.15974973902, Loss: 2.2537665367126465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18204 , TFLOPS: 96.99871118330378, Tokens per sec: 79260.14387683995, Loss: 2.244034767150879 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18205 , TFLOPS: 97.15608228012462, Tokens per sec: 79388.73585114471, Loss: 2.2545361518859863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18206 , TFLOPS: 97.328872953945, Tokens per sec: 79529.92755875098, Loss: 2.266753673553467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18207 , TFLOPS: 96.49473661915079, Tokens per sec: 78848.33328701112, Loss: 2.239696741104126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18208 , TFLOPS: 96.74730157647492, Tokens per sec: 79054.71061524107, Loss: 2.258833408355713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18209 , TFLOPS: 97.79502618407255, Tokens per sec: 79910.83336293988, Loss: 2.242696762084961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18210 , TFLOPS: 97.77576150635464, Tokens per sec: 79895.09169885966, Loss: 2.258481025695801 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18211 , TFLOPS: 98.43328657536217, Tokens per sec: 80432.3723589468, Loss: 2.257431745529175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18212 , TFLOPS: 96.44261473225124, Tokens per sec: 78805.74315148877, Loss: 2.268787384033203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18213 , TFLOPS: 97.21423445821468, Tokens per sec: 79436.25349283231, Loss: 2.245318651199341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18214 , TFLOPS: 97.95368337107521, Tokens per sec: 80040.47623463909, Loss: 2.278651475906372 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18215 , TFLOPS: 97.65598761506452, Tokens per sec: 79797.22136903224, Loss: 2.26655650138855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18216 , TFLOPS: 96.45312415950455, Tokens per sec: 78814.33067503417, Loss: 2.270534038543701 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18217 , TFLOPS: 97.70836377331841, Tokens per sec: 79840.01927622376, Loss: 2.269803285598755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18218 , TFLOPS: 96.59523301390676, Tokens per sec: 78930.45147817356, Loss: 2.2447638511657715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18219 , TFLOPS: 96.50034746931793, Tokens per sec: 78852.91805711882, Loss: 2.2447855472564697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18220 , TFLOPS: 97.84477643075658, Tokens per sec: 79951.48557019069, Loss: 2.2441632747650146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18221 , TFLOPS: 97.79658723144334, Tokens per sec: 79912.10893493149, Loss: 2.224290132522583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18222 , TFLOPS: 95.92897768483715, Tokens per sec: 78386.03709785284, Loss: 2.25982928276062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18223 , TFLOPS: 97.13333498357218, Tokens per sec: 79370.14844956419, Loss: 2.2496445178985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18224 , TFLOPS: 95.86624777487424, Tokens per sec: 78334.77887361059, Loss: 2.2444937229156494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18225 , TFLOPS: 97.23325993893536, Tokens per sec: 79451.79970288863, Loss: 2.2613658905029297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18226 , TFLOPS: 95.59459212247732, Tokens per sec: 78112.80204699846, Loss: 2.2422034740448 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18227 , TFLOPS: 97.84031226397559, Tokens per sec: 79947.83778459606, Loss: 2.250737190246582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18228 , TFLOPS: 95.48808494860323, Tokens per sec: 78025.77229348737, Loss: 2.23462176322937 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18229 , TFLOPS: 96.74608379924544, Tokens per sec: 79053.71553811849, Loss: 2.247838020324707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18230 , TFLOPS: 95.94475335396235, Tokens per sec: 78398.92780319689, Loss: 2.2600998878479004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18231 , TFLOPS: 96.11909847765169, Tokens per sec: 78541.3896918062, Loss: 2.259650945663452 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18232 , TFLOPS: 96.31461049216965, Tokens per sec: 78701.14759179582, Loss: 2.2588295936584473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18233 , TFLOPS: 95.65735309097857, Tokens per sec: 78164.08564997191, Loss: 2.255539655685425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18234 , TFLOPS: 96.911920193383, Tokens per sec: 79189.22472477685, Loss: 2.2563037872314453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18235 , TFLOPS: 95.92441329505772, Tokens per sec: 78382.3074174662, Loss: 2.263249635696411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18236 , TFLOPS: 96.72855668633481, Tokens per sec: 79039.39368297119, Loss: 2.2477784156799316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18237 , TFLOPS: 95.66963199906948, Tokens per sec: 78174.11906186018, Loss: 2.2469255924224854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18238 , TFLOPS: 98.45546240689642, Tokens per sec: 80450.49280175011, Loss: 2.245978355407715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18239 , TFLOPS: 96.21170903875623, Tokens per sec: 78617.06416529264, Loss: 2.2365241050720215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18240 , TFLOPS: 97.94874649118755, Tokens per sec: 80036.44218299628, Loss: 2.2598910331726074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18241 , TFLOPS: 96.61590579589586, Tokens per sec: 78947.3437405016, Loss: 2.2522988319396973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18242 , TFLOPS: 97.23065105222871, Tokens per sec: 79449.66791440191, Loss: 2.241994857788086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18243 , TFLOPS: 97.26334344826239, Tokens per sec: 79476.38171277818, Loss: 2.2742693424224854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18244 , TFLOPS: 97.246171526396, Tokens per sec: 79462.35009337655, Loss: 2.266576051712036 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18245 , TFLOPS: 97.79994012137492, Tokens per sec: 79914.84866760565, Loss: 2.2664074897766113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18246 , TFLOPS: 96.13181269734368, Tokens per sec: 78551.77880800965, Loss: 2.258305311203003 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18247 , TFLOPS: 97.81855498974424, Tokens per sec: 79930.05935573956, Loss: 2.260392427444458 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18248 , TFLOPS: 97.83910138236212, Tokens per sec: 79946.84834205883, Loss: 2.260139226913452 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18249 , TFLOPS: 97.01678057443408, Tokens per sec: 79274.90884148004, Loss: 2.2819788455963135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18250 , TFLOPS: 97.8487531685579, Tokens per sec: 79954.73506501848, Loss: 2.254403591156006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18251 , TFLOPS: 97.08920997884795, Tokens per sec: 79334.09277232536, Loss: 2.2257046699523926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18252 , TFLOPS: 97.81068816066723, Tokens per sec: 79923.63116719054, Loss: 2.248433828353882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18253 , TFLOPS: 97.0796839032642, Tokens per sec: 79326.30877074286, Loss: 2.262108325958252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18254 , TFLOPS: 97.24658538633608, Tokens per sec: 79462.6882689873, Loss: 2.249558448791504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18255 , TFLOPS: 97.77190684880483, Tokens per sec: 79891.941958948, Loss: 2.258376121520996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18256 , TFLOPS: 96.6719588679439, Tokens per sec: 78993.14614860651, Loss: 2.261033058166504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18257 , TFLOPS: 96.57769401474572, Tokens per sec: 78916.11991046507, Loss: 2.2539942264556885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18258 , TFLOPS: 97.77627137813118, Tokens per sec: 79895.50832821355, Loss: 2.255847930908203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18259 , TFLOPS: 97.85656983407983, Tokens per sec: 79961.12226363532, Loss: 2.263279438018799 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18260 , TFLOPS: 95.28877395610633, Tokens per sec: 77862.91015079677, Loss: 2.2433834075927734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18261 , TFLOPS: 97.85603406429817, Tokens per sec: 79960.6844723548, Loss: 2.2656283378601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18262 , TFLOPS: 96.09651047376047, Tokens per sec: 78522.93245235973, Loss: 2.2729978561401367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18263 , TFLOPS: 97.01460886026061, Tokens per sec: 79273.13427792401, Loss: 2.250190258026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18264 , TFLOPS: 96.09672991144296, Tokens per sec: 78523.11176053893, Loss: 2.252664089202881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18265 , TFLOPS: 96.62334512055327, Tokens per sec: 78953.42260418444, Loss: 2.2574195861816406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18266 , TFLOPS: 96.55183663859394, Tokens per sec: 78894.99118278339, Loss: 2.272040843963623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18267 , TFLOPS: 96.18134228892855, Tokens per sec: 78592.25071229877, Loss: 2.2538866996765137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18268 , TFLOPS: 96.05036957115226, Tokens per sec: 78485.22953306606, Loss: 2.2401041984558105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18269 , TFLOPS: 96.8145655112386, Tokens per sec: 79109.67370786419, Loss: 2.26056170463562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18270 , TFLOPS: 95.99830052969044, Tokens per sec: 78442.68257891123, Loss: 2.2448136806488037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18271 , TFLOPS: 95.98040521224961, Tokens per sec: 78428.05985436389, Loss: 2.2554843425750732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18272 , TFLOPS: 97.42896340092362, Tokens per sec: 79611.71403953455, Loss: 2.253993511199951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18273 , TFLOPS: 95.93596946541459, Tokens per sec: 78391.7502617471, Loss: 2.2385153770446777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18274 , TFLOPS: 97.30506727512906, Tokens per sec: 79510.47532577779, Loss: 2.2824301719665527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18275 , TFLOPS: 95.19498000353693, Tokens per sec: 77786.26869768118, Loss: 2.261343479156494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18276 , TFLOPS: 98.39725307201967, Tokens per sec: 80402.92845578073, Loss: 2.2593994140625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18277 , TFLOPS: 95.61513876728726, Tokens per sec: 78129.59123939036, Loss: 2.2752225399017334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18278 , TFLOPS: 97.86285786077072, Tokens per sec: 79966.26037211265, Loss: 2.2463951110839844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18279 , TFLOPS: 96.67573446379089, Tokens per sec: 78996.23128516556, Loss: 2.2526767253875732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18280 , TFLOPS: 97.9731462894262, Tokens per sec: 80056.37988624377, Loss: 2.2464661598205566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18281 , TFLOPS: 96.53138954517748, Tokens per sec: 78878.28333639778, Loss: 2.2316901683807373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18282 , TFLOPS: 97.82506101018235, Tokens per sec: 79935.3755925194, Loss: 2.2509546279907227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18283 , TFLOPS: 97.79782871546657, Tokens per sec: 79913.12338348551, Loss: 2.278057098388672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18284 , TFLOPS: 94.1925916784943, Tokens per sec: 76967.19139351779, Loss: 2.2512474060058594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18285 , TFLOPS: 97.66744436558768, Tokens per sec: 79806.58297480772, Loss: 2.2359421253204346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18286 , TFLOPS: 97.73100582263676, Tokens per sec: 79858.52067757999, Loss: 2.254554510116577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18287 , TFLOPS: 97.69774732819766, Tokens per sec: 79831.34430562399, Loss: 2.2704665660858154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18288 , TFLOPS: 97.81429212002539, Tokens per sec: 79926.57605515626, Loss: 2.267179250717163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18289 , TFLOPS: 97.16784017016325, Tokens per sec: 79398.34352577027, Loss: 2.2376534938812256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18290 , TFLOPS: 97.03854195847896, Tokens per sec: 79292.69062857096, Loss: 2.2592921257019043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18291 , TFLOPS: 97.7634139145597, Tokens per sec: 79885.00216374872, Loss: 2.2332329750061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18292 , TFLOPS: 96.66412260176301, Tokens per sec: 78986.74293378668, Loss: 2.250194787979126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18293 , TFLOPS: 98.27247043343017, Tokens per sec: 80300.96534959837, Loss: 2.2613844871520996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18294 , TFLOPS: 96.066260679885, Tokens per sec: 78498.21456708512, Loss: 2.2521750926971436 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18295 , TFLOPS: 94.44330179419197, Tokens per sec: 77172.05308290689, Loss: 2.2393221855163574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18296 , TFLOPS: 97.7558575589381, Tokens per sec: 79878.82767105242, Loss: 2.2521724700927734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18297 , TFLOPS: 97.24420237280545, Tokens per sec: 79460.74104728719, Loss: 2.2268404960632324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18298 , TFLOPS: 96.65377686481155, Tokens per sec: 78978.28916580067, Loss: 2.2681570053100586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18299 , TFLOPS: 97.15955853110928, Tokens per sec: 79391.57638531094, Loss: 2.2717995643615723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18300 , TFLOPS: 96.09520799875986, Tokens per sec: 78521.86816650804, Loss: 2.269470691680908 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18301 , TFLOPS: 97.88246289493448, Tokens per sec: 79982.28015020635, Loss: 2.2453560829162598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18302 , TFLOPS: 96.05327016755155, Tokens per sec: 78487.59968505192, Loss: 2.2443947792053223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18303 , TFLOPS: 97.76596527773081, Tokens per sec: 79887.08694827378, Loss: 2.2633304595947266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18304 , TFLOPS: 96.2513921036627, Tokens per sec: 78649.49021916068, Loss: 2.241401433944702 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18305 , TFLOPS: 95.97562726423502, Tokens per sec: 78424.15567004583, Loss: 2.255002737045288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18306 , TFLOPS: 97.22474443204307, Tokens per sec: 79444.8414629983, Loss: 2.2518701553344727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18307 , TFLOPS: 96.10522701944603, Tokens per sec: 78530.05496622325, Loss: 2.2619149684906006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18308 , TFLOPS: 96.81446454950252, Tokens per sec: 79109.59120943065, Loss: 2.246685743331909 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18309 , TFLOPS: 95.41998988020613, Tokens per sec: 77970.13006017709, Loss: 2.260415554046631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18310 , TFLOPS: 97.25798545516983, Tokens per sec: 79472.00355869523, Loss: 2.261509656906128 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18311 , TFLOPS: 95.83381546829278, Tokens per sec: 78308.27760102104, Loss: 2.245539665222168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18312 , TFLOPS: 97.31933406976165, Tokens per sec: 79522.13309093205, Loss: 2.2549123764038086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18313 , TFLOPS: 94.52658543550393, Tokens per sec: 77240.10629013454, Loss: 2.2352731227874756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18314 , TFLOPS: 98.39747805395173, Tokens per sec: 80403.11229430897, Loss: 2.25919771194458 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18315 , TFLOPS: 95.72060301436531, Tokens per sec: 78215.76879056908, Loss: 2.2580175399780273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18316 , TFLOPS: 96.68509854188484, Tokens per sec: 79003.88291442895, Loss: 2.2586417198181152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18317 , TFLOPS: 96.73420857409226, Tokens per sec: 79044.01198595033, Loss: 2.247406005859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18318 , TFLOPS: 97.93792486371999, Tokens per sec: 80027.59955261907, Loss: 2.2407259941101074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18319 , TFLOPS: 96.79012206715156, Tokens per sec: 79089.70034047044, Loss: 2.236025094985962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18320 , TFLOPS: 97.86693492997304, Tokens per sec: 79969.59185031097, Loss: 2.2648143768310547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18321 , TFLOPS: 97.82242996025407, Tokens per sec: 79933.22569389376, Loss: 2.241689920425415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18322 , TFLOPS: 95.6436887499427, Tokens per sec: 78152.92016516005, Loss: 2.2590384483337402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18323 , TFLOPS: 97.78783188129012, Tokens per sec: 79904.95471293853, Loss: 2.251690149307251 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18324 , TFLOPS: 97.26419644577028, Tokens per sec: 79477.07871899998, Loss: 2.264070510864258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18325 , TFLOPS: 97.70158292421493, Tokens per sec: 79834.47846986672, Loss: 2.272298812866211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18326 , TFLOPS: 97.20288588190849, Tokens per sec: 79426.98027899419, Loss: 2.2548816204071045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18327 , TFLOPS: 97.24786801084889, Tokens per sec: 79463.73633449442, Loss: 2.256993293762207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18328 , TFLOPS: 97.74723931877485, Tokens per sec: 79871.78548516177, Loss: 2.2662949562072754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18329 , TFLOPS: 97.8582691939422, Tokens per sec: 79962.5108543243, Loss: 2.2521512508392334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18330 , TFLOPS: 96.68588314764091, Tokens per sec: 79004.52403598979, Loss: 2.24505877494812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18331 , TFLOPS: 97.7426126850352, Tokens per sec: 79868.00494363296, Loss: 2.2464311122894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18332 , TFLOPS: 95.86236100139114, Tokens per sec: 78331.6028909431, Loss: 2.2424402236938477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18333 , TFLOPS: 96.61606048122711, Tokens per sec: 78947.47013786773, Loss: 2.248368740081787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18334 , TFLOPS: 98.37133318368203, Tokens per sec: 80381.74864778276, Loss: 2.2564618587493896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18335 , TFLOPS: 97.87411302139029, Tokens per sec: 79975.45725358851, Loss: 2.255488634109497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18336 , TFLOPS: 95.91434269879207, Tokens per sec: 78374.07847401734, Loss: 2.231950283050537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18337 , TFLOPS: 97.82759357419225, Tokens per sec: 79937.44501576589, Loss: 2.2493908405303955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18338 , TFLOPS: 96.61215968858943, Tokens per sec: 78944.28269978815, Loss: 2.2586302757263184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18339 , TFLOPS: 97.78110606633601, Tokens per sec: 79899.45887640165, Loss: 2.244091272354126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18340 , TFLOPS: 96.07441955439538, Tokens per sec: 78504.88139347566, Loss: 2.2417447566986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18341 , TFLOPS: 97.25197225326521, Tokens per sec: 79467.09001662537, Loss: 2.2581369876861572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18342 , TFLOPS: 96.70596827310652, Tokens per sec: 79020.93610904487, Loss: 2.260834217071533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18343 , TFLOPS: 96.04670300646258, Tokens per sec: 78482.23348867235, Loss: 2.2442891597747803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18344 , TFLOPS: 97.22170856336456, Tokens per sec: 79442.36077655088, Loss: 2.2680277824401855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18345 , TFLOPS: 96.76415188107971, Tokens per sec: 79068.47943289907, Loss: 2.2533369064331055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18346 , TFLOPS: 96.08427740032056, Tokens per sec: 78512.93649314465, Loss: 2.2579779624938965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18347 , TFLOPS: 96.19631711060784, Tokens per sec: 78604.48702457911, Loss: 2.2445740699768066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18348 , TFLOPS: 96.61863813007122, Tokens per sec: 78949.57640109284, Loss: 2.252117395401001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18349 , TFLOPS: 96.57811371815227, Tokens per sec: 78916.4628609227, Loss: 2.2584035396575928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18350 , TFLOPS: 97.40421483791326, Tokens per sec: 79591.49135161407, Loss: 2.231780767440796 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18351 , TFLOPS: 94.46769227273697, Tokens per sec: 77191.98317079278, Loss: 2.2662837505340576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18352 , TFLOPS: 98.40491237004144, Tokens per sec: 80409.1870653622, Loss: 2.254512310028076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18353 , TFLOPS: 95.64884939249843, Tokens per sec: 78157.1370590391, Loss: 2.2559165954589844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18354 , TFLOPS: 96.63228806861625, Tokens per sec: 78960.73011725866, Loss: 2.271191358566284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18355 , TFLOPS: 96.56875432658133, Tokens per sec: 78908.81506113791, Loss: 2.247539758682251 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18356 , TFLOPS: 97.40523693801245, Tokens per sec: 79592.32653591633, Loss: 2.2633144855499268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18357 , TFLOPS: 96.7371521940381, Tokens per sec: 79046.4172935836, Loss: 2.2463393211364746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18358 , TFLOPS: 97.1431846465691, Tokens per sec: 79378.19686275187, Loss: 2.2346153259277344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18359 , TFLOPS: 97.85663540785497, Tokens per sec: 79961.17584565502, Loss: 2.2715811729431152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18360 , TFLOPS: 95.6054918239051, Tokens per sec: 78121.70847361838, Loss: 2.251624584197998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18361 , TFLOPS: 97.78491593350537, Tokens per sec: 79902.57201694044, Loss: 2.2394726276397705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18362 , TFLOPS: 97.23458482066508, Tokens per sec: 79452.88229785548, Loss: 2.2579469680786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18363 , TFLOPS: 98.42320177894526, Tokens per sec: 80424.13181218876, Loss: 2.2411487102508545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18364 , TFLOPS: 96.59979732934835, Tokens per sec: 78934.18109781682, Loss: 2.234469413757324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18365 , TFLOPS: 97.22691091646352, Tokens per sec: 79446.61175318851, Loss: 2.2562613487243652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18366 , TFLOPS: 97.83488860898638, Tokens per sec: 79943.40597648708, Loss: 2.2419612407684326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18367 , TFLOPS: 97.0855583755142, Tokens per sec: 79331.1089532407, Loss: 2.2528252601623535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18368 , TFLOPS: 96.04296464003784, Tokens per sec: 78479.17877323258, Loss: 2.239596128463745 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18369 , TFLOPS: 97.87146241927026, Tokens per sec: 79973.29137836368, Loss: 2.271435499191284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18370 , TFLOPS: 96.55098208016489, Tokens per sec: 78894.2929010927, Loss: 2.265843152999878 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18371 , TFLOPS: 95.97056743400678, Tokens per sec: 78420.02115251473, Loss: 2.2623097896575928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18372 , TFLOPS: 98.37909174560737, Tokens per sec: 80388.08836845511, Loss: 2.2752771377563477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18373 , TFLOPS: 97.82634074888834, Tokens per sec: 79936.42129995947, Loss: 2.250948667526245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18374 , TFLOPS: 96.21561696307734, Tokens per sec: 78620.25743085434, Loss: 2.2343688011169434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18375 , TFLOPS: 97.17235886270014, Tokens per sec: 79402.03586576367, Loss: 2.2415757179260254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18376 , TFLOPS: 96.56192595109324, Tokens per sec: 78903.23541974858, Loss: 2.2564358711242676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18377 , TFLOPS: 97.86632665974145, Tokens per sec: 79969.09481704647, Loss: 2.2631094455718994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18378 , TFLOPS: 95.437882700158, Tokens per sec: 77984.75074396194, Loss: 2.255023717880249 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18379 , TFLOPS: 97.14856572415066, Tokens per sec: 79382.593879765, Loss: 2.2541561126708984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18380 , TFLOPS: 96.19172224327129, Tokens per sec: 78600.732440196, Loss: 2.247922897338867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18381 , TFLOPS: 96.09001186869845, Tokens per sec: 78517.62227487458, Loss: 2.267547845840454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18382 , TFLOPS: 97.29907638083579, Tokens per sec: 79505.58001183142, Loss: 2.2520952224731445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18383 , TFLOPS: 95.32183665256282, Tokens per sec: 77889.92653118088, Loss: 2.2433624267578125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18384 , TFLOPS: 96.84587101170659, Tokens per sec: 79135.25423817185, Loss: 2.242175340652466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18385 , TFLOPS: 96.63993216466388, Tokens per sec: 78966.97630491572, Loss: 2.2558953762054443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18386 , TFLOPS: 97.75586228482075, Tokens per sec: 79878.83153269278, Loss: 2.2463693618774414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18387 , TFLOPS: 96.55730920275552, Tokens per sec: 78899.46295583613, Loss: 2.237370252609253 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18388 , TFLOPS: 97.72506435002711, Tokens per sec: 79853.66574736353, Loss: 2.257627010345459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18389 , TFLOPS: 95.50326660484002, Tokens per sec: 78038.17761560905, Loss: 2.273174285888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18390 , TFLOPS: 98.37672658009639, Tokens per sec: 80386.1557308313, Loss: 2.2297425270080566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18391 , TFLOPS: 96.21344406424952, Tokens per sec: 78618.48189928674, Loss: 2.247868299484253 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18392 , TFLOPS: 95.50801192997176, Tokens per sec: 78042.05514292973, Loss: 2.2657461166381836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18393 , TFLOPS: 96.64008195366883, Tokens per sec: 78967.09870136758, Loss: 2.240797519683838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18394 , TFLOPS: 97.93077546964771, Tokens per sec: 80021.75759868082, Loss: 2.2660763263702393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18395 , TFLOPS: 96.66402934433037, Tokens per sec: 78986.66673073781, Loss: 2.2582051753997803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18396 , TFLOPS: 97.79624472513859, Tokens per sec: 79911.82906421299, Loss: 2.253936290740967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18397 , TFLOPS: 96.77840122704056, Tokens per sec: 79080.12294029449, Loss: 2.2275688648223877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18398 , TFLOPS: 96.14112747423802, Tokens per sec: 78559.3901519937, Loss: 2.2634365558624268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18399 , TFLOPS: 98.38326141732209, Tokens per sec: 80391.4955145501, Loss: 2.272749423980713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18400 , TFLOPS: 96.14019815067068, Tokens per sec: 78558.63077778392, Loss: 2.241210699081421 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/optimizer.pt +[rank0]:[2024-08-30 17:15:01,549] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0076681589998770505, 'preprocessing_with_comm': 0.0017957759846467525, 'state_converting': 2.644042169005843, : 2.655120956012979}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400, took 14.94s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016400 + eval ppl=8.050795555114746, eval loss=2.085770845413208 +------------------------------------------------------------------ +iteration: 18401 , TFLOPS: 95.7779182613886, Tokens per sec: 78262.60255433753, Loss: 2.237636089324951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18402 , TFLOPS: 96.18599661450695, Tokens per sec: 78596.05388154188, Loss: 2.2607524394989014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18403 , TFLOPS: 97.15651283415727, Tokens per sec: 79389.08766792822, Loss: 2.2870988845825195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18404 , TFLOPS: 95.54681553060308, Tokens per sec: 78073.76256389913, Loss: 2.2723934650421143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18405 , TFLOPS: 96.76863604977233, Tokens per sec: 79072.14356257058, Loss: 2.246157169342041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18406 , TFLOPS: 97.41082925531553, Tokens per sec: 79596.89616234374, Loss: 2.261984348297119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18407 , TFLOPS: 96.09389228017909, Tokens per sec: 78520.79305898609, Loss: 2.2600226402282715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18408 , TFLOPS: 97.04571889260725, Tokens per sec: 79298.55508619791, Loss: 2.2614545822143555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18409 , TFLOPS: 97.76508712800833, Tokens per sec: 79886.36938952995, Loss: 2.2686216831207275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18410 , TFLOPS: 96.5237415059427, Tokens per sec: 78872.033926661, Loss: 2.235339879989624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18411 , TFLOPS: 98.3166991319169, Tokens per sec: 80337.10575767986, Loss: 2.2604174613952637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18412 , TFLOPS: 97.74456779619874, Tokens per sec: 79869.60251529374, Loss: 2.234773635864258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18413 , TFLOPS: 97.82939730592481, Tokens per sec: 79938.91889139675, Loss: 2.2507050037384033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18414 , TFLOPS: 96.71438911219659, Tokens per sec: 79027.81699343702, Loss: 2.249922275543213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18415 , TFLOPS: 97.84430437969397, Tokens per sec: 79951.09984511575, Loss: 2.247748851776123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18416 , TFLOPS: 98.42745156593597, Tokens per sec: 80427.60442253812, Loss: 2.246971607208252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18417 , TFLOPS: 96.5776071399098, Tokens per sec: 78916.0489228003, Loss: 2.2576537132263184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18418 , TFLOPS: 97.16724656832301, Tokens per sec: 79397.85847842596, Loss: 2.2564992904663086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18419 , TFLOPS: 97.05858941273117, Tokens per sec: 79309.07191950783, Loss: 2.246117115020752 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18420 , TFLOPS: 97.7741162323975, Tokens per sec: 79893.74730315615, Loss: 2.2659921646118164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18421 , TFLOPS: 96.61350590952534, Tokens per sec: 78945.3827315697, Loss: 2.2788798809051514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18422 , TFLOPS: 96.71676611331324, Tokens per sec: 79029.7593022389, Loss: 2.2606000900268555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18423 , TFLOPS: 97.58150321874542, Tokens per sec: 79736.35825140098, Loss: 2.2703640460968018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18424 , TFLOPS: 97.31683524349478, Tokens per sec: 79520.09123566387, Loss: 2.2384262084960938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18425 , TFLOPS: 96.5888557790564, Tokens per sec: 78925.24047541272, Loss: 2.2548398971557617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18426 , TFLOPS: 97.85828856550017, Tokens per sec: 79962.52668332297, Loss: 2.2476723194122314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18427 , TFLOPS: 96.66110590198397, Tokens per sec: 78984.27791074046, Loss: 2.242964506149292 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18428 , TFLOPS: 97.74332693808879, Tokens per sec: 79868.58857818955, Loss: 2.2645373344421387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18429 , TFLOPS: 97.15376353234524, Tokens per sec: 79386.84114264429, Loss: 2.2390706539154053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18430 , TFLOPS: 95.96481780622189, Tokens per sec: 78415.3229836431, Loss: 2.2480616569519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18431 , TFLOPS: 97.30990720296674, Tokens per sec: 79514.43015540477, Loss: 2.248405694961548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18432 , TFLOPS: 96.32477286828141, Tokens per sec: 78709.45153091957, Loss: 2.235485076904297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18433 , TFLOPS: 96.69970322153766, Tokens per sec: 79015.81677413118, Loss: 2.2496650218963623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18434 , TFLOPS: 96.67478379240583, Tokens per sec: 78995.45446710438, Loss: 2.2259836196899414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18435 , TFLOPS: 97.17225378347356, Tokens per sec: 79401.95000282265, Loss: 2.245967388153076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18436 , TFLOPS: 97.17759864104545, Tokens per sec: 79406.3174235335, Loss: 2.2626800537109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18437 , TFLOPS: 96.63787217670269, Tokens per sec: 78965.29303572381, Loss: 2.2600090503692627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18438 , TFLOPS: 96.66218041168742, Tokens per sec: 78985.15592027949, Loss: 2.280752182006836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18439 , TFLOPS: 96.35124228344444, Tokens per sec: 78731.08037143265, Loss: 2.238515615463257 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18440 , TFLOPS: 96.56167462189318, Tokens per sec: 78903.03005218941, Loss: 2.2621593475341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18441 , TFLOPS: 97.23838794495518, Tokens per sec: 79455.98992861404, Loss: 2.23895525932312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18442 , TFLOPS: 94.65906511075148, Tokens per sec: 77348.35884310954, Loss: 2.2624149322509766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18443 , TFLOPS: 97.80956503823458, Tokens per sec: 79922.71343493908, Loss: 2.2651584148406982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18444 , TFLOPS: 97.31703861928696, Tokens per sec: 79520.25741925903, Loss: 2.266169786453247 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18445 , TFLOPS: 97.30582082597029, Tokens per sec: 79511.09107156828, Loss: 2.2626230716705322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18446 , TFLOPS: 96.47152222190309, Tokens per sec: 78829.36420542827, Loss: 2.2295665740966797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18447 , TFLOPS: 98.3550775016663, Tokens per sec: 80368.46571154922, Loss: 2.2386696338653564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18448 , TFLOPS: 96.62492854690842, Tokens per sec: 78954.71646262024, Loss: 2.246114730834961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18449 , TFLOPS: 97.80778399309577, Tokens per sec: 79921.25809710797, Loss: 2.2341582775115967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18450 , TFLOPS: 96.62349079380934, Tokens per sec: 78953.54163755193, Loss: 2.229379177093506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18451 , TFLOPS: 97.79306705253147, Tokens per sec: 79909.23250612513, Loss: 2.238632917404175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18452 , TFLOPS: 96.7088029266576, Tokens per sec: 79023.25237743193, Loss: 2.2551870346069336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18453 , TFLOPS: 97.28289344488024, Tokens per sec: 79492.35651827631, Loss: 2.24566650390625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18454 , TFLOPS: 98.42399254893415, Tokens per sec: 80424.77797070291, Loss: 2.225358009338379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18455 , TFLOPS: 96.08536507379908, Tokens per sec: 78513.82525914317, Loss: 2.2721493244171143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18456 , TFLOPS: 97.19960806775704, Tokens per sec: 79424.3018926726, Loss: 2.2760705947875977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18457 , TFLOPS: 97.21672434508913, Tokens per sec: 79438.28804349282, Loss: 2.2616467475891113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18458 , TFLOPS: 97.82224766206414, Tokens per sec: 79933.07673335, Loss: 2.2603161334991455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18459 , TFLOPS: 95.60001226300425, Tokens per sec: 78117.23098334984, Loss: 2.2514750957489014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18460 , TFLOPS: 97.40924362256656, Tokens per sec: 79595.60050100673, Loss: 2.2524185180664062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18461 , TFLOPS: 96.60228512198208, Tokens per sec: 78936.21393721933, Loss: 2.2673561573028564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18462 , TFLOPS: 96.74130066792989, Tokens per sec: 79049.8071183918, Loss: 2.235352039337158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18463 , TFLOPS: 96.72375381668073, Tokens per sec: 79035.46913453996, Loss: 2.2715964317321777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18464 , TFLOPS: 97.37827937829937, Tokens per sec: 79570.29881992545, Loss: 2.2457942962646484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18465 , TFLOPS: 96.68644457212893, Tokens per sec: 79004.9827903913, Loss: 2.2324328422546387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18466 , TFLOPS: 96.86325324376718, Tokens per sec: 79149.45770744701, Loss: 2.227740526199341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18467 , TFLOPS: 96.47436731391348, Tokens per sec: 78831.68900334915, Loss: 2.2628777027130127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18468 , TFLOPS: 96.6794560353346, Tokens per sec: 78999.27227707571, Loss: 2.275103807449341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18469 , TFLOPS: 97.14511536868518, Tokens per sec: 79379.77450549412, Loss: 2.262007713317871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18470 , TFLOPS: 96.2785707762885, Tokens per sec: 78671.6986122045, Loss: 2.2667064666748047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18471 , TFLOPS: 97.2425349503874, Tokens per sec: 79459.37855350613, Loss: 2.2615878582000732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18472 , TFLOPS: 96.10736142589433, Tokens per sec: 78531.79904467666, Loss: 2.2589056491851807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18473 , TFLOPS: 97.43254717153783, Tokens per sec: 79614.64243075787, Loss: 2.256622791290283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18474 , TFLOPS: 96.57601703285751, Tokens per sec: 78914.74960539486, Loss: 2.2568631172180176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18475 , TFLOPS: 96.12056307529622, Tokens per sec: 78542.58645224363, Loss: 2.2418923377990723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18476 , TFLOPS: 97.21856068834622, Tokens per sec: 79439.78856683988, Loss: 2.243213653564453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18477 , TFLOPS: 96.28736533513718, Tokens per sec: 78678.88487263185, Loss: 2.2539849281311035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18478 , TFLOPS: 96.7970755928713, Tokens per sec: 79095.38224533571, Loss: 2.284336566925049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18479 , TFLOPS: 96.65318522177965, Tokens per sec: 78977.80571904905, Loss: 2.2662429809570312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18480 , TFLOPS: 96.31209656864411, Tokens per sec: 78699.09340016895, Loss: 2.2307381629943848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18481 , TFLOPS: 97.71383641261099, Tokens per sec: 79844.491110668, Loss: 2.2412760257720947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18482 , TFLOPS: 97.27548346668357, Tokens per sec: 79486.30163434199, Loss: 2.2689576148986816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18483 , TFLOPS: 97.86102232698543, Tokens per sec: 79964.76051020584, Loss: 2.2727694511413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18484 , TFLOPS: 97.17201613353276, Tokens per sec: 79401.75581293837, Loss: 2.2749688625335693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18485 , TFLOPS: 97.85949215644042, Tokens per sec: 79963.51016846344, Loss: 2.2392404079437256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18486 , TFLOPS: 97.1317506745985, Tokens per sec: 79368.85386991806, Loss: 2.2447896003723145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18487 , TFLOPS: 96.51016724668686, Tokens per sec: 78860.94204999054, Loss: 2.241631269454956 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18488 , TFLOPS: 96.5938458169687, Tokens per sec: 78929.31796384585, Loss: 2.240285873413086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18489 , TFLOPS: 97.32137165688405, Tokens per sec: 79523.79805582191, Loss: 2.27323317527771 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18490 , TFLOPS: 97.11575481781948, Tokens per sec: 79355.78324357393, Loss: 2.2498996257781982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18491 , TFLOPS: 96.00059759248028, Tokens per sec: 78444.55956804845, Loss: 2.2713418006896973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18492 , TFLOPS: 98.41923535416086, Tokens per sec: 80420.89074439257, Loss: 2.245537281036377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18493 , TFLOPS: 94.82806787628614, Tokens per sec: 77486.45535334614, Loss: 2.2662460803985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18494 , TFLOPS: 97.77420153443461, Tokens per sec: 79893.81700564655, Loss: 2.273195266723633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18495 , TFLOPS: 97.27724717648785, Tokens per sec: 79487.742807025, Loss: 2.2595443725585938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18496 , TFLOPS: 96.66659246074045, Tokens per sec: 78988.76111913695, Loss: 2.261086940765381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18497 , TFLOPS: 96.66251916398947, Tokens per sec: 78985.43272350574, Loss: 2.2515532970428467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18498 , TFLOPS: 96.2214317600471, Tokens per sec: 78625.00885114452, Loss: 2.234815835952759 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18499 , TFLOPS: 97.82977045623859, Tokens per sec: 79939.22380212389, Loss: 2.248433828353882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18500 , TFLOPS: 96.79965790817867, Tokens per sec: 79097.49232164808, Loss: 2.2594430446624756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18501 , TFLOPS: 97.37151011086534, Tokens per sec: 79564.76747724865, Loss: 2.2529537677764893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18502 , TFLOPS: 97.13503748814223, Tokens per sec: 79371.53960987474, Loss: 2.2708420753479004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18503 , TFLOPS: 96.86092067933765, Tokens per sec: 79147.55170900612, Loss: 2.266145706176758 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18504 , TFLOPS: 97.20594366765322, Tokens per sec: 79429.47887444061, Loss: 2.254504919052124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18505 , TFLOPS: 97.81257824078942, Tokens per sec: 79925.17560031336, Loss: 2.2571020126342773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18506 , TFLOPS: 95.91746815578152, Tokens per sec: 78376.63236537983, Loss: 2.263112783432007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18507 , TFLOPS: 97.8031216065859, Tokens per sec: 79917.44834107027, Loss: 2.2478528022766113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18508 , TFLOPS: 95.95600660007146, Tokens per sec: 78408.12312027696, Loss: 2.255830764770508 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18509 , TFLOPS: 97.3711188539573, Tokens per sec: 79564.44777115724, Loss: 2.2344672679901123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18510 , TFLOPS: 96.62929391838345, Tokens per sec: 78958.28352002717, Loss: 2.2694923877716064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18511 , TFLOPS: 97.2880774245489, Tokens per sec: 79496.59248151108, Loss: 2.2590274810791016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18512 , TFLOPS: 96.63669831395843, Tokens per sec: 78964.33384225758, Loss: 2.2534821033477783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18513 , TFLOPS: 96.07775399247511, Tokens per sec: 78507.60604866668, Loss: 2.230334520339966 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18514 , TFLOPS: 96.69009909413732, Tokens per sec: 79007.96899440011, Loss: 2.252681016921997 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18515 , TFLOPS: 96.16453081582398, Tokens per sec: 78578.5136248592, Loss: 2.253875255584717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18516 , TFLOPS: 97.33670355974037, Tokens per sec: 79536.32614832411, Loss: 2.266857624053955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18517 , TFLOPS: 96.08569681109722, Tokens per sec: 78514.09633022915, Loss: 2.257427930831909 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18518 , TFLOPS: 96.01856589464185, Tokens per sec: 78459.24193028994, Loss: 2.252577781677246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18519 , TFLOPS: 97.84491427257166, Tokens per sec: 79951.59820428616, Loss: 2.2439897060394287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18520 , TFLOPS: 96.41155603218152, Tokens per sec: 78780.36428813974, Loss: 2.2577998638153076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18521 , TFLOPS: 97.36691581162671, Tokens per sec: 79561.01335707298, Loss: 2.2546422481536865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18522 , TFLOPS: 97.80967859634538, Tokens per sec: 79922.80622619465, Loss: 2.2700817584991455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18523 , TFLOPS: 97.13353524845097, Tokens per sec: 79370.31209115192, Loss: 2.245626211166382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18524 , TFLOPS: 97.70747142467202, Tokens per sec: 79839.29011517369, Loss: 2.25618314743042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18525 , TFLOPS: 96.05724003533383, Tokens per sec: 78490.84356621031, Loss: 2.2612991333007812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18526 , TFLOPS: 97.29803928497252, Tokens per sec: 79504.73257410432, Loss: 2.283156394958496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18527 , TFLOPS: 96.45275740450013, Tokens per sec: 78814.03099007894, Loss: 2.236176013946533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18528 , TFLOPS: 97.3527142487888, Tokens per sec: 79549.40889449738, Loss: 2.254927158355713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18529 , TFLOPS: 93.85902985595564, Tokens per sec: 76694.62944167625, Loss: 2.233891487121582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18530 , TFLOPS: 97.88784476398254, Tokens per sec: 79986.67781394714, Loss: 2.2848353385925293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18531 , TFLOPS: 96.65975446701313, Tokens per sec: 78983.17361843675, Loss: 2.2547945976257324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18532 , TFLOPS: 97.83263525159198, Tokens per sec: 79941.56470015353, Loss: 2.260094404220581 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18533 , TFLOPS: 95.99529452760127, Tokens per sec: 78440.2262972228, Loss: 2.2511508464813232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18534 , TFLOPS: 97.19318258958882, Tokens per sec: 79419.05146905482, Loss: 2.25978684425354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18535 , TFLOPS: 97.38446275515876, Tokens per sec: 79575.35141633154, Loss: 2.243595838546753 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18536 , TFLOPS: 95.62470240304593, Tokens per sec: 78137.40593235866, Loss: 2.2502946853637695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18537 , TFLOPS: 97.76351300184952, Tokens per sec: 79885.08313052397, Loss: 2.254448413848877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18538 , TFLOPS: 95.64704808789273, Tokens per sec: 78155.66516667607, Loss: 2.270264148712158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18539 , TFLOPS: 97.83503314164561, Tokens per sec: 79943.52407784354, Loss: 2.22725510597229 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18540 , TFLOPS: 96.78333078804972, Tokens per sec: 79084.15101149326, Loss: 2.274857997894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18541 , TFLOPS: 97.40451242550355, Tokens per sec: 79591.7345180947, Loss: 2.2378783226013184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18542 , TFLOPS: 96.16310491686896, Tokens per sec: 78577.34848611715, Loss: 2.2732465267181396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18543 , TFLOPS: 97.8257594521641, Tokens per sec: 79935.9463074422, Loss: 2.2685744762420654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18544 , TFLOPS: 95.94895542171162, Tokens per sec: 78402.36142092565, Loss: 2.2504758834838867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18545 , TFLOPS: 97.86994801764499, Tokens per sec: 79972.0539218115, Loss: 2.261101484298706 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18546 , TFLOPS: 96.27188457063039, Tokens per sec: 78666.23514144303, Loss: 2.2347044944763184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18547 , TFLOPS: 96.36321289196924, Tokens per sec: 78740.86186380914, Loss: 2.2302205562591553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18548 , TFLOPS: 98.39190451438348, Tokens per sec: 80398.55801165207, Loss: 2.267336368560791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18549 , TFLOPS: 95.65019175029437, Tokens per sec: 78158.23393414952, Loss: 2.2536911964416504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18550 , TFLOPS: 97.09474634971967, Tokens per sec: 79338.6166834835, Loss: 2.2539966106414795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18551 , TFLOPS: 96.62395715087078, Tokens per sec: 78953.92270991193, Loss: 2.251319169998169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18552 , TFLOPS: 97.76400787011941, Tokens per sec: 79885.48750012634, Loss: 2.2494735717773438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18553 , TFLOPS: 96.91995783675088, Tokens per sec: 79195.7924900801, Loss: 2.28652286529541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18554 , TFLOPS: 97.93223508659091, Tokens per sec: 80022.950289259, Loss: 2.270054817199707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18555 , TFLOPS: 96.53094366203862, Tokens per sec: 78877.91899380696, Loss: 2.268929958343506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18556 , TFLOPS: 96.23385084881993, Tokens per sec: 78635.15680827659, Loss: 2.2454495429992676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18557 , TFLOPS: 96.58716535380304, Tokens per sec: 78923.85918542279, Loss: 2.2579827308654785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18558 , TFLOPS: 97.17801150550648, Tokens per sec: 79406.6547857127, Loss: 2.2676782608032227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18559 , TFLOPS: 97.19585296292355, Tokens per sec: 79421.23349984817, Loss: 2.284669876098633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18560 , TFLOPS: 97.376469040067, Tokens per sec: 79568.81954595352, Loss: 2.2588818073272705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18561 , TFLOPS: 97.88416694931533, Tokens per sec: 79983.67257690719, Loss: 2.242525100708008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18562 , TFLOPS: 96.62491951666699, Tokens per sec: 78954.70908377752, Loss: 2.24814510345459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18563 , TFLOPS: 97.77977809343928, Tokens per sec: 79898.37375556266, Loss: 2.2463324069976807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18564 , TFLOPS: 96.64175143558828, Tokens per sec: 78968.46287802025, Loss: 2.2471141815185547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18565 , TFLOPS: 97.25779432670859, Tokens per sec: 79471.84738270947, Loss: 2.2631919384002686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18566 , TFLOPS: 97.79079727079515, Tokens per sec: 79907.37780903901, Loss: 2.257451057434082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18567 , TFLOPS: 96.0089537851326, Tokens per sec: 78451.38762817222, Loss: 2.2437236309051514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18568 , TFLOPS: 97.78195953530467, Tokens per sec: 79900.15626786617, Loss: 2.2573583126068115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18569 , TFLOPS: 96.59068103004506, Tokens per sec: 78926.7319349817, Loss: 2.2180428504943848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18570 , TFLOPS: 97.04903677685743, Tokens per sec: 79301.26621482868, Loss: 2.250563383102417 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18571 , TFLOPS: 96.60705620206187, Tokens per sec: 78940.11250957104, Loss: 2.2441508769989014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18572 , TFLOPS: 97.27767865174927, Tokens per sec: 79488.09537656826, Loss: 2.257495880126953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18573 , TFLOPS: 96.6551313443053, Tokens per sec: 78979.39594586242, Loss: 2.2440834045410156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18574 , TFLOPS: 95.11079069760439, Tokens per sec: 77717.47544857816, Loss: 2.265897274017334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18575 , TFLOPS: 97.24388636388106, Tokens per sec: 79460.48282826027, Loss: 2.261209011077881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18576 , TFLOPS: 95.48014501184353, Tokens per sec: 78019.28436676887, Loss: 2.2550151348114014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18577 , TFLOPS: 97.9816541611094, Tokens per sec: 80063.33188720822, Loss: 2.295487880706787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18578 , TFLOPS: 96.76691683229647, Tokens per sec: 79070.73874571447, Loss: 2.2539994716644287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18579 , TFLOPS: 97.38307909880105, Tokens per sec: 79574.22079510316, Loss: 2.2785463333129883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18580 , TFLOPS: 96.42813011852124, Tokens per sec: 78793.90740074284, Loss: 2.24711012840271 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18581 , TFLOPS: 97.37072902332437, Tokens per sec: 79564.12923051196, Loss: 2.277704954147339 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18582 , TFLOPS: 97.23444896972157, Tokens per sec: 79452.77129055242, Loss: 2.2624568939208984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18583 , TFLOPS: 97.77062521177825, Tokens per sec: 79890.8947003413, Loss: 2.2491960525512695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18584 , TFLOPS: 97.1652351159067, Tokens per sec: 79396.21486887718, Loss: 2.270190954208374 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18585 , TFLOPS: 96.11814378491137, Tokens per sec: 78540.6095877921, Loss: 2.2435765266418457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18586 , TFLOPS: 98.43900416808259, Tokens per sec: 80437.04435114257, Loss: 2.228410243988037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18587 , TFLOPS: 96.74909287522279, Tokens per sec: 79056.17433156008, Loss: 2.2489757537841797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18588 , TFLOPS: 97.78084221710917, Tokens per sec: 79899.24327840631, Loss: 2.2401015758514404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18589 , TFLOPS: 96.49584851522835, Tokens per sec: 78849.24184591885, Loss: 2.2579939365386963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18590 , TFLOPS: 97.75738412715468, Tokens per sec: 79880.07506923954, Loss: 2.2595484256744385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18591 , TFLOPS: 96.50625164709771, Tokens per sec: 78857.74251276974, Loss: 2.277393341064453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18592 , TFLOPS: 97.86007763553694, Tokens per sec: 79963.98857850477, Loss: 2.2559547424316406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18593 , TFLOPS: 97.89509545798272, Tokens per sec: 79992.60254266416, Loss: 2.2286174297332764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18594 , TFLOPS: 95.65058473539396, Tokens per sec: 78158.55505239073, Loss: 2.244657039642334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18595 , TFLOPS: 97.80960663264105, Tokens per sec: 79922.74742279934, Loss: 2.242149829864502 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18596 , TFLOPS: 97.20372697989453, Tokens per sec: 79427.66756181038, Loss: 2.256293535232544 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18597 , TFLOPS: 97.2827648385909, Tokens per sec: 79492.25143076667, Loss: 2.2536590099334717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18598 , TFLOPS: 97.88186579866614, Tokens per sec: 79981.79224747485, Loss: 2.249253511428833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18599 , TFLOPS: 98.40804449556103, Tokens per sec: 80411.74640575232, Loss: 2.2574098110198975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18600 , TFLOPS: 96.54570520788762, Tokens per sec: 78889.98103291618, Loss: 2.2668397426605225 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/optimizer.pt +[rank0]:[2024-08-30 18:10:33,875] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007805975998053327, 'preprocessing_with_comm': 0.0018740890081971884, 'state_converting': 2.593130670982646, : 2.604466896998929}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600, took 14.97s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016600 + eval ppl=7.268348217010498, eval loss=1.983528971672058 +------------------------------------------------------------------ +iteration: 18601 , TFLOPS: 95.76585285631026, Tokens per sec: 78252.74360125685, Loss: 2.2611513137817383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18602 , TFLOPS: 96.88174517973965, Tokens per sec: 79164.56794435492, Loss: 2.2281320095062256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18603 , TFLOPS: 96.80187918146873, Tokens per sec: 79099.30738123406, Loss: 2.2484302520751953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18604 , TFLOPS: 96.66829095180925, Tokens per sec: 78990.14899991233, Loss: 2.2606377601623535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18605 , TFLOPS: 96.67177692704617, Tokens per sec: 78992.9974800154, Loss: 2.26129412651062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18606 , TFLOPS: 97.34714535783573, Tokens per sec: 79544.85841034362, Loss: 2.2668325901031494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18607 , TFLOPS: 96.97417912270562, Tokens per sec: 79240.09809861377, Loss: 2.24897837638855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18608 , TFLOPS: 97.30092415649246, Tokens per sec: 79507.08987688646, Loss: 2.2477269172668457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18609 , TFLOPS: 98.27945681123481, Tokens per sec: 80306.6740987479, Loss: 2.26849102973938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18610 , TFLOPS: 98.32306222619503, Tokens per sec: 80342.30520581515, Loss: 2.280555486679077 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18611 , TFLOPS: 97.72632878651629, Tokens per sec: 79854.69895096832, Loss: 2.266815662384033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18612 , TFLOPS: 97.79809545000631, Tokens per sec: 79913.34133914429, Loss: 2.2504186630249023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18613 , TFLOPS: 97.29470749596135, Tokens per sec: 79502.01008353541, Loss: 2.266364336013794 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18614 , TFLOPS: 96.43834733858256, Tokens per sec: 78802.2561542696, Loss: 2.236008644104004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18615 , TFLOPS: 98.45163779161125, Tokens per sec: 80447.36761014623, Loss: 2.261897087097168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18616 , TFLOPS: 97.05678704565032, Tokens per sec: 79307.59915896902, Loss: 2.2702603340148926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18617 , TFLOPS: 97.1165886460657, Tokens per sec: 79356.46458609858, Loss: 2.2740354537963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18618 , TFLOPS: 97.13549127864644, Tokens per sec: 79371.91041377734, Loss: 2.247793197631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18619 , TFLOPS: 97.7224519274161, Tokens per sec: 79851.53106959834, Loss: 2.2472760677337646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18620 , TFLOPS: 97.79838039031418, Tokens per sec: 79913.57417120486, Loss: 2.2429962158203125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18621 , TFLOPS: 96.05206666209678, Tokens per sec: 78486.61626976384, Loss: 2.2656188011169434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18622 , TFLOPS: 96.61480947905395, Tokens per sec: 78946.44791178839, Loss: 2.2648496627807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18623 , TFLOPS: 97.74554137934157, Tokens per sec: 79870.39805514201, Loss: 2.258436679840088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18624 , TFLOPS: 96.06814150257057, Tokens per sec: 78499.75143571818, Loss: 2.2531440258026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18625 , TFLOPS: 97.260322377569, Tokens per sec: 79473.91311814533, Loss: 2.255424976348877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18626 , TFLOPS: 97.19214669686164, Tokens per sec: 79418.20501444119, Loss: 2.2383341789245605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18627 , TFLOPS: 94.82010022871074, Tokens per sec: 77479.9447834068, Loss: 2.2582356929779053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18628 , TFLOPS: 97.939967694097, Tokens per sec: 80029.26880190727, Loss: 2.264151096343994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18629 , TFLOPS: 96.75220338988237, Tokens per sec: 79058.7160131602, Loss: 2.263082981109619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18630 , TFLOPS: 95.58179657207374, Tokens per sec: 78102.34647337694, Loss: 2.258862018585205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18631 , TFLOPS: 96.54627997756975, Tokens per sec: 78890.45069201934, Loss: 2.2624664306640625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18632 , TFLOPS: 96.64969110432918, Tokens per sec: 78974.95058573369, Loss: 2.2423009872436523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18633 , TFLOPS: 96.11914839972526, Tokens per sec: 78541.43048441761, Loss: 2.2766199111938477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18634 , TFLOPS: 96.1422481725577, Tokens per sec: 78560.30590344006, Loss: 2.248965263366699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18635 , TFLOPS: 96.04469735941517, Tokens per sec: 78480.59462283997, Loss: 2.2511441707611084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18636 , TFLOPS: 96.85607088589754, Tokens per sec: 79143.5888179414, Loss: 2.2525923252105713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18637 , TFLOPS: 95.654752680773, Tokens per sec: 78161.96078785241, Loss: 2.2530033588409424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18638 , TFLOPS: 97.43238059030553, Tokens per sec: 79614.50631294477, Loss: 2.233614921569824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18639 , TFLOPS: 96.31403118645889, Tokens per sec: 78700.6742261869, Loss: 2.268636465072632 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18640 , TFLOPS: 97.8006475574508, Tokens per sec: 79915.42673183428, Loss: 2.2527194023132324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18641 , TFLOPS: 96.11853400750549, Tokens per sec: 78540.92844871899, Loss: 2.25944447517395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18642 , TFLOPS: 97.91848713183329, Tokens per sec: 80011.7164815227, Loss: 2.2498233318328857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18643 , TFLOPS: 97.30156549754408, Tokens per sec: 79507.61393316931, Loss: 2.255296468734741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18644 , TFLOPS: 96.89269651551719, Tokens per sec: 79173.5165627312, Loss: 2.239819049835205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18645 , TFLOPS: 97.05379924354709, Tokens per sec: 79305.1577489576, Loss: 2.262566089630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18646 , TFLOPS: 96.76478120863537, Tokens per sec: 79068.99367264514, Loss: 2.243617534637451 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18647 , TFLOPS: 97.83296471982604, Tokens per sec: 79941.83391712885, Loss: 2.2418911457061768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18648 , TFLOPS: 97.8168968399724, Tokens per sec: 79928.70443887642, Loss: 2.2607672214508057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18649 , TFLOPS: 97.1029584238365, Tokens per sec: 79345.32698064196, Loss: 2.2677674293518066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18650 , TFLOPS: 97.75615675759514, Tokens per sec: 79879.07215397713, Loss: 2.2614376544952393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18651 , TFLOPS: 97.12152982662789, Tokens per sec: 79360.50215193324, Loss: 2.254249334335327 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18652 , TFLOPS: 97.13221481152428, Tokens per sec: 79369.23312814806, Loss: 2.2653005123138428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18653 , TFLOPS: 97.2745624516236, Tokens per sec: 79485.54904922745, Loss: 2.272440195083618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18654 , TFLOPS: 97.3364313227845, Tokens per sec: 79536.10369649946, Loss: 2.2398881912231445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18655 , TFLOPS: 96.72842050938662, Tokens per sec: 79039.28240928131, Loss: 2.252074718475342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18656 , TFLOPS: 97.83664736173274, Tokens per sec: 79944.84309863046, Loss: 2.2481722831726074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18657 , TFLOPS: 97.14906328562905, Tokens per sec: 79383.00045005739, Loss: 2.2342090606689453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18658 , TFLOPS: 97.06872061709345, Tokens per sec: 79317.35038739265, Loss: 2.2439708709716797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18659 , TFLOPS: 95.53389453722049, Tokens per sec: 78063.20448759026, Loss: 2.2464892864227295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18660 , TFLOPS: 97.72336072158966, Tokens per sec: 79852.27366871198, Loss: 2.255051851272583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18661 , TFLOPS: 97.3126475259762, Tokens per sec: 79516.669343878, Loss: 2.2558608055114746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18662 , TFLOPS: 96.14524728812569, Tokens per sec: 78562.75655797476, Loss: 2.267958164215088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18663 , TFLOPS: 97.28196189218015, Tokens per sec: 79491.59532258476, Loss: 2.26990008354187 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18664 , TFLOPS: 96.02217706695558, Tokens per sec: 78462.19271214733, Loss: 2.26004958152771 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18665 , TFLOPS: 96.11497432236852, Tokens per sec: 78538.01973835911, Loss: 2.2659037113189697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18666 , TFLOPS: 97.83548107861243, Tokens per sec: 79943.89009867005, Loss: 2.265167474746704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18667 , TFLOPS: 97.25678863823, Tokens per sec: 79471.02560876489, Loss: 2.2567315101623535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18668 , TFLOPS: 95.6068803469324, Tokens per sec: 78122.84307152775, Loss: 2.2690279483795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18669 , TFLOPS: 97.09868095472571, Tokens per sec: 79341.83175051968, Loss: 2.24631667137146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18670 , TFLOPS: 96.78607193498195, Tokens per sec: 79086.39087321497, Loss: 2.260160446166992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18671 , TFLOPS: 96.69730037007109, Tokens per sec: 79013.85334234295, Loss: 2.250887632369995 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18672 , TFLOPS: 95.50845439549057, Tokens per sec: 78042.41669289523, Loss: 2.24489688873291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18673 , TFLOPS: 96.84011680492394, Tokens per sec: 79130.5523276833, Loss: 2.272629499435425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18674 , TFLOPS: 97.30030406859741, Tokens per sec: 79506.58318710493, Loss: 2.2532691955566406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18675 , TFLOPS: 96.2584628988443, Tokens per sec: 78655.26794792204, Loss: 2.247335195541382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18676 , TFLOPS: 96.62133734322246, Tokens per sec: 78951.78199764265, Loss: 2.236769676208496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18677 , TFLOPS: 96.85348304170283, Tokens per sec: 79141.47422383286, Loss: 2.2513716220855713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18678 , TFLOPS: 97.01732045359252, Tokens per sec: 79275.34999063812, Loss: 2.278184413909912 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18679 , TFLOPS: 96.8480973097896, Tokens per sec: 79137.0734036455, Loss: 2.2410025596618652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18680 , TFLOPS: 97.26939959932075, Tokens per sec: 79481.33034970715, Loss: 2.2562079429626465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18681 , TFLOPS: 97.30284674833962, Tokens per sec: 79508.66087617668, Loss: 2.2700719833374023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18682 , TFLOPS: 96.31723533072116, Tokens per sec: 78703.29241494561, Loss: 2.2512526512145996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18683 , TFLOPS: 97.13809748248066, Tokens per sec: 79374.04001001993, Loss: 2.2489373683929443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18684 , TFLOPS: 97.29760165063644, Tokens per sec: 79504.37497182259, Loss: 2.293233633041382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18685 , TFLOPS: 97.10065652743073, Tokens per sec: 79343.44604183272, Loss: 2.2614035606384277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18686 , TFLOPS: 97.79238102503228, Tokens per sec: 79908.67193539563, Loss: 2.232743263244629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18687 , TFLOPS: 96.66644987008259, Tokens per sec: 78988.64460463982, Loss: 2.267301321029663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18688 , TFLOPS: 97.24728899211108, Tokens per sec: 79463.26320337846, Loss: 2.2709341049194336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18689 , TFLOPS: 96.5222421638532, Tokens per sec: 78870.80877564316, Loss: 2.26059627532959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18690 , TFLOPS: 96.93515595308492, Tokens per sec: 79208.21126217095, Loss: 2.2534730434417725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18691 , TFLOPS: 97.1108651411877, Tokens per sec: 79351.78776292663, Loss: 2.256165027618408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18692 , TFLOPS: 97.14129489999405, Tokens per sec: 79376.65270217898, Loss: 2.2362542152404785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18693 , TFLOPS: 96.27868683448993, Tokens per sec: 78671.7934463485, Loss: 2.2596588134765625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18694 , TFLOPS: 97.85407467633179, Tokens per sec: 79959.08340600833, Loss: 2.2478737831115723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18695 , TFLOPS: 97.13072451688805, Tokens per sec: 79368.0153700372, Loss: 2.254791021347046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18696 , TFLOPS: 97.22735658161103, Tokens per sec: 79446.97591765299, Loss: 2.264453887939453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18697 , TFLOPS: 96.1819092255077, Tokens per sec: 78592.71397077183, Loss: 2.2380177974700928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18698 , TFLOPS: 97.01650851202898, Tokens per sec: 79274.68653228531, Loss: 2.26137113571167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18699 , TFLOPS: 97.23141500904312, Tokens per sec: 79450.29216318096, Loss: 2.2752716541290283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18700 , TFLOPS: 96.6883246865285, Tokens per sec: 79006.51908026564, Loss: 2.2625367641448975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18701 , TFLOPS: 97.38614815534189, Tokens per sec: 79576.72860020724, Loss: 2.2416532039642334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18702 , TFLOPS: 96.7953142900156, Tokens per sec: 79093.94303943236, Loss: 2.2593016624450684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18703 , TFLOPS: 95.9975172807455, Tokens per sec: 78442.0425660357, Loss: 2.2533390522003174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18704 , TFLOPS: 97.27395410876008, Tokens per sec: 79485.05195661353, Loss: 2.23291277885437 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18705 , TFLOPS: 97.91827068883244, Tokens per sec: 80011.53962037497, Loss: 2.2508039474487305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18706 , TFLOPS: 95.47078705692442, Tokens per sec: 78011.63774090928, Loss: 2.261173963546753 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18707 , TFLOPS: 97.72988112071187, Tokens per sec: 79857.60165468475, Loss: 2.238643169403076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18708 , TFLOPS: 96.69109734761179, Tokens per sec: 79008.78469301143, Loss: 2.272064447402954 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18709 , TFLOPS: 97.26404183462203, Tokens per sec: 79476.9523822507, Loss: 2.259760856628418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18710 , TFLOPS: 95.36749106617063, Tokens per sec: 77927.23192778899, Loss: 2.2559127807617188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18711 , TFLOPS: 96.73195978095177, Tokens per sec: 79042.17443918614, Loss: 2.2516446113586426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18712 , TFLOPS: 97.77863590589958, Tokens per sec: 79897.44044472146, Loss: 2.255481719970703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18713 , TFLOPS: 96.15192971618943, Tokens per sec: 78568.21694196665, Loss: 2.245407819747925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18714 , TFLOPS: 96.76216160798566, Tokens per sec: 79066.85312951973, Loss: 2.246389865875244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18715 , TFLOPS: 96.1112810596602, Tokens per sec: 78535.00187832711, Loss: 2.260942220687866 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18716 , TFLOPS: 97.72938171423553, Tokens per sec: 79857.19357679707, Loss: 2.268958806991577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18717 , TFLOPS: 96.15278625412624, Tokens per sec: 78568.91684116406, Loss: 2.2387142181396484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18718 , TFLOPS: 96.60137302720828, Tokens per sec: 78935.46864110025, Loss: 2.2312376499176025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18719 , TFLOPS: 97.18851659555835, Tokens per sec: 79415.23876521915, Loss: 2.253178834915161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18720 , TFLOPS: 96.82700330679003, Tokens per sec: 79119.83695078662, Loss: 2.2607946395874023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18721 , TFLOPS: 96.57664256802526, Tokens per sec: 78915.26074628315, Loss: 2.2329652309417725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18722 , TFLOPS: 97.2564118873491, Tokens per sec: 79470.71775592168, Loss: 2.270537853240967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18723 , TFLOPS: 96.4004308820678, Tokens per sec: 78771.27364159496, Loss: 2.2571141719818115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18724 , TFLOPS: 98.3909743561559, Tokens per sec: 80397.79795541991, Loss: 2.2863738536834717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18725 , TFLOPS: 96.10784043458985, Tokens per sec: 78532.19045501255, Loss: 2.2530927658081055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18726 , TFLOPS: 97.78300289377452, Tokens per sec: 79901.00882293035, Loss: 2.247828722000122 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18727 , TFLOPS: 96.69240900932267, Tokens per sec: 79009.85648556058, Loss: 2.2507073879241943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18728 , TFLOPS: 97.80112558533935, Tokens per sec: 79915.81734072753, Loss: 2.2517895698547363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18729 , TFLOPS: 97.23926307135908, Tokens per sec: 79456.70501692643, Loss: 2.2448222637176514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18730 , TFLOPS: 97.24834099208566, Tokens per sec: 79464.1228196386, Loss: 2.2489988803863525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18731 , TFLOPS: 96.13087619830962, Tokens per sec: 78551.0135705413, Loss: 2.2680625915527344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18732 , TFLOPS: 97.33905609816254, Tokens per sec: 79538.24846802859, Loss: 2.2414867877960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18733 , TFLOPS: 97.69452592540978, Tokens per sec: 79828.71201447971, Loss: 2.260984420776367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18734 , TFLOPS: 96.04919478061032, Tokens per sec: 78484.26958147246, Loss: 2.2464969158172607 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18735 , TFLOPS: 96.69021761973272, Tokens per sec: 79008.06584471524, Loss: 2.246861457824707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18736 , TFLOPS: 97.80880805032128, Tokens per sec: 79922.09488062852, Loss: 2.2534196376800537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18737 , TFLOPS: 97.35625560000084, Tokens per sec: 79552.30262373522, Loss: 2.2495534420013428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18738 , TFLOPS: 97.2202307970581, Tokens per sec: 79441.15325566077, Loss: 2.2560157775878906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18739 , TFLOPS: 97.27123528732828, Tokens per sec: 79482.83033763293, Loss: 2.25227689743042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18740 , TFLOPS: 96.56712917038737, Tokens per sec: 78907.4871041766, Loss: 2.264963150024414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18741 , TFLOPS: 96.59796657323999, Tokens per sec: 78932.68513987283, Loss: 2.2698750495910645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18742 , TFLOPS: 97.15378774653901, Tokens per sec: 79386.86092868535, Loss: 2.259342670440674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18743 , TFLOPS: 97.74596568895124, Tokens per sec: 79870.74476944677, Loss: 2.249051332473755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18744 , TFLOPS: 95.62629108939859, Tokens per sec: 78138.70408887384, Loss: 2.2567007541656494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18745 , TFLOPS: 97.59404175215826, Tokens per sec: 79746.60380982317, Loss: 2.2529842853546143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18746 , TFLOPS: 96.03810931768406, Tokens per sec: 78475.2113643502, Loss: 2.265289545059204 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18747 , TFLOPS: 97.27775884035277, Tokens per sec: 79488.16090074045, Loss: 2.2522988319396973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18748 , TFLOPS: 96.667817427296, Tokens per sec: 78989.76207084295, Loss: 2.254929542541504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18749 , TFLOPS: 96.79415173566558, Tokens per sec: 79092.99308634615, Loss: 2.25406813621521 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18750 , TFLOPS: 97.13299511505718, Tokens per sec: 79369.87073425158, Loss: 2.2600784301757812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18751 , TFLOPS: 96.71913083319716, Tokens per sec: 79031.69157572936, Loss: 2.2439002990722656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18752 , TFLOPS: 97.08783390147626, Tokens per sec: 79332.96834408156, Loss: 2.2537975311279297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18753 , TFLOPS: 96.1116858282494, Tokens per sec: 78535.33262516101, Loss: 2.2456865310668945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18754 , TFLOPS: 96.22942412732078, Tokens per sec: 78631.5396201857, Loss: 2.2612268924713135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18755 , TFLOPS: 96.14909609608166, Tokens per sec: 78565.90151803273, Loss: 2.2598137855529785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18756 , TFLOPS: 97.9942846806656, Tokens per sec: 80073.6526098761, Loss: 2.25079345703125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18757 , TFLOPS: 96.44349740977006, Tokens per sec: 78806.46441001153, Loss: 2.2767629623413086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18758 , TFLOPS: 97.46258425791783, Tokens per sec: 79639.18650726236, Loss: 2.255000591278076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18759 , TFLOPS: 96.68957546788506, Tokens per sec: 79007.54112591007, Loss: 2.244678497314453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18760 , TFLOPS: 97.30448805868689, Tokens per sec: 79510.00203311253, Loss: 2.261293888092041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18761 , TFLOPS: 96.96348760071805, Tokens per sec: 79231.36178077348, Loss: 2.258906364440918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18762 , TFLOPS: 98.39843057438283, Tokens per sec: 80403.89062327327, Loss: 2.271329879760742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18763 , TFLOPS: 96.92258248970792, Tokens per sec: 79197.93716157587, Loss: 2.2492549419403076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18764 , TFLOPS: 94.80376381738598, Tokens per sec: 77466.59588117668, Loss: 2.247243642807007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18765 , TFLOPS: 97.85138172863635, Tokens per sec: 79956.88292912375, Loss: 2.243931770324707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18766 , TFLOPS: 96.09682649449732, Tokens per sec: 78523.1906810389, Loss: 2.257021188735962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18767 , TFLOPS: 98.43094011612499, Tokens per sec: 80430.45500670084, Loss: 2.2628655433654785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18768 , TFLOPS: 97.13276822212902, Tokens per sec: 79369.68533419924, Loss: 2.2559802532196045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18769 , TFLOPS: 97.2251146147791, Tokens per sec: 79445.14394884126, Loss: 2.2621943950653076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18770 , TFLOPS: 98.48601270664854, Tokens per sec: 80475.45623811224, Loss: 2.2568206787109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18771 , TFLOPS: 97.80063722789053, Tokens per sec: 79915.41829128469, Loss: 2.257305860519409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18772 , TFLOPS: 97.26528319184044, Tokens per sec: 79477.96672718924, Loss: 2.271005868911743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18773 , TFLOPS: 96.71827455762113, Tokens per sec: 79030.99189091369, Loss: 2.2218291759490967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18774 , TFLOPS: 97.69749478936626, Tokens per sec: 79831.13794964392, Loss: 2.2513537406921387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18775 , TFLOPS: 96.99911517537365, Tokens per sec: 79260.47398915993, Loss: 2.257768392562866 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18776 , TFLOPS: 96.68584041913728, Tokens per sec: 79004.48912142955, Loss: 2.250358819961548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18777 , TFLOPS: 97.20338946690445, Tokens per sec: 79427.39177125781, Loss: 2.2565817832946777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18778 , TFLOPS: 96.46080461937693, Tokens per sec: 78820.60657650852, Loss: 2.2389755249023438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18779 , TFLOPS: 96.04909589716262, Tokens per sec: 78484.18878126184, Loss: 2.2644383907318115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18780 , TFLOPS: 97.76757668390354, Tokens per sec: 79888.4036697388, Loss: 2.248023271560669 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18781 , TFLOPS: 97.22231541699709, Tokens per sec: 79442.85665227582, Loss: 2.2534239292144775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18782 , TFLOPS: 95.38938729643897, Tokens per sec: 77945.12390120036, Loss: 2.2413432598114014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18783 , TFLOPS: 97.21726153702414, Tokens per sec: 79438.72699685147, Loss: 2.2694225311279297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18784 , TFLOPS: 96.73871293803045, Tokens per sec: 79047.69261767686, Loss: 2.2523980140686035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18785 , TFLOPS: 96.57669763705476, Tokens per sec: 78915.30574460476, Loss: 2.233900547027588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18786 , TFLOPS: 96.47205285581047, Tokens per sec: 78829.79780005373, Loss: 2.239466667175293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18787 , TFLOPS: 96.23498790476665, Tokens per sec: 78636.08592596109, Loss: 2.2576563358306885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18788 , TFLOPS: 97.89539599301166, Tokens per sec: 79992.84811757276, Loss: 2.260737895965576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18789 , TFLOPS: 95.43887787674232, Tokens per sec: 77985.5639283671, Loss: 2.264981985092163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18790 , TFLOPS: 97.46208646741472, Tokens per sec: 79638.779749828, Loss: 2.235694408416748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18791 , TFLOPS: 96.69071726901181, Tokens per sec: 79008.47412100331, Loss: 2.2428338527679443 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18792 , TFLOPS: 96.58208050741308, Tokens per sec: 78919.70422654194, Loss: 2.239413261413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18793 , TFLOPS: 96.63354929974071, Tokens per sec: 78961.76070167743, Loss: 2.266589403152466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18794 , TFLOPS: 97.12501248846253, Tokens per sec: 79363.34792457001, Loss: 2.255000591278076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18795 , TFLOPS: 97.74612721542952, Tokens per sec: 79870.87675689023, Loss: 2.260988473892212 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18796 , TFLOPS: 96.52126402922053, Tokens per sec: 78870.00951665538, Loss: 2.219602584838867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18797 , TFLOPS: 97.69957361531242, Tokens per sec: 79832.83661183825, Loss: 2.2286272048950195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18798 , TFLOPS: 96.59294625304582, Tokens per sec: 78928.58290700764, Loss: 2.249929189682007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18799 , TFLOPS: 97.72483615163111, Tokens per sec: 79853.4792805798, Loss: 2.226008176803589 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18800 , TFLOPS: 97.73344396898014, Tokens per sec: 79860.51294972062, Loss: 2.2524330615997314 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/model.pt +[rank0]:[2024-08-30 19:06:07,356] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007728826021775603, 'preprocessing_with_comm': 0.001671349018579349, 'state_converting': 2.64607397801592, : 2.6570785579970106}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800, took 14.76s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0016800 + eval ppl=7.501462936401367, eval loss=2.0150980949401855 +------------------------------------------------------------------ +iteration: 18801 , TFLOPS: 94.79064265095433, Tokens per sec: 77455.87423831651, Loss: 2.239985466003418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18802 , TFLOPS: 96.58089587524343, Tokens per sec: 78918.73623310098, Loss: 2.2420308589935303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18803 , TFLOPS: 96.42185843102612, Tokens per sec: 78788.782643443, Loss: 2.2491931915283203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18804 , TFLOPS: 95.33867784113676, Tokens per sec: 77903.68789989542, Loss: 2.253411293029785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18805 , TFLOPS: 96.33670651401789, Tokens per sec: 78719.20282005021, Loss: 2.256220579147339 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18806 , TFLOPS: 96.76795164706463, Tokens per sec: 79071.58431950003, Loss: 2.246109962463379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18807 , TFLOPS: 97.68378998543577, Tokens per sec: 79819.93940155927, Loss: 2.2413816452026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18808 , TFLOPS: 96.45991232978398, Tokens per sec: 78819.87746371255, Loss: 2.2583658695220947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18809 , TFLOPS: 98.37453014085547, Tokens per sec: 80384.36096378618, Loss: 2.2351760864257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18810 , TFLOPS: 97.66111917793295, Tokens per sec: 79801.41450115021, Loss: 2.2711331844329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18811 , TFLOPS: 98.33078943688673, Tokens per sec: 80348.61930858743, Loss: 2.2441301345825195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18812 , TFLOPS: 97.61158349462357, Tokens per sec: 79760.93761915619, Loss: 2.266685724258423 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18813 , TFLOPS: 96.96152279348544, Tokens per sec: 79229.75628620508, Loss: 2.256167411804199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18814 , TFLOPS: 96.43685433797629, Tokens per sec: 78801.03618504095, Loss: 2.2398698329925537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18815 , TFLOPS: 97.6940776970246, Tokens per sec: 79828.34575552776, Loss: 2.2799549102783203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18816 , TFLOPS: 97.77929074096707, Tokens per sec: 79897.97552731205, Loss: 2.255546808242798 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18817 , TFLOPS: 97.67632398727042, Tokens per sec: 79813.83874226647, Loss: 2.2477784156799316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18818 , TFLOPS: 97.72692535480593, Tokens per sec: 79855.18642227483, Loss: 2.2712111473083496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18819 , TFLOPS: 97.09959288154785, Tokens per sec: 79342.57690940113, Loss: 2.2485804557800293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18820 , TFLOPS: 97.8010357149844, Tokens per sec: 79915.74390534703, Loss: 2.2458600997924805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18821 , TFLOPS: 98.33368210314556, Tokens per sec: 80350.98298065136, Loss: 2.2603187561035156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18822 , TFLOPS: 97.11651789232896, Tokens per sec: 79356.40677139892, Loss: 2.265470027923584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18823 , TFLOPS: 97.33649587090501, Tokens per sec: 79536.15644043034, Loss: 2.2436602115631104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18824 , TFLOPS: 97.35191507005192, Tokens per sec: 79548.75586497878, Loss: 2.2681524753570557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18825 , TFLOPS: 97.66354434577677, Tokens per sec: 79803.39616822478, Loss: 2.2568836212158203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18826 , TFLOPS: 97.25774954159657, Tokens per sec: 79471.81078764157, Loss: 2.262277126312256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18827 , TFLOPS: 96.66906669450186, Tokens per sec: 78990.78287923586, Loss: 2.269753932952881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18828 , TFLOPS: 97.86364582585442, Tokens per sec: 79966.90423866657, Loss: 2.26206111907959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18829 , TFLOPS: 97.85675480083073, Tokens per sec: 79961.27340472896, Loss: 2.281743288040161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18830 , TFLOPS: 96.08496607557628, Tokens per sec: 78513.49922742417, Loss: 2.2479753494262695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18831 , TFLOPS: 96.51440178065664, Tokens per sec: 78864.40219670384, Loss: 2.2576770782470703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18832 , TFLOPS: 97.86311437459469, Tokens per sec: 79966.46997616148, Loss: 2.2652714252471924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18833 , TFLOPS: 95.98310353845208, Tokens per sec: 78430.26472616503, Loss: 2.283270835876465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18834 , TFLOPS: 97.38621897741851, Tokens per sec: 79576.78647074915, Loss: 2.2522239685058594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18835 , TFLOPS: 96.00684952844377, Tokens per sec: 78449.66818586338, Loss: 2.2559022903442383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18836 , TFLOPS: 96.56410352653133, Tokens per sec: 78905.01477269482, Loss: 2.258683919906616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18837 , TFLOPS: 96.03359507704579, Tokens per sec: 78471.52266211777, Loss: 2.266284227371216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18838 , TFLOPS: 96.91168227070534, Tokens per sec: 79189.03031203225, Loss: 2.273292064666748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18839 , TFLOPS: 95.48794589318945, Tokens per sec: 78025.65866772918, Loss: 2.2404086589813232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18840 , TFLOPS: 96.65476309227418, Tokens per sec: 78979.09503764813, Loss: 2.2585349082946777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18841 , TFLOPS: 96.49295651261995, Tokens per sec: 78846.87871614075, Loss: 2.2615044116973877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18842 , TFLOPS: 95.49428735116702, Tokens per sec: 78030.8404362863, Loss: 2.2527904510498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18843 , TFLOPS: 96.20017975498736, Tokens per sec: 78607.64329073492, Loss: 2.2441744804382324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18844 , TFLOPS: 97.4693568638289, Tokens per sec: 79644.7205778947, Loss: 2.247955560684204 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18845 , TFLOPS: 97.067377976226, Tokens per sec: 79316.25328097722, Loss: 2.244624376296997 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18846 , TFLOPS: 96.84929317343006, Tokens per sec: 79138.05057461039, Loss: 2.254185199737549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18847 , TFLOPS: 98.44428707173829, Tokens per sec: 80441.36114771367, Loss: 2.2629218101501465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18848 , TFLOPS: 97.66191474459796, Tokens per sec: 79802.0645791519, Loss: 2.260901927947998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18849 , TFLOPS: 98.40508960313133, Tokens per sec: 80409.33188708233, Loss: 2.2507450580596924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18850 , TFLOPS: 97.09062112751978, Tokens per sec: 79335.24585823131, Loss: 2.246403694152832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18851 , TFLOPS: 97.78528289028792, Tokens per sec: 79902.87186677377, Loss: 2.2680184841156006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18852 , TFLOPS: 96.41779292548031, Tokens per sec: 78785.4606142061, Loss: 2.259599447250366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18853 , TFLOPS: 98.45001517849613, Tokens per sec: 80446.04173120011, Loss: 2.2550394535064697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18854 , TFLOPS: 96.65657111587001, Tokens per sec: 78980.57242027097, Loss: 2.2294628620147705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18855 , TFLOPS: 96.59942364056478, Tokens per sec: 78933.87574709217, Loss: 2.2647786140441895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18856 , TFLOPS: 98.43678227405222, Tokens per sec: 80435.22878433371, Loss: 2.2587506771087646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18857 , TFLOPS: 97.19935552648067, Tokens per sec: 79424.09553469469, Loss: 2.235996723175049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18858 , TFLOPS: 96.66891180467074, Tokens per sec: 78990.65631476764, Loss: 2.243453025817871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18859 , TFLOPS: 98.41230451880926, Tokens per sec: 80415.22738041122, Loss: 2.2487943172454834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18860 , TFLOPS: 97.11919590932033, Tokens per sec: 79358.59504802084, Loss: 2.2452287673950195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18861 , TFLOPS: 97.28094969210785, Tokens per sec: 79490.76822784934, Loss: 2.2532434463500977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18862 , TFLOPS: 97.06086434351593, Tokens per sec: 79310.930824014, Loss: 2.2786853313446045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18863 , TFLOPS: 97.75272453268707, Tokens per sec: 79876.26759465144, Loss: 2.271848201751709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18864 , TFLOPS: 97.26471294512946, Tokens per sec: 79477.50076392225, Loss: 2.242488384246826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18865 , TFLOPS: 97.15684397806957, Tokens per sec: 79389.35825414336, Loss: 2.2529211044311523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18866 , TFLOPS: 96.06966808756913, Tokens per sec: 78500.99884761829, Loss: 2.258784294128418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18867 , TFLOPS: 98.50298811051991, Tokens per sec: 80489.3272776013, Loss: 2.289830446243286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18868 , TFLOPS: 95.63929355036942, Tokens per sec: 78149.32873444622, Loss: 2.2443671226501465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18869 , TFLOPS: 97.20442090329348, Tokens per sec: 79428.23458448282, Loss: 2.253164529800415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18870 , TFLOPS: 97.85038951775319, Tokens per sec: 79956.07216806932, Loss: 2.2674059867858887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18871 , TFLOPS: 94.88547349921367, Tokens per sec: 77533.36296559243, Loss: 2.2485437393188477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18872 , TFLOPS: 97.65429191160034, Tokens per sec: 79795.83576608037, Loss: 2.2298896312713623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18873 , TFLOPS: 95.71123426227915, Tokens per sec: 78208.11334206634, Loss: 2.270128011703491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18874 , TFLOPS: 96.64321259295309, Tokens per sec: 78969.65682731658, Loss: 2.252474308013916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18875 , TFLOPS: 96.69813385345311, Tokens per sec: 79014.53440307015, Loss: 2.2464916706085205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18876 , TFLOPS: 97.80128553057165, Tokens per sec: 79915.94803609421, Loss: 2.25642466545105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18877 , TFLOPS: 96.02294413985996, Tokens per sec: 78462.81950716372, Loss: 2.243335008621216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18878 , TFLOPS: 97.1780485180819, Tokens per sec: 79406.68502964085, Loss: 2.247298240661621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18879 , TFLOPS: 97.24130448576267, Tokens per sec: 79458.37310918629, Loss: 2.2470061779022217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18880 , TFLOPS: 96.73122813394248, Tokens per sec: 79041.5765915806, Loss: 2.2710514068603516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18881 , TFLOPS: 95.64184970850455, Tokens per sec: 78151.4174370598, Loss: 2.257828712463379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18882 , TFLOPS: 96.14838496783244, Tokens per sec: 78565.32043683421, Loss: 2.2567954063415527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18883 , TFLOPS: 97.82507324622475, Tokens per sec: 79935.38559090464, Loss: 2.2590250968933105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18884 , TFLOPS: 96.13870355078846, Tokens per sec: 78557.40950174573, Loss: 2.2652344703674316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18885 , TFLOPS: 98.42363959442513, Tokens per sec: 80424.48956248774, Loss: 2.253805637359619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18886 , TFLOPS: 97.7599290804567, Tokens per sec: 79882.15461609555, Loss: 2.2464425563812256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18887 , TFLOPS: 97.2433613044024, Tokens per sec: 79460.05378864403, Loss: 2.256196975708008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18888 , TFLOPS: 98.46619694851279, Tokens per sec: 80459.26427203653, Loss: 2.2494285106658936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18889 , TFLOPS: 97.73524507545949, Tokens per sec: 79861.9846801895, Loss: 2.264233350753784 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18890 , TFLOPS: 96.05257008085351, Tokens per sec: 78487.02762618911, Loss: 2.2656707763671875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18891 , TFLOPS: 97.66393306136405, Tokens per sec: 79803.71379773757, Loss: 2.2489542961120605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18892 , TFLOPS: 97.83755444767634, Tokens per sec: 79945.58430190544, Loss: 2.2554194927215576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18893 , TFLOPS: 95.48797290114403, Tokens per sec: 78025.68073662411, Loss: 2.2378525733947754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18894 , TFLOPS: 98.42543055973395, Tokens per sec: 80425.95300634514, Loss: 2.2676644325256348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18895 , TFLOPS: 96.41629991282615, Tokens per sec: 78784.24063513281, Loss: 2.266484260559082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18896 , TFLOPS: 97.19474319205625, Tokens per sec: 79420.32667750443, Loss: 2.235257625579834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18897 , TFLOPS: 97.10636237884131, Tokens per sec: 79348.10843990199, Loss: 2.274031400680542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18898 , TFLOPS: 95.92833236560688, Tokens per sec: 78385.50979089779, Loss: 2.238309860229492 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18899 , TFLOPS: 97.86249998418786, Tokens per sec: 79965.96794194424, Loss: 2.250361442565918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18900 , TFLOPS: 96.72070469920385, Tokens per sec: 79032.97762215835, Loss: 2.241353988647461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18901 , TFLOPS: 97.8092277377165, Tokens per sec: 79922.43781800287, Loss: 2.245391368865967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18902 , TFLOPS: 97.26614079004284, Tokens per sec: 79478.66749275695, Loss: 2.2635133266448975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18903 , TFLOPS: 96.14540686033047, Tokens per sec: 78562.88694853104, Loss: 2.2618355751037598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18904 , TFLOPS: 97.36391468301335, Tokens per sec: 79558.56105762708, Loss: 2.2572309970855713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18905 , TFLOPS: 97.96652276161599, Tokens per sec: 80050.96763116527, Loss: 2.2503409385681152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18906 , TFLOPS: 95.60929475439139, Tokens per sec: 78124.81594601486, Loss: 2.2539215087890625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18907 , TFLOPS: 97.77326183484864, Tokens per sec: 79893.04915292484, Loss: 2.240903854370117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18908 , TFLOPS: 97.58243510227015, Tokens per sec: 79737.11971741785, Loss: 2.2253012657165527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18909 , TFLOPS: 95.84694797358713, Tokens per sec: 78319.00850915782, Loss: 2.2435293197631836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18910 , TFLOPS: 98.45764707815727, Tokens per sec: 80452.27795287578, Loss: 2.2818920612335205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18911 , TFLOPS: 96.75667740785612, Tokens per sec: 79062.37184841803, Loss: 2.246530055999756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18912 , TFLOPS: 97.7279318613812, Tokens per sec: 79856.00886470726, Loss: 2.264536142349243 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18913 , TFLOPS: 96.72208302690426, Tokens per sec: 79034.10388920366, Loss: 2.2417540550231934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18914 , TFLOPS: 96.0343556233513, Tokens per sec: 78472.14412408222, Loss: 2.26149582862854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18915 , TFLOPS: 96.20754571696406, Tokens per sec: 78613.66220788282, Loss: 2.2759227752685547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18916 , TFLOPS: 97.18448429537361, Tokens per sec: 79411.94386893755, Loss: 2.2470743656158447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18917 , TFLOPS: 97.13001565757051, Tokens per sec: 79367.43614284118, Loss: 2.2614285945892334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18918 , TFLOPS: 96.09704124038815, Tokens per sec: 78523.36615543436, Loss: 2.2561535835266113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18919 , TFLOPS: 96.7230491136416, Tokens per sec: 79034.89330354602, Loss: 2.2795169353485107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18920 , TFLOPS: 96.11343939306113, Tokens per sec: 78536.76550810944, Loss: 2.2543628215789795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18921 , TFLOPS: 97.8149633729292, Tokens per sec: 79927.12455318354, Loss: 2.239166498184204 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18922 , TFLOPS: 96.63059052240776, Tokens per sec: 78959.34300855298, Loss: 2.240377902984619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18923 , TFLOPS: 98.47154840496691, Tokens per sec: 80463.63708486393, Loss: 2.257413864135742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18924 , TFLOPS: 97.82633176087087, Tokens per sec: 79936.41395561903, Loss: 2.2541587352752686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18925 , TFLOPS: 97.1240498468982, Tokens per sec: 79362.5613253672, Loss: 2.227036952972412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18926 , TFLOPS: 98.34040077509752, Tokens per sec: 80356.47298045731, Loss: 2.2508223056793213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18927 , TFLOPS: 97.84146217122824, Tokens per sec: 79948.77740341345, Loss: 2.2535579204559326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18928 , TFLOPS: 96.00779775779851, Tokens per sec: 78450.44300847875, Loss: 2.2446179389953613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18929 , TFLOPS: 98.45355889420935, Tokens per sec: 80448.93739253274, Loss: 2.259824752807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18930 , TFLOPS: 97.80514410768487, Tokens per sec: 79919.10097878204, Loss: 2.215578556060791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18931 , TFLOPS: 95.31509175196577, Tokens per sec: 77884.41509927384, Loss: 2.2703874111175537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18932 , TFLOPS: 98.3624010393653, Tokens per sec: 80374.44996272771, Loss: 2.249758720397949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18933 , TFLOPS: 97.24429447948343, Tokens per sec: 79460.8163100248, Loss: 2.2500383853912354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18934 , TFLOPS: 97.12751820666078, Tokens per sec: 79365.39541141273, Loss: 2.2699906826019287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18935 , TFLOPS: 96.55894728541084, Tokens per sec: 78900.8014753418, Loss: 2.2570509910583496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18936 , TFLOPS: 96.53813183065267, Tokens per sec: 78883.79263142144, Loss: 2.2415552139282227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18937 , TFLOPS: 97.00018614916844, Tokens per sec: 79261.34910941697, Loss: 2.260709762573242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18938 , TFLOPS: 96.15318171586071, Tokens per sec: 78569.23998312738, Loss: 2.2804338932037354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18939 , TFLOPS: 97.80048539822202, Tokens per sec: 79915.2942273541, Loss: 2.246354579925537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18940 , TFLOPS: 96.00364523355107, Tokens per sec: 78447.04987402067, Loss: 2.2395007610321045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18941 , TFLOPS: 96.63856868847674, Tokens per sec: 78965.8621734242, Loss: 2.2562882900238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18942 , TFLOPS: 97.38668531926264, Tokens per sec: 79577.1675306747, Loss: 2.2447195053100586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18943 , TFLOPS: 97.48300330662276, Tokens per sec: 79655.87143759223, Loss: 2.259443998336792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18944 , TFLOPS: 96.11093703564565, Tokens per sec: 78534.72076744886, Loss: 2.2714250087738037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18945 , TFLOPS: 97.32375576991005, Tokens per sec: 79525.74617594799, Loss: 2.256122589111328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18946 , TFLOPS: 97.86370692972442, Tokens per sec: 79966.9541682117, Loss: 2.2570607662200928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18947 , TFLOPS: 96.55025615265444, Tokens per sec: 78893.69972703689, Loss: 2.267996072769165 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18948 , TFLOPS: 97.88094500821595, Tokens per sec: 79981.03984589476, Loss: 2.255690097808838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18949 , TFLOPS: 96.5972121952584, Tokens per sec: 78932.06871820465, Loss: 2.2551426887512207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18950 , TFLOPS: 97.2638165502091, Tokens per sec: 79476.76829655757, Loss: 2.2488608360290527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18951 , TFLOPS: 96.72302920996609, Tokens per sec: 79034.8770397404, Loss: 2.235682725906372 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18952 , TFLOPS: 97.15999744687713, Tokens per sec: 79391.93503468354, Loss: 2.2285056114196777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18953 , TFLOPS: 96.73019228649018, Tokens per sec: 79040.73017396223, Loss: 2.280529499053955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18954 , TFLOPS: 97.67196876254806, Tokens per sec: 79810.27997603237, Loss: 2.2344512939453125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18955 , TFLOPS: 96.55312668826627, Tokens per sec: 78896.04531557948, Loss: 2.263294219970703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18956 , TFLOPS: 96.63347328717161, Tokens per sec: 78961.6985898505, Loss: 2.243666172027588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18957 , TFLOPS: 97.33813836954494, Tokens per sec: 79537.49856835071, Loss: 2.295835018157959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18958 , TFLOPS: 96.90378477626523, Tokens per sec: 79182.57706602565, Loss: 2.251477003097534 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18959 , TFLOPS: 97.1939762460146, Tokens per sec: 79419.69998615087, Loss: 2.2441325187683105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18960 , TFLOPS: 95.55477913907717, Tokens per sec: 78080.26983338484, Loss: 2.2430949211120605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18961 , TFLOPS: 98.5029617628746, Tokens per sec: 80489.30574826208, Loss: 2.2577707767486572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18962 , TFLOPS: 98.47453449200941, Tokens per sec: 80466.07709345508, Loss: 2.2316057682037354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18963 , TFLOPS: 96.6895540350605, Tokens per sec: 79007.52361259738, Loss: 2.244328022003174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18964 , TFLOPS: 97.73784478369546, Tokens per sec: 79864.10896871156, Loss: 2.2603743076324463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18965 , TFLOPS: 97.82130676467155, Tokens per sec: 79932.30790186964, Loss: 2.2737348079681396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18966 , TFLOPS: 97.26729792232686, Tokens per sec: 79479.61301533348, Loss: 2.2460439205169678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18967 , TFLOPS: 96.723567095208, Tokens per sec: 79035.31655961798, Loss: 2.249102830886841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18968 , TFLOPS: 97.79010794582807, Tokens per sec: 79906.8145438636, Loss: 2.2593588829040527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18969 , TFLOPS: 96.58557329363677, Tokens per sec: 78922.55827207763, Loss: 2.2601444721221924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18970 , TFLOPS: 96.97708555242133, Tokens per sec: 79242.47301715265, Loss: 2.2466390132904053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18971 , TFLOPS: 97.86416444056609, Tokens per sec: 79967.3280120978, Loss: 2.270264148712158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18972 , TFLOPS: 95.94977049242412, Tokens per sec: 78403.02743618663, Loss: 2.2596070766448975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18973 , TFLOPS: 97.91151849494834, Tokens per sec: 80006.02222893536, Loss: 2.2474451065063477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18974 , TFLOPS: 94.91798420694634, Tokens per sec: 77559.92830178085, Loss: 2.2579455375671387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18975 , TFLOPS: 98.43480058650974, Tokens per sec: 80433.60949642958, Loss: 2.282813310623169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18976 , TFLOPS: 95.4120783915598, Tokens per sec: 77963.66537914435, Loss: 2.24228835105896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18977 , TFLOPS: 98.46051111912924, Tokens per sec: 80454.61823448095, Loss: 2.2640042304992676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18978 , TFLOPS: 96.16594508794279, Tokens per sec: 78579.66926301409, Loss: 2.2617971897125244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18979 , TFLOPS: 97.24469242136406, Tokens per sec: 79461.14147857942, Loss: 2.2621846199035645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18980 , TFLOPS: 96.64723273740364, Tokens per sec: 78972.94179083484, Loss: 2.2667763233184814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18981 , TFLOPS: 96.9106206083929, Tokens per sec: 79188.16280042715, Loss: 2.2414374351501465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18982 , TFLOPS: 96.63786848915589, Tokens per sec: 78965.2900225344, Loss: 2.23581600189209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18983 , TFLOPS: 97.3109791120312, Tokens per sec: 79515.30603989474, Loss: 2.2650578022003174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18984 , TFLOPS: 97.7772239490119, Tokens per sec: 79896.28669840151, Loss: 2.26529598236084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18985 , TFLOPS: 96.56112012151713, Tokens per sec: 78902.57695565803, Loss: 2.2545292377471924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18986 , TFLOPS: 98.35621392810458, Tokens per sec: 80369.39431484615, Loss: 2.2416529655456543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18987 , TFLOPS: 97.2315695399582, Tokens per sec: 79450.41843436965, Loss: 2.251948118209839 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18988 , TFLOPS: 97.06495153673345, Tokens per sec: 79314.27057480572, Loss: 2.2375762462615967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18989 , TFLOPS: 97.30294029307635, Tokens per sec: 79508.73731398913, Loss: 2.260573387145996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18990 , TFLOPS: 97.74044670680162, Tokens per sec: 79866.23506706108, Loss: 2.23732852935791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18991 , TFLOPS: 94.31379161158617, Tokens per sec: 77066.226978811, Loss: 2.2724597454071045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18992 , TFLOPS: 97.20179605745325, Tokens per sec: 79426.08975537725, Loss: 2.244701385498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18993 , TFLOPS: 97.2602262222413, Tokens per sec: 79473.83454715181, Loss: 2.2491567134857178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18994 , TFLOPS: 97.03498289710681, Tokens per sec: 79289.7824279054, Loss: 2.242713451385498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18995 , TFLOPS: 96.64267533373318, Tokens per sec: 78969.2178189777, Loss: 2.280428171157837 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18996 , TFLOPS: 96.79485873563054, Tokens per sec: 79093.5707942173, Loss: 2.2502353191375732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18997 , TFLOPS: 95.82522890351737, Tokens per sec: 78301.26129790522, Loss: 2.271929979324341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18998 , TFLOPS: 97.30800085028578, Tokens per sec: 79512.87242555528, Loss: 2.246603488922119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 18999 , TFLOPS: 97.75713429155057, Tokens per sec: 79879.87092213609, Loss: 2.2367563247680664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19000 , TFLOPS: 98.41758090146334, Tokens per sec: 80419.53884850403, Loss: 2.2540552616119385 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/model.pt +[rank0]:[2024-08-30 20:01:36,730] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.00781837699469179, 'preprocessing_with_comm': 0.0015761560061946511, 'state_converting': 2.6507283330138307, : 2.6617840910039376}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000, took 15.17s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017000 + eval ppl=7.879062652587891, eval loss=2.064208984375 +------------------------------------------------------------------ +iteration: 19001 , TFLOPS: 93.0702380309021, Tokens per sec: 76050.08733611702, Loss: 2.256528377532959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19002 , TFLOPS: 96.216714957482, Tokens per sec: 78621.15463034729, Loss: 2.243718147277832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19003 , TFLOPS: 96.47283302222628, Tokens per sec: 78830.43529411536, Loss: 2.25209903717041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19004 , TFLOPS: 96.11209033552406, Tokens per sec: 78535.66315846809, Loss: 2.2704219818115234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19005 , TFLOPS: 96.03314197729952, Tokens per sec: 78471.15242265127, Loss: 2.2768712043762207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19006 , TFLOPS: 97.20061449807007, Tokens per sec: 79425.1242727892, Loss: 2.2702410221099854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19007 , TFLOPS: 97.2983049159957, Tokens per sec: 79504.94962805131, Loss: 2.2657241821289062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19008 , TFLOPS: 96.20290057449459, Tokens per sec: 78609.86654239442, Loss: 2.2518434524536133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19009 , TFLOPS: 97.79318589121678, Tokens per sec: 79909.32961227407, Loss: 2.2598330974578857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19010 , TFLOPS: 98.38027456381602, Tokens per sec: 80389.05487966184, Loss: 2.2652299404144287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19011 , TFLOPS: 97.7519577416777, Tokens per sec: 79875.64102997871, Loss: 2.2417638301849365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19012 , TFLOPS: 97.26094643950206, Tokens per sec: 79474.42305521562, Loss: 2.263353109359741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19013 , TFLOPS: 97.82359094967293, Tokens per sec: 79934.17436823444, Loss: 2.250251531600952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19014 , TFLOPS: 97.34882563009378, Tokens per sec: 79546.23140405973, Loss: 2.2523698806762695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19015 , TFLOPS: 98.38005310874638, Tokens per sec: 80388.87392302368, Loss: 2.2673306465148926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19016 , TFLOPS: 97.13316152050108, Tokens per sec: 79370.00670842346, Loss: 2.2562100887298584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19017 , TFLOPS: 95.70060915351311, Tokens per sec: 78199.43129217898, Loss: 2.2735323905944824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19018 , TFLOPS: 97.01158737219109, Tokens per sec: 79270.66534224237, Loss: 2.2267098426818848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19019 , TFLOPS: 97.73907797631794, Tokens per sec: 79865.1166421486, Loss: 2.240598201751709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19020 , TFLOPS: 98.40288485333384, Tokens per sec: 80407.5303292675, Loss: 2.260863780975342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19021 , TFLOPS: 97.86076168454957, Tokens per sec: 79964.54753256198, Loss: 2.2769200801849365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19022 , TFLOPS: 95.56593873732814, Tokens per sec: 78089.38862838931, Loss: 2.263944149017334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19023 , TFLOPS: 96.97698792399747, Tokens per sec: 79242.39324245435, Loss: 2.2669777870178223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19024 , TFLOPS: 98.39687315016019, Tokens per sec: 80402.61801184929, Loss: 2.2402288913726807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19025 , TFLOPS: 97.85297022510703, Tokens per sec: 79958.18093048148, Loss: 2.246706485748291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19026 , TFLOPS: 97.33848505685211, Tokens per sec: 79537.78185547392, Loss: 2.2600948810577393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19027 , TFLOPS: 97.11346288730664, Tokens per sec: 79353.91044815235, Loss: 2.225592851638794 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19028 , TFLOPS: 97.07522375392548, Tokens per sec: 79322.66426790375, Loss: 2.2538609504699707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19029 , TFLOPS: 97.75963631691357, Tokens per sec: 79881.91539146805, Loss: 2.273470878601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19030 , TFLOPS: 96.7202220148696, Tokens per sec: 79032.58320836343, Loss: 2.2199478149414062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19031 , TFLOPS: 96.59076866083089, Tokens per sec: 78926.8035403526, Loss: 2.266479969024658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19032 , TFLOPS: 97.7872621729271, Tokens per sec: 79904.4891895695, Loss: 2.228447437286377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19033 , TFLOPS: 95.61793644066111, Tokens per sec: 78131.87729032266, Loss: 2.2500882148742676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19034 , TFLOPS: 98.37303103198505, Tokens per sec: 80383.13600333765, Loss: 2.2378034591674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19035 , TFLOPS: 95.83679635549537, Tokens per sec: 78310.71336068914, Loss: 2.264319658279419 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19036 , TFLOPS: 96.48191116313403, Tokens per sec: 78837.8532767432, Loss: 2.2552237510681152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19037 , TFLOPS: 96.66647137413636, Tokens per sec: 78988.6621761557, Loss: 2.261383056640625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19038 , TFLOPS: 96.57650894444352, Tokens per sec: 78915.15155901473, Loss: 2.269434928894043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19039 , TFLOPS: 97.05975493064825, Tokens per sec: 79310.02429420098, Loss: 2.285125970840454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19040 , TFLOPS: 95.67005194175363, Tokens per sec: 78174.4622078377, Loss: 2.264200448989868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19041 , TFLOPS: 97.1965300284542, Tokens per sec: 79421.78674752246, Loss: 2.2339885234832764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19042 , TFLOPS: 95.55451547610194, Tokens per sec: 78080.05438758049, Loss: 2.25118350982666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19043 , TFLOPS: 96.71396985487887, Tokens per sec: 79027.47440749008, Loss: 2.2554445266723633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19044 , TFLOPS: 96.6992342692692, Tokens per sec: 79015.4335811607, Loss: 2.261300563812256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19045 , TFLOPS: 97.3424014064659, Tokens per sec: 79540.98200555916, Loss: 2.2469561100006104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19046 , TFLOPS: 96.18641864781803, Tokens per sec: 78596.39873582446, Loss: 2.251251697540283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19047 , TFLOPS: 97.82026038934251, Tokens per sec: 79931.45288165219, Loss: 2.250308036804199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19048 , TFLOPS: 97.75864401936325, Tokens per sec: 79881.10455959567, Loss: 2.231539487838745 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19049 , TFLOPS: 97.27950068963816, Tokens per sec: 79489.58421063071, Loss: 2.244694232940674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19050 , TFLOPS: 97.21783346416723, Tokens per sec: 79439.1943332428, Loss: 2.237607955932617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19051 , TFLOPS: 98.38355426283586, Tokens per sec: 80391.73480615793, Loss: 2.26969575881958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19052 , TFLOPS: 97.79191271973149, Tokens per sec: 79908.28927107909, Loss: 2.2340776920318604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19053 , TFLOPS: 98.36028602981565, Tokens per sec: 80372.7217339795, Loss: 2.2497148513793945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19054 , TFLOPS: 95.97843869530416, Tokens per sec: 78426.4529627451, Loss: 2.267822504043579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19055 , TFLOPS: 97.21532242489748, Tokens per sec: 79437.14249841559, Loss: 2.2375881671905518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19056 , TFLOPS: 97.15564650393412, Tokens per sec: 79388.37976720156, Loss: 2.2793283462524414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19057 , TFLOPS: 97.76989353883715, Tokens per sec: 79890.29683154619, Loss: 2.2680673599243164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19058 , TFLOPS: 97.29271132031779, Tokens per sec: 79500.37895703093, Loss: 2.2564759254455566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19059 , TFLOPS: 96.59072198369415, Tokens per sec: 78926.76539926261, Loss: 2.233172655105591 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19060 , TFLOPS: 96.80250547502608, Tokens per sec: 79099.819141822, Loss: 2.2388436794281006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19061 , TFLOPS: 97.81282422451694, Tokens per sec: 79925.37659994932, Loss: 2.226940870285034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19062 , TFLOPS: 96.82559223187599, Tokens per sec: 79118.68392515005, Loss: 2.2660834789276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19063 , TFLOPS: 97.33831489069104, Tokens per sec: 79537.64280832325, Loss: 2.248699903488159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19064 , TFLOPS: 97.8600746116576, Tokens per sec: 79963.98610761511, Loss: 2.269357919692993 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19065 , TFLOPS: 95.33238103784622, Tokens per sec: 77898.54261983259, Loss: 2.269751787185669 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19066 , TFLOPS: 97.81429966987756, Tokens per sec: 79926.58222433482, Loss: 2.2616074085235596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19067 , TFLOPS: 95.9700742129208, Tokens per sec: 78419.61812886874, Loss: 2.265106678009033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19068 , TFLOPS: 97.84392705825911, Tokens per sec: 79950.79152605821, Loss: 2.251354455947876 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19069 , TFLOPS: 95.3929405519348, Tokens per sec: 77948.02735773494, Loss: 2.23508882522583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19070 , TFLOPS: 97.30639048761935, Tokens per sec: 79511.55655676613, Loss: 2.248943328857422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19071 , TFLOPS: 96.1187311513281, Tokens per sec: 78541.08954001138, Loss: 2.2598390579223633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19072 , TFLOPS: 97.19646028220953, Tokens per sec: 79421.72975607052, Loss: 2.265409469604492 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19073 , TFLOPS: 94.91569446308674, Tokens per sec: 77558.05729312991, Loss: 2.237757444381714 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19074 , TFLOPS: 97.15085793622264, Tokens per sec: 79384.46690525582, Loss: 2.255392551422119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19075 , TFLOPS: 95.76772785185882, Tokens per sec: 78254.27570838627, Loss: 2.256228446960449 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19076 , TFLOPS: 96.6938204615268, Tokens per sec: 79011.00981949059, Loss: 2.2710623741149902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19077 , TFLOPS: 96.44253692262352, Tokens per sec: 78805.67957123896, Loss: 2.264012336730957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19078 , TFLOPS: 95.33320346291528, Tokens per sec: 77899.21464452762, Loss: 2.2651805877685547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19079 , TFLOPS: 97.12573823657056, Tokens per sec: 79363.94095203151, Loss: 2.2845845222473145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19080 , TFLOPS: 96.87403769129288, Tokens per sec: 79158.26995713665, Loss: 2.257643222808838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19081 , TFLOPS: 97.90858621636062, Tokens per sec: 80003.62618861777, Loss: 2.2640256881713867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19082 , TFLOPS: 97.17942798351903, Tokens per sec: 79407.81222636014, Loss: 2.232158660888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19083 , TFLOPS: 97.87795822444431, Tokens per sec: 79978.59926798829, Loss: 2.2856481075286865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19084 , TFLOPS: 97.23000077764009, Tokens per sec: 79449.136558296, Loss: 2.2900431156158447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19085 , TFLOPS: 97.76774039505737, Tokens per sec: 79888.53744233689, Loss: 2.257361888885498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19086 , TFLOPS: 96.47961520165367, Tokens per sec: 78835.9771875143, Loss: 2.2478151321411133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19087 , TFLOPS: 97.19098672160534, Tokens per sec: 79417.25716879885, Loss: 2.2478647232055664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19088 , TFLOPS: 96.62978953629545, Tokens per sec: 78958.6885021814, Loss: 2.2731122970581055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19089 , TFLOPS: 97.80843687110256, Tokens per sec: 79921.7915805339, Loss: 2.241163730621338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19090 , TFLOPS: 97.9626549434304, Tokens per sec: 80047.80713735933, Loss: 2.244441509246826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19091 , TFLOPS: 97.77115652827679, Tokens per sec: 79891.32885272957, Loss: 2.232327699661255 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19092 , TFLOPS: 95.99353855094171, Tokens per sec: 78438.79144349141, Loss: 2.257110357284546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19093 , TFLOPS: 96.70517907196543, Tokens per sec: 79020.29123247664, Loss: 2.248389959335327 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19094 , TFLOPS: 97.16971830062066, Tokens per sec: 79399.8781945144, Loss: 2.232226610183716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19095 , TFLOPS: 97.79462274831076, Tokens per sec: 79910.50370519357, Loss: 2.260688543319702 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19096 , TFLOPS: 97.32854351532444, Tokens per sec: 79529.65836597362, Loss: 2.256049156188965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19097 , TFLOPS: 97.23784206915128, Tokens per sec: 79455.5438794426, Loss: 2.2433314323425293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19098 , TFLOPS: 96.70105564568173, Tokens per sec: 79016.9218747138, Loss: 2.249601364135742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19099 , TFLOPS: 97.1119099040283, Tokens per sec: 79352.64146553827, Loss: 2.255985736846924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19100 , TFLOPS: 96.73315135617705, Tokens per sec: 79043.1481059766, Loss: 2.250474691390991 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19101 , TFLOPS: 97.87967163886019, Tokens per sec: 79979.99934301472, Loss: 2.258502960205078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19102 , TFLOPS: 97.80710701183173, Tokens per sec: 79920.70491829008, Loss: 2.2786662578582764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19103 , TFLOPS: 95.95249638072549, Tokens per sec: 78405.25482968827, Loss: 2.259006977081299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19104 , TFLOPS: 97.19399764996902, Tokens per sec: 79419.717475873, Loss: 2.258613348007202 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19105 , TFLOPS: 96.07611716202817, Tokens per sec: 78506.26855237273, Loss: 2.2686901092529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19106 , TFLOPS: 97.8623708190815, Tokens per sec: 79965.86239781084, Loss: 2.2628586292266846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19107 , TFLOPS: 96.16907335915343, Tokens per sec: 78582.22545394917, Loss: 2.26621675491333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19108 , TFLOPS: 96.61075685072703, Tokens per sec: 78943.13640485852, Loss: 2.242276906967163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19109 , TFLOPS: 96.1168563162132, Tokens per sec: 78539.55756397644, Loss: 2.253594398498535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19110 , TFLOPS: 97.41449883188764, Tokens per sec: 79599.8946678241, Loss: 2.2403218746185303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19111 , TFLOPS: 94.39716083761098, Tokens per sec: 77134.35011951067, Loss: 2.245313882827759 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19112 , TFLOPS: 97.13687947931713, Tokens per sec: 79373.04474828082, Loss: 2.243692398071289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19113 , TFLOPS: 95.71977988903012, Tokens per sec: 78215.09619366856, Loss: 2.2450218200683594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19114 , TFLOPS: 96.63661792258026, Tokens per sec: 78964.26815239301, Loss: 2.2481632232666016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19115 , TFLOPS: 96.47968558095535, Tokens per sec: 78836.03469625344, Loss: 2.2406675815582275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19116 , TFLOPS: 94.5762642382071, Tokens per sec: 77280.70011866945, Loss: 2.2543153762817383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19117 , TFLOPS: 97.86895590140412, Tokens per sec: 79971.24323809175, Loss: 2.2547807693481445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19118 , TFLOPS: 96.2123165283447, Tokens per sec: 78617.56056067356, Loss: 2.2498879432678223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19119 , TFLOPS: 97.76879101077864, Tokens per sec: 79889.39592748799, Loss: 2.2715559005737305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19120 , TFLOPS: 97.16198555212542, Tokens per sec: 79393.55956666029, Loss: 2.26012921333313 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19121 , TFLOPS: 97.9685210981471, Tokens per sec: 80052.6005233866, Loss: 2.2382006645202637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19122 , TFLOPS: 97.24210378682882, Tokens per sec: 79459.02623866328, Loss: 2.2446677684783936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19123 , TFLOPS: 97.7586713452349, Tokens per sec: 79881.12688826883, Loss: 2.241501808166504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19124 , TFLOPS: 96.62745568822164, Tokens per sec: 78956.7814548417, Loss: 2.2679641246795654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19125 , TFLOPS: 97.14481707180448, Tokens per sec: 79379.53075943404, Loss: 2.264221668243408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19126 , TFLOPS: 96.53934195213316, Tokens per sec: 78884.78145283443, Loss: 2.2471795082092285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19127 , TFLOPS: 98.39313286842336, Tokens per sec: 80399.56173135871, Loss: 2.2483150959014893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19128 , TFLOPS: 97.27176830850787, Tokens per sec: 79483.26588295995, Loss: 2.26560378074646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19129 , TFLOPS: 98.43804963088625, Tokens per sec: 80436.26437422748, Loss: 2.250886917114258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19130 , TFLOPS: 96.6388807580581, Tokens per sec: 78966.11717351251, Loss: 2.264178514480591 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19131 , TFLOPS: 97.20515146939279, Tokens per sec: 79428.83154884902, Loss: 2.255988836288452 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19132 , TFLOPS: 96.70382527438726, Tokens per sec: 79019.18500962565, Loss: 2.268545389175415 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19133 , TFLOPS: 97.923072065956, Tokens per sec: 80015.4629492211, Loss: 2.2783725261688232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19134 , TFLOPS: 97.8747389590826, Tokens per sec: 79975.96872338999, Loss: 2.2597556114196777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19135 , TFLOPS: 95.38763785557502, Tokens per sec: 77943.69438803582, Loss: 2.245072841644287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19136 , TFLOPS: 96.78039468248774, Tokens per sec: 79081.75184405647, Loss: 2.267448663711548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19137 , TFLOPS: 97.83908037888332, Tokens per sec: 79946.83117957563, Loss: 2.222527027130127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19138 , TFLOPS: 96.64736973896552, Tokens per sec: 78973.05373833778, Loss: 2.244856357574463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19139 , TFLOPS: 97.3634468531276, Tokens per sec: 79558.17878178443, Loss: 2.274003505706787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19140 , TFLOPS: 97.87970769024152, Tokens per sec: 79980.02880152645, Loss: 2.2557544708251953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19141 , TFLOPS: 95.42032327099588, Tokens per sec: 77970.40248237364, Loss: 2.243279218673706 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19142 , TFLOPS: 97.76464020424703, Tokens per sec: 79886.00419661975, Loss: 2.2649343013763428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19143 , TFLOPS: 96.05328818187192, Tokens per sec: 78487.61440501682, Loss: 2.238839626312256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19144 , TFLOPS: 97.76178515755994, Tokens per sec: 79883.67126447627, Loss: 2.2469451427459717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19145 , TFLOPS: 96.08343722770165, Tokens per sec: 78512.24996646978, Loss: 2.2594046592712402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19146 , TFLOPS: 95.67684493978457, Tokens per sec: 78180.0129413961, Loss: 2.262131929397583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19147 , TFLOPS: 97.31316107782479, Tokens per sec: 79517.08898031348, Loss: 2.268076181411743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19148 , TFLOPS: 96.04241004311632, Tokens per sec: 78478.72559781141, Loss: 2.260984420776367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19149 , TFLOPS: 95.46769302564277, Tokens per sec: 78009.10952830159, Loss: 2.2321794033050537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19150 , TFLOPS: 96.82406718991712, Tokens per sec: 79117.43777410737, Loss: 2.256420373916626 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19151 , TFLOPS: 95.7348654571766, Tokens per sec: 78227.42299973812, Loss: 2.2345588207244873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19152 , TFLOPS: 98.4092906174467, Tokens per sec: 80412.76464402315, Loss: 2.2450900077819824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19153 , TFLOPS: 95.30060099308422, Tokens per sec: 77872.57432716643, Loss: 2.242513656616211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19154 , TFLOPS: 95.89669981452259, Tokens per sec: 78359.66201911251, Loss: 2.2503058910369873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19155 , TFLOPS: 97.11462248110233, Tokens per sec: 79354.85798209341, Loss: 2.236639976501465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19156 , TFLOPS: 96.77918410450073, Tokens per sec: 79080.76264962024, Loss: 2.253471851348877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19157 , TFLOPS: 98.46682592906205, Tokens per sec: 80459.77822823473, Loss: 2.2452304363250732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19158 , TFLOPS: 97.2737763126863, Tokens per sec: 79484.90667486476, Loss: 2.249218702316284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19159 , TFLOPS: 98.44097957312596, Tokens per sec: 80438.65850545495, Loss: 2.2613656520843506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19160 , TFLOPS: 97.72434131113012, Tokens per sec: 79853.07493366812, Loss: 2.2646231651306152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19161 , TFLOPS: 97.77611219330448, Tokens per sec: 79895.37825419386, Loss: 2.2731337547302246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19162 , TFLOPS: 95.93471622043339, Tokens per sec: 78390.72620301157, Loss: 2.24998140335083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19163 , TFLOPS: 97.14433476469533, Tokens per sec: 79379.13665387947, Loss: 2.2588508129119873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19164 , TFLOPS: 96.63985686896146, Tokens per sec: 78966.91477885899, Loss: 2.2590348720550537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19165 , TFLOPS: 97.79233568105782, Tokens per sec: 79908.63488366683, Loss: 2.256413459777832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19166 , TFLOPS: 97.22945659749854, Tokens per sec: 79448.6918946939, Loss: 2.254425525665283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19167 , TFLOPS: 97.22825155083626, Tokens per sec: 79447.70722004549, Loss: 2.251268148422241 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19168 , TFLOPS: 97.19341939600189, Tokens per sec: 79419.24496967088, Loss: 2.2473936080932617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19169 , TFLOPS: 97.24023531258962, Tokens per sec: 79457.49946026361, Loss: 2.2424685955047607 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19170 , TFLOPS: 96.0322521486734, Tokens per sec: 78470.42532077464, Loss: 2.245516300201416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19171 , TFLOPS: 97.94593833900494, Tokens per sec: 80034.14756955962, Loss: 2.2450621128082275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19172 , TFLOPS: 97.84406666265313, Tokens per sec: 79950.90560040232, Loss: 2.227694511413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19173 , TFLOPS: 96.11541575510579, Tokens per sec: 78538.38044441222, Loss: 2.2689201831817627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19174 , TFLOPS: 96.79785423830587, Tokens per sec: 79096.01849656443, Loss: 2.233527660369873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19175 , TFLOPS: 97.27718407414925, Tokens per sec: 79487.69124447973, Loss: 2.2641496658325195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19176 , TFLOPS: 96.55644164537074, Tokens per sec: 78898.75405236414, Loss: 2.24053955078125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19177 , TFLOPS: 97.31251215268236, Tokens per sec: 79516.55872687479, Loss: 2.2429020404815674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19178 , TFLOPS: 97.29221700266514, Tokens per sec: 79499.97503735207, Loss: 2.2793428897857666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19179 , TFLOPS: 96.58005494836486, Tokens per sec: 78918.04909010112, Loss: 2.2601890563964844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19180 , TFLOPS: 96.50207690684782, Tokens per sec: 78854.33122504418, Loss: 2.2524361610412598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19181 , TFLOPS: 96.08938514012308, Tokens per sec: 78517.11015882221, Loss: 2.251171112060547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19182 , TFLOPS: 97.76200531407935, Tokens per sec: 79883.85116003563, Loss: 2.2552313804626465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19183 , TFLOPS: 95.49181672055494, Tokens per sec: 78028.82162041348, Loss: 2.270820140838623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19184 , TFLOPS: 96.14179595226707, Tokens per sec: 78559.93638259941, Loss: 2.253119707107544 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19185 , TFLOPS: 96.75103306186848, Tokens per sec: 79057.75970801312, Loss: 2.249448776245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19186 , TFLOPS: 96.32208547798987, Tokens per sec: 78707.25559513277, Loss: 2.2191290855407715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19187 , TFLOPS: 96.17344547854078, Tokens per sec: 78585.79802524894, Loss: 2.256507158279419 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19188 , TFLOPS: 97.13993655715176, Tokens per sec: 79375.54276527568, Loss: 2.2574713230133057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19189 , TFLOPS: 96.11470306534004, Tokens per sec: 78537.79808725837, Loss: 2.2739834785461426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19190 , TFLOPS: 97.7063058648368, Tokens per sec: 79838.33770622817, Loss: 2.2702085971832275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19191 , TFLOPS: 96.47945397891334, Tokens per sec: 78835.84544826295, Loss: 2.2648324966430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19192 , TFLOPS: 96.36990164429065, Tokens per sec: 78746.32741551477, Loss: 2.247382164001465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19193 , TFLOPS: 97.17156073123093, Tokens per sec: 79401.38369199444, Loss: 2.2555956840515137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19194 , TFLOPS: 96.79503266141154, Tokens per sec: 79093.71291345022, Loss: 2.2544806003570557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19195 , TFLOPS: 97.16868427816577, Tokens per sec: 79399.03326814841, Loss: 2.241009473800659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19196 , TFLOPS: 97.83653163657183, Tokens per sec: 79944.74853662244, Loss: 2.2500641345977783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19197 , TFLOPS: 97.74068174896733, Tokens per sec: 79866.4271260652, Loss: 2.256441116333008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19198 , TFLOPS: 97.85219349386077, Tokens per sec: 79957.5462433853, Loss: 2.255370855331421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19199 , TFLOPS: 97.19180848744216, Tokens per sec: 79417.92865481826, Loss: 2.2532355785369873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19200 , TFLOPS: 96.50255906958326, Tokens per sec: 78854.72521262728, Loss: 2.247433662414551 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/optimizer.pt +[rank0]:[2024-08-30 20:57:10,303] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007678373018279672, 'preprocessing_with_comm': 0.0014517460076604038, 'state_converting': 2.675532781984657, : 2.6862845739815384}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200, took 14.76s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017200 + eval ppl=7.690476894378662, eval loss=2.039982795715332 +------------------------------------------------------------------ +iteration: 19201 , TFLOPS: 95.86022138720566, Tokens per sec: 78329.85455711358, Loss: 2.257037401199341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19202 , TFLOPS: 95.74188275942275, Tokens per sec: 78233.15701804489, Loss: 2.258702039718628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19203 , TFLOPS: 97.82521705238999, Tokens per sec: 79935.50309862416, Loss: 2.278287887573242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19204 , TFLOPS: 95.07001859627182, Tokens per sec: 77684.15951501206, Loss: 2.267986297607422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19205 , TFLOPS: 97.35587006162477, Tokens per sec: 79551.98759040351, Loss: 2.2642619609832764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19206 , TFLOPS: 98.4066193146626, Tokens per sec: 80410.58185375281, Loss: 2.2481801509857178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19207 , TFLOPS: 97.66937225441664, Tokens per sec: 79808.15830239814, Loss: 2.272507429122925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19208 , TFLOPS: 98.39995493494806, Tokens per sec: 80405.13621753166, Loss: 2.2579214572906494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19209 , TFLOPS: 98.35827798760344, Tokens per sec: 80371.0809109963, Loss: 2.2451021671295166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19210 , TFLOPS: 97.01108795900464, Tokens per sec: 79270.25725887169, Loss: 2.2507636547088623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19211 , TFLOPS: 97.77933175004198, Tokens per sec: 79898.00903688282, Loss: 2.24821138381958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19212 , TFLOPS: 96.24197103864535, Tokens per sec: 78641.79202441523, Loss: 2.2681546211242676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19213 , TFLOPS: 97.78095212993779, Tokens per sec: 79899.33309100798, Loss: 2.251772880554199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19214 , TFLOPS: 96.42083336179606, Tokens per sec: 78787.94503298748, Loss: 2.254472494125366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19215 , TFLOPS: 98.29723824259064, Tokens per sec: 80321.20376404352, Loss: 2.2490310668945312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19216 , TFLOPS: 97.0511241808693, Tokens per sec: 79302.97188638133, Loss: 2.242540121078491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19217 , TFLOPS: 97.05915623588739, Tokens per sec: 79309.53508530011, Loss: 2.2497267723083496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19218 , TFLOPS: 96.96333385054413, Tokens per sec: 79231.2361475485, Loss: 2.2538771629333496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19219 , TFLOPS: 97.81633361603869, Tokens per sec: 79928.2442141015, Loss: 2.250943899154663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19220 , TFLOPS: 96.00200014165688, Tokens per sec: 78445.70562708551, Loss: 2.239300489425659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19221 , TFLOPS: 97.22672364839356, Tokens per sec: 79446.45873162785, Loss: 2.2518091201782227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19222 , TFLOPS: 97.75982686356139, Tokens per sec: 79882.0710920391, Loss: 2.2573978900909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19223 , TFLOPS: 96.5098846818674, Tokens per sec: 78860.71115900269, Loss: 2.2384631633758545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19224 , TFLOPS: 97.18112615760593, Tokens per sec: 79409.19984812144, Loss: 2.266138792037964 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19225 , TFLOPS: 96.61204337150797, Tokens per sec: 78944.18765410662, Loss: 2.244556427001953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19226 , TFLOPS: 98.39916376718409, Tokens per sec: 80404.48973398525, Loss: 2.2685601711273193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19227 , TFLOPS: 95.4446392323087, Tokens per sec: 77990.27168031089, Loss: 2.2721259593963623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19228 , TFLOPS: 97.75653519988336, Tokens per sec: 79879.3813889128, Loss: 2.272322654724121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19229 , TFLOPS: 97.20687386489891, Tokens per sec: 79430.2389625555, Loss: 2.2629103660583496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19230 , TFLOPS: 94.3475422892452, Tokens per sec: 77093.80552634592, Loss: 2.236751079559326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19231 , TFLOPS: 97.11281530699648, Tokens per sec: 79353.38129361073, Loss: 2.2600245475769043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19232 , TFLOPS: 96.7652925458478, Tokens per sec: 79069.4114994444, Loss: 2.258152961730957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19233 , TFLOPS: 96.15050202030335, Tokens per sec: 78567.05033490597, Loss: 2.2561352252960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19234 , TFLOPS: 97.31792746629772, Tokens per sec: 79520.98371903246, Loss: 2.2573304176330566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19235 , TFLOPS: 96.7061083360795, Tokens per sec: 79021.05055810562, Loss: 2.2562830448150635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19236 , TFLOPS: 97.27662899321854, Tokens per sec: 79487.2376735622, Loss: 2.2318317890167236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19237 , TFLOPS: 94.23713790987479, Tokens per sec: 77003.5912659008, Loss: 2.2560982704162598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19238 , TFLOPS: 97.07289539521908, Tokens per sec: 79320.76170606744, Loss: 2.2606353759765625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19239 , TFLOPS: 96.68829183065463, Tokens per sec: 79006.49223288531, Loss: 2.2624151706695557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19240 , TFLOPS: 96.8542306660976, Tokens per sec: 79142.0851269714, Loss: 2.2433784008026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19241 , TFLOPS: 97.309090745974, Tokens per sec: 79513.76300737854, Loss: 2.2388877868652344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19242 , TFLOPS: 96.26063018476887, Tokens per sec: 78657.03889304191, Loss: 2.2358345985412598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19243 , TFLOPS: 97.34143701785702, Tokens per sec: 79540.19397880127, Loss: 2.2495768070220947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19244 , TFLOPS: 97.77362099996203, Tokens per sec: 79893.34263598474, Loss: 2.2451529502868652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19245 , TFLOPS: 97.68560757905475, Tokens per sec: 79821.42460409437, Loss: 2.22802996635437 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19246 , TFLOPS: 98.41162501854649, Tokens per sec: 80414.67214325462, Loss: 2.258012533187866 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19247 , TFLOPS: 98.38252520031142, Tokens per sec: 80390.89393267878, Loss: 2.272686004638672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19248 , TFLOPS: 97.1189806415014, Tokens per sec: 79358.41914714452, Loss: 2.242910861968994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19249 , TFLOPS: 97.73741850382066, Tokens per sec: 79863.76064445244, Loss: 2.2346487045288086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19250 , TFLOPS: 97.10066995715677, Tokens per sec: 79343.45701560756, Loss: 2.2399075031280518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19251 , TFLOPS: 96.4540056214796, Tokens per sec: 78815.05094030483, Loss: 2.254786968231201 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19252 , TFLOPS: 97.77339557559038, Tokens per sec: 79893.1584359277, Loss: 2.2686808109283447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19253 , TFLOPS: 98.38781190066665, Tokens per sec: 80395.2138316308, Loss: 2.246793270111084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19254 , TFLOPS: 97.24995555995159, Tokens per sec: 79465.44212460451, Loss: 2.2657110691070557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19255 , TFLOPS: 97.18594320425895, Tokens per sec: 79413.13598094342, Loss: 2.2579336166381836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19256 , TFLOPS: 96.50639953116008, Tokens per sec: 78857.86335264397, Loss: 2.2410290241241455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19257 , TFLOPS: 97.78636694155642, Tokens per sec: 79903.75767297122, Loss: 2.2459332942962646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19258 , TFLOPS: 96.54515025817363, Tokens per sec: 78889.52756921921, Loss: 2.239649772644043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19259 , TFLOPS: 97.2949190624895, Tokens per sec: 79502.18295999177, Loss: 2.276704788208008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19260 , TFLOPS: 97.19799468880831, Tokens per sec: 79422.98355920152, Loss: 2.261916399002075 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19261 , TFLOPS: 95.92064461609145, Tokens per sec: 78379.22793286845, Loss: 2.261413812637329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19262 , TFLOPS: 97.15029330993173, Tokens per sec: 79384.00553457873, Loss: 2.2638180255889893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19263 , TFLOPS: 96.09471156347838, Tokens per sec: 78521.46251646017, Loss: 2.2528650760650635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19264 , TFLOPS: 98.32444050368541, Tokens per sec: 80343.43143183255, Loss: 2.2556166648864746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19265 , TFLOPS: 95.46028797134299, Tokens per sec: 78003.05866781018, Loss: 2.2335903644561768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19266 , TFLOPS: 97.82270548870089, Tokens per sec: 79933.45083528038, Loss: 2.25864315032959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19267 , TFLOPS: 96.65115211200589, Tokens per sec: 78976.14441271602, Loss: 2.2753639221191406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19268 , TFLOPS: 96.59404161476084, Tokens per sec: 78929.4779552621, Loss: 2.2364003658294678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19269 , TFLOPS: 96.69813226940938, Tokens per sec: 79014.53310870726, Loss: 2.257925033569336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19270 , TFLOPS: 96.70574563635088, Tokens per sec: 79020.75418682065, Loss: 2.2693233489990234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19271 , TFLOPS: 95.5215913388427, Tokens per sec: 78053.15122748326, Loss: 2.253678321838379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19272 , TFLOPS: 96.77513747171832, Tokens per sec: 79077.45604180414, Loss: 2.2408876419067383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19273 , TFLOPS: 97.28441310447953, Tokens per sec: 79493.5982712544, Loss: 2.238769769668579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19274 , TFLOPS: 96.72032447547035, Tokens per sec: 79032.66693155786, Loss: 2.258981466293335 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19275 , TFLOPS: 96.58628534913565, Tokens per sec: 78923.14011095572, Loss: 2.2642011642456055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19276 , TFLOPS: 96.51860456665307, Tokens per sec: 78867.83640133087, Loss: 2.2666993141174316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19277 , TFLOPS: 97.35351555354376, Tokens per sec: 79550.06366124007, Loss: 2.263072967529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19278 , TFLOPS: 97.31418165136068, Tokens per sec: 79517.92291722156, Loss: 2.2706236839294434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19279 , TFLOPS: 97.97647087534408, Tokens per sec: 80059.09649097973, Loss: 2.262723207473755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19280 , TFLOPS: 96.11608376075561, Tokens per sec: 78538.9262890247, Loss: 2.2351577281951904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19281 , TFLOPS: 97.74421205154671, Tokens per sec: 79869.31182718088, Loss: 2.251675844192505 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19282 , TFLOPS: 97.85156072435073, Tokens per sec: 79957.02919112972, Loss: 2.2409815788269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19283 , TFLOPS: 97.13546443533275, Tokens per sec: 79371.88847941472, Loss: 2.254159688949585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19284 , TFLOPS: 98.40785743904515, Tokens per sec: 80411.59355705793, Loss: 2.2720413208007812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19285 , TFLOPS: 98.42202217554515, Tokens per sec: 80423.16792788485, Loss: 2.2548632621765137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19286 , TFLOPS: 97.10165756243238, Tokens per sec: 79344.2640133015, Loss: 2.2747960090637207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19287 , TFLOPS: 97.79684566763362, Tokens per sec: 79912.3201097953, Loss: 2.2538063526153564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19288 , TFLOPS: 95.90522956917704, Tokens per sec: 78366.63190121607, Loss: 2.2683768272399902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19289 , TFLOPS: 97.74412812887374, Tokens per sec: 79869.24325180442, Loss: 2.25600004196167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19290 , TFLOPS: 97.14122491536602, Tokens per sec: 79376.59551593785, Loss: 2.259504795074463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19291 , TFLOPS: 97.8295198154234, Tokens per sec: 79939.01899706172, Loss: 2.242030620574951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19292 , TFLOPS: 97.1581112026146, Tokens per sec: 79390.39373594038, Loss: 2.257847785949707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19293 , TFLOPS: 97.22594984497755, Tokens per sec: 79445.82643693715, Loss: 2.2583470344543457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19294 , TFLOPS: 95.98975551510321, Tokens per sec: 78435.70022752376, Loss: 2.2680797576904297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19295 , TFLOPS: 98.3862867286239, Tokens per sec: 80393.96757429326, Loss: 2.237095832824707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19296 , TFLOPS: 96.12748598304178, Tokens per sec: 78548.24333837403, Loss: 2.25842022895813 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19297 , TFLOPS: 96.59792299265644, Tokens per sec: 78932.64952905614, Loss: 2.2424774169921875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19298 , TFLOPS: 97.81009409071613, Tokens per sec: 79923.14573734078, Loss: 2.241124153137207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19299 , TFLOPS: 96.67796298785768, Tokens per sec: 78998.05226954786, Loss: 2.2487759590148926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19300 , TFLOPS: 96.72352908784829, Tokens per sec: 79035.28550282595, Loss: 2.2586798667907715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19301 , TFLOPS: 96.09366008813635, Tokens per sec: 78520.60332889081, Loss: 2.2476489543914795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19302 , TFLOPS: 97.0796503114367, Tokens per sec: 79326.28132199588, Loss: 2.2703633308410645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19303 , TFLOPS: 96.76194871307929, Tokens per sec: 79066.67916761134, Loss: 2.280674934387207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19304 , TFLOPS: 97.23812437233902, Tokens per sec: 79455.7745566444, Loss: 2.2476913928985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19305 , TFLOPS: 96.59750312873108, Tokens per sec: 78932.30644743447, Loss: 2.2426862716674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19306 , TFLOPS: 96.78379388803509, Tokens per sec: 79084.52942241255, Loss: 2.2441630363464355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19307 , TFLOPS: 95.40320346566803, Tokens per sec: 77956.41344873728, Loss: 2.257610559463501 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19308 , TFLOPS: 96.72524617549564, Tokens per sec: 79036.68857934437, Loss: 2.2562167644500732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19309 , TFLOPS: 96.81490524495285, Tokens per sec: 79109.95131302763, Loss: 2.264111042022705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19310 , TFLOPS: 96.75088201350401, Tokens per sec: 79057.63628250618, Loss: 2.264289379119873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19311 , TFLOPS: 97.78334757636712, Tokens per sec: 79901.29047194967, Loss: 2.2543089389801025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19312 , TFLOPS: 96.74393314068134, Tokens per sec: 79051.95817964287, Loss: 2.2534432411193848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19313 , TFLOPS: 96.54941812767191, Tokens per sec: 78893.01495525098, Loss: 2.2635114192962646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19314 , TFLOPS: 97.92145934604763, Tokens per sec: 80014.14515426884, Loss: 2.2521934509277344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19315 , TFLOPS: 97.41235450436787, Tokens per sec: 79598.14248260779, Loss: 2.2621021270751953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19316 , TFLOPS: 97.1780576932597, Tokens per sec: 79406.69252691483, Loss: 2.27470326423645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19317 , TFLOPS: 97.29794713531388, Tokens per sec: 79504.6572762461, Loss: 2.2487244606018066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19318 , TFLOPS: 97.23167209787152, Tokens per sec: 79450.50223708071, Loss: 2.252049684524536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19319 , TFLOPS: 98.48379052652373, Tokens per sec: 80473.64043752826, Loss: 2.2351999282836914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19320 , TFLOPS: 97.78118362733973, Tokens per sec: 79899.52225349439, Loss: 2.242039203643799 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19321 , TFLOPS: 97.03744229853193, Tokens per sec: 79291.79206812049, Loss: 2.240201234817505 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19322 , TFLOPS: 98.3388017504335, Tokens per sec: 80355.16637624179, Loss: 2.2714004516601562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19323 , TFLOPS: 98.36869603261451, Tokens per sec: 80379.59376375917, Loss: 2.2621734142303467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19324 , TFLOPS: 97.0923766897478, Tokens per sec: 79336.68037331999, Loss: 2.2452025413513184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19325 , TFLOPS: 97.78809280825998, Tokens per sec: 79905.16792308245, Loss: 2.2480053901672363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19326 , TFLOPS: 96.57341220215913, Tokens per sec: 78912.62113117691, Loss: 2.252410888671875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19327 , TFLOPS: 97.65963458120767, Tokens per sec: 79800.20139894907, Loss: 2.252734899520874 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19328 , TFLOPS: 97.80253743419516, Tokens per sec: 79916.97099877185, Loss: 2.2637932300567627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19329 , TFLOPS: 97.75057975018512, Tokens per sec: 79874.51503765747, Loss: 2.2650179862976074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19330 , TFLOPS: 97.12055085197315, Tokens per sec: 79359.7022065418, Loss: 2.246826410293579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19331 , TFLOPS: 97.19572988201982, Tokens per sec: 79421.13292727334, Loss: 2.2465708255767822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19332 , TFLOPS: 96.47258772130179, Tokens per sec: 78830.23485241536, Loss: 2.257678747177124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19333 , TFLOPS: 98.40270729049082, Tokens per sec: 80407.38523809759, Loss: 2.2489140033721924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19334 , TFLOPS: 95.86391115935484, Tokens per sec: 78332.86956491986, Loss: 2.2581794261932373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19335 , TFLOPS: 97.75496015894836, Tokens per sec: 79878.09438241967, Loss: 2.2713088989257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19336 , TFLOPS: 97.24921364722867, Tokens per sec: 79464.83588862009, Loss: 2.2420473098754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19337 , TFLOPS: 95.90718806291314, Tokens per sec: 78368.23223686396, Loss: 2.254537343978882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19338 , TFLOPS: 97.89373326139903, Tokens per sec: 79991.48945676957, Loss: 2.246169090270996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19339 , TFLOPS: 95.46322209489796, Tokens per sec: 78005.45621569801, Loss: 2.245781183242798 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19340 , TFLOPS: 97.74745660120635, Tokens per sec: 79871.9630322299, Loss: 2.247877359390259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19341 , TFLOPS: 96.623441422082, Tokens per sec: 78953.50129464258, Loss: 2.2420079708099365 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19342 , TFLOPS: 97.20059020778237, Tokens per sec: 79425.10442456983, Loss: 2.2648401260375977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19343 , TFLOPS: 97.26332817643183, Tokens per sec: 79476.36923377229, Loss: 2.2540128231048584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19344 , TFLOPS: 96.62177043273307, Tokens per sec: 78952.13588623048, Loss: 2.244840621948242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19345 , TFLOPS: 96.63603161344044, Tokens per sec: 78963.78906410192, Loss: 2.254842519760132 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19346 , TFLOPS: 97.32538589259062, Tokens per sec: 79527.07819115337, Loss: 2.253868341445923 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19347 , TFLOPS: 96.82027366518867, Tokens per sec: 79114.33798739775, Loss: 2.2488930225372314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19348 , TFLOPS: 96.66400500713658, Tokens per sec: 78986.64684419025, Loss: 2.238490104675293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19349 , TFLOPS: 97.740544474274, Tokens per sec: 79866.31495537952, Loss: 2.2404603958129883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19350 , TFLOPS: 97.35190462840156, Tokens per sec: 79548.74733283748, Loss: 2.2625207901000977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19351 , TFLOPS: 97.78632531924899, Tokens per sec: 79903.72366231236, Loss: 2.2835168838500977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19352 , TFLOPS: 97.89864336475904, Tokens per sec: 79995.50162862234, Loss: 2.229931354522705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19353 , TFLOPS: 96.66686460686496, Tokens per sec: 78988.98349674096, Loss: 2.265089273452759 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19354 , TFLOPS: 96.59861400619538, Tokens per sec: 78933.21417400708, Loss: 2.257751941680908 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19355 , TFLOPS: 97.89584340256737, Tokens per sec: 79993.21370743807, Loss: 2.2474379539489746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19356 , TFLOPS: 97.20033441905032, Tokens per sec: 79424.89541301229, Loss: 2.256836175918579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19357 , TFLOPS: 98.31541838485666, Tokens per sec: 80336.05922628757, Loss: 2.2438313961029053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19358 , TFLOPS: 97.12902478429098, Tokens per sec: 79366.62647477712, Loss: 2.240682363510132 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19359 , TFLOPS: 97.21576377528024, Tokens per sec: 79437.50313717472, Loss: 2.266035795211792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19360 , TFLOPS: 98.33472623014018, Tokens per sec: 80351.8361636969, Loss: 2.2415833473205566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19361 , TFLOPS: 97.73214533306886, Tokens per sec: 79859.45180088766, Loss: 2.253516912460327 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19362 , TFLOPS: 97.76722654035373, Tokens per sec: 79888.11755843075, Loss: 2.2434051036834717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19363 , TFLOPS: 97.07723016667993, Tokens per sec: 79324.30375941505, Loss: 2.2536845207214355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19364 , TFLOPS: 95.83106606187361, Tokens per sec: 78306.03099025993, Loss: 2.255573034286499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19365 , TFLOPS: 98.4274780481047, Tokens per sec: 80427.6260617999, Loss: 2.252631664276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19366 , TFLOPS: 96.39066247042686, Tokens per sec: 78763.29162098188, Loss: 2.2425267696380615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19367 , TFLOPS: 97.84266191389058, Tokens per sec: 79949.75774402698, Loss: 2.2566213607788086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19368 , TFLOPS: 97.82429975326859, Tokens per sec: 79934.75354989868, Loss: 2.2640345096588135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19369 , TFLOPS: 96.00822427973154, Tokens per sec: 78450.79153052987, Loss: 2.278766393661499 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19370 , TFLOPS: 97.09532615440075, Tokens per sec: 79339.09045681391, Loss: 2.2428359985351562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19371 , TFLOPS: 97.09585673285679, Tokens per sec: 79339.52400612865, Loss: 2.248445987701416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19372 , TFLOPS: 95.93319228177783, Tokens per sec: 78389.48095350644, Loss: 2.234750509262085 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19373 , TFLOPS: 97.12715768556441, Tokens per sec: 79365.10082034423, Loss: 2.2730872631073 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19374 , TFLOPS: 97.20870951248199, Tokens per sec: 79431.73891744939, Loss: 2.2563178539276123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19375 , TFLOPS: 96.09543591947738, Tokens per sec: 78522.05440639354, Loss: 2.269573450088501 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19376 , TFLOPS: 97.23070873609352, Tokens per sec: 79449.71504937277, Loss: 2.2555112838745117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19377 , TFLOPS: 94.8443900876924, Tokens per sec: 77499.7926524573, Loss: 2.270664691925049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19378 , TFLOPS: 97.83601582455856, Tokens per sec: 79944.32705334821, Loss: 2.25299334526062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19379 , TFLOPS: 96.01221411326034, Tokens per sec: 78454.05172621371, Loss: 2.267402172088623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19380 , TFLOPS: 96.59966319429932, Tokens per sec: 78934.07149261532, Loss: 2.2651758193969727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19381 , TFLOPS: 97.28224629654812, Tokens per sec: 79491.82771671505, Loss: 2.257140636444092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19382 , TFLOPS: 96.71117637354108, Tokens per sec: 79025.19178197837, Loss: 2.2637453079223633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19383 , TFLOPS: 96.78936990078617, Tokens per sec: 79089.0857259708, Loss: 2.2426655292510986 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19384 , TFLOPS: 97.39501028523006, Tokens per sec: 79583.97007467037, Loss: 2.271953582763672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19385 , TFLOPS: 96.09220636840728, Tokens per sec: 78519.4154570781, Loss: 2.2748515605926514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19386 , TFLOPS: 96.85744293140637, Tokens per sec: 79144.70995164626, Loss: 2.247601270675659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19387 , TFLOPS: 97.81913067656838, Tokens per sec: 79930.52976426306, Loss: 2.252199649810791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19388 , TFLOPS: 97.17253478290964, Tokens per sec: 79402.17961469543, Loss: 2.2594072818756104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19389 , TFLOPS: 97.83965694516593, Tokens per sec: 79947.30230672727, Loss: 2.261235237121582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19390 , TFLOPS: 96.65637856735107, Tokens per sec: 78980.41508391949, Loss: 2.2388622760772705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19391 , TFLOPS: 96.12193874852103, Tokens per sec: 78543.71055024861, Loss: 2.247018814086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19392 , TFLOPS: 97.8691195645613, Tokens per sec: 79971.37697147054, Loss: 2.2450504302978516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19393 , TFLOPS: 97.37867720110427, Tokens per sec: 79570.62389118019, Loss: 2.2475528717041016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19394 , TFLOPS: 96.97545578994902, Tokens per sec: 79241.14129628272, Loss: 2.2405338287353516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19395 , TFLOPS: 98.42095024022545, Tokens per sec: 80422.29202194106, Loss: 2.2605631351470947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19396 , TFLOPS: 95.3520363430265, Tokens per sec: 77914.60347566796, Loss: 2.2555248737335205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19397 , TFLOPS: 98.33125333071354, Tokens per sec: 80348.998368175, Loss: 2.253214120864868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19398 , TFLOPS: 97.79172342364407, Tokens per sec: 79908.13459237316, Loss: 2.263347864151001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19399 , TFLOPS: 97.28708292278772, Tokens per sec: 79495.77984852126, Loss: 2.2435922622680664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19400 , TFLOPS: 97.17178229514252, Tokens per sec: 79401.56473757017, Loss: 2.2718141078948975 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/optimizer.pt +[rank0]:[2024-08-30 21:52:38,410] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007696528016822413, 'preprocessing_with_comm': 0.0015877909900154918, 'state_converting': 2.6522566500061657, : 2.6631661480059847}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400, took 14.92s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017400 + eval ppl=7.609494686126709, eval loss=2.0293967723846436 +------------------------------------------------------------------ +iteration: 19401 , TFLOPS: 94.6784811742539, Tokens per sec: 77364.22420841131, Loss: 2.2459640502929688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19402 , TFLOPS: 96.80492642135258, Tokens per sec: 79101.79735938607, Loss: 2.2519493103027344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19403 , TFLOPS: 96.53506576246372, Tokens per sec: 78881.28726817662, Loss: 2.244840383529663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19404 , TFLOPS: 96.90806423419102, Tokens per sec: 79186.07392125986, Loss: 2.2646355628967285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19405 , TFLOPS: 95.54211709481328, Tokens per sec: 78069.9233510668, Loss: 2.256089210510254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19406 , TFLOPS: 97.27108310527078, Tokens per sec: 79482.70598575623, Loss: 2.2575156688690186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19407 , TFLOPS: 98.39742363245664, Tokens per sec: 80403.06782510439, Loss: 2.2490828037261963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19408 , TFLOPS: 97.70325397426583, Tokens per sec: 79835.84392787996, Loss: 2.242298126220703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19409 , TFLOPS: 97.78564351349355, Tokens per sec: 79903.16654127835, Loss: 2.2297568321228027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19410 , TFLOPS: 96.49631295671671, Tokens per sec: 78849.62135301477, Loss: 2.2780184745788574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19411 , TFLOPS: 98.30880628332703, Tokens per sec: 80330.65630791709, Loss: 2.2570385932922363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19412 , TFLOPS: 97.7148666742125, Tokens per sec: 79845.33296394389, Loss: 2.2609715461730957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19413 , TFLOPS: 97.42195612010129, Tokens per sec: 79605.98820999115, Loss: 2.268902063369751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19414 , TFLOPS: 96.05282987422908, Tokens per sec: 78487.23991004397, Loss: 2.257310390472412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19415 , TFLOPS: 97.14032107477843, Tokens per sec: 79375.8569645268, Loss: 2.267947196960449 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19416 , TFLOPS: 97.8366387235129, Tokens per sec: 79944.83604011865, Loss: 2.2416999340057373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19417 , TFLOPS: 98.39315520196081, Tokens per sec: 80399.57998066708, Loss: 2.264751434326172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19418 , TFLOPS: 97.7118328193205, Tokens per sec: 79842.85392301326, Loss: 2.255845308303833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19419 , TFLOPS: 96.4456472272907, Tokens per sec: 78808.22108124888, Loss: 2.235929012298584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19420 , TFLOPS: 95.51422758063383, Tokens per sec: 78047.1341110908, Loss: 2.2519240379333496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19421 , TFLOPS: 98.39432077560663, Tokens per sec: 80400.53240089762, Loss: 2.2585604190826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19422 , TFLOPS: 97.83669291979226, Tokens per sec: 79944.88032529365, Loss: 2.2509660720825195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19423 , TFLOPS: 96.32589990352393, Tokens per sec: 78710.3724604287, Loss: 2.254631757736206 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19424 , TFLOPS: 96.7407871244877, Tokens per sec: 79049.38748882539, Loss: 2.2493042945861816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19425 , TFLOPS: 97.10591239579925, Tokens per sec: 79347.74074717471, Loss: 2.2546539306640625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19426 , TFLOPS: 97.20359402819867, Tokens per sec: 79427.5589235572, Loss: 2.256510019302368 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19427 , TFLOPS: 96.77088938885973, Tokens per sec: 79073.98482394501, Loss: 2.2534258365631104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19428 , TFLOPS: 97.91196127544806, Tokens per sec: 80006.38403627987, Loss: 2.259061336517334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19429 , TFLOPS: 97.12559129170454, Tokens per sec: 79363.82087959879, Loss: 2.2628023624420166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19430 , TFLOPS: 95.22775414740288, Tokens per sec: 77813.04929431557, Loss: 2.283696174621582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19431 , TFLOPS: 96.8696553045052, Tokens per sec: 79154.68899607977, Loss: 2.263291597366333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19432 , TFLOPS: 97.8329433074627, Tokens per sec: 79941.81642053556, Loss: 2.2615785598754883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19433 , TFLOPS: 94.34791433691763, Tokens per sec: 77094.10953607643, Loss: 2.248746395111084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19434 , TFLOPS: 97.86151223010503, Tokens per sec: 79965.16082265612, Loss: 2.2434496879577637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19435 , TFLOPS: 95.6005192590612, Tokens per sec: 78117.64526287922, Loss: 2.2387399673461914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19436 , TFLOPS: 96.68933815832071, Tokens per sec: 79007.34721415614, Loss: 2.2488083839416504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19437 , TFLOPS: 95.94600011894964, Tokens per sec: 78399.94656696258, Loss: 2.25127911567688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19438 , TFLOPS: 96.84759915768286, Tokens per sec: 79136.66635073551, Loss: 2.2522804737091064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19439 , TFLOPS: 94.3981665118475, Tokens per sec: 77135.1718818177, Loss: 2.255403995513916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19440 , TFLOPS: 96.86317035375042, Tokens per sec: 79149.38997588056, Loss: 2.2432632446289062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19441 , TFLOPS: 97.40137862281868, Tokens per sec: 79589.17380724962, Loss: 2.2543177604675293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19442 , TFLOPS: 96.26431793174775, Tokens per sec: 78660.0522460294, Loss: 2.25512433052063 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19443 , TFLOPS: 96.31591793350614, Tokens per sec: 78702.2159357684, Loss: 2.251102924346924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19444 , TFLOPS: 96.77044565895244, Tokens per sec: 79073.62224081512, Loss: 2.25944185256958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19445 , TFLOPS: 98.44783136485898, Tokens per sec: 80444.25728086081, Loss: 2.213024377822876 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19446 , TFLOPS: 97.71538632960528, Tokens per sec: 79845.75758774244, Loss: 2.2403101921081543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19447 , TFLOPS: 97.82611057539542, Tokens per sec: 79936.23321927323, Loss: 2.2507503032684326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19448 , TFLOPS: 96.52599096318916, Tokens per sec: 78873.87201608326, Loss: 2.2711925506591797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19449 , TFLOPS: 98.36454160570337, Tokens per sec: 80376.19907458562, Loss: 2.2504143714904785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19450 , TFLOPS: 97.11697641500689, Tokens per sec: 79356.78144208247, Loss: 2.256687641143799 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19451 , TFLOPS: 97.14104675765971, Tokens per sec: 79376.44993868984, Loss: 2.2596120834350586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19452 , TFLOPS: 96.0272486929818, Tokens per sec: 78466.33686832862, Loss: 2.2572860717773438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19453 , TFLOPS: 97.13604378046291, Tokens per sec: 79372.36187723425, Loss: 2.2595901489257812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19454 , TFLOPS: 97.91014192988244, Tokens per sec: 80004.89740218407, Loss: 2.2525806427001953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19455 , TFLOPS: 98.38365898180182, Tokens per sec: 80391.8203747207, Loss: 2.239746570587158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19456 , TFLOPS: 97.82433601989871, Tokens per sec: 79934.78318429574, Loss: 2.2645435333251953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19457 , TFLOPS: 97.12129367575719, Tokens per sec: 79360.30918697773, Loss: 2.2417080402374268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19458 , TFLOPS: 95.49068940635479, Tokens per sec: 78027.90046296087, Loss: 2.2325870990753174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19459 , TFLOPS: 98.36583323862935, Tokens per sec: 80377.25450109903, Loss: 2.2351021766662598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19460 , TFLOPS: 97.19945895164908, Tokens per sec: 79424.18004606219, Loss: 2.260993242263794 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19461 , TFLOPS: 96.2337220941226, Tokens per sec: 78635.05159949895, Loss: 2.256741523742676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19462 , TFLOPS: 96.72008967136459, Tokens per sec: 79032.47506707866, Loss: 2.240220785140991 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19463 , TFLOPS: 97.89058379968007, Tokens per sec: 79988.91595052465, Loss: 2.2613086700439453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19464 , TFLOPS: 97.08895790531888, Tokens per sec: 79333.88679655579, Loss: 2.237715482711792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19465 , TFLOPS: 96.59009560372095, Tokens per sec: 78926.25356806205, Loss: 2.248849391937256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19466 , TFLOPS: 95.94437968805863, Tokens per sec: 78398.62247116794, Loss: 2.2559447288513184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19467 , TFLOPS: 97.15028683207447, Tokens per sec: 79384.00024135482, Loss: 2.253460645675659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19468 , TFLOPS: 95.62812929335449, Tokens per sec: 78140.20613264578, Loss: 2.2611262798309326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19469 , TFLOPS: 97.26281293496756, Tokens per sec: 79475.94821670835, Loss: 2.2549502849578857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19470 , TFLOPS: 97.77782412211485, Tokens per sec: 79896.77711529378, Loss: 2.2473461627960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19471 , TFLOPS: 94.15673363033937, Tokens per sec: 76937.89085930212, Loss: 2.226393222808838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19472 , TFLOPS: 97.82596103894096, Tokens per sec: 79936.11102918687, Loss: 2.2459192276000977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19473 , TFLOPS: 95.39281053515164, Tokens per sec: 77947.92111767449, Loss: 2.259979724884033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19474 , TFLOPS: 96.68580042721973, Tokens per sec: 79004.45644300422, Loss: 2.2450218200683594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19475 , TFLOPS: 94.13928816427473, Tokens per sec: 76923.63571989446, Loss: 2.2561004161834717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19476 , TFLOPS: 96.78917949525261, Tokens per sec: 79088.93014070786, Loss: 2.2625744342803955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19477 , TFLOPS: 94.92183763154814, Tokens per sec: 77563.07703421896, Loss: 2.2405548095703125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19478 , TFLOPS: 96.18238811351564, Tokens per sec: 78593.1052824908, Loss: 2.246983528137207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19479 , TFLOPS: 97.28906110001715, Tokens per sec: 79497.39626805823, Loss: 2.2623002529144287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19480 , TFLOPS: 96.85285695890654, Tokens per sec: 79140.96263546322, Loss: 2.248094320297241 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19481 , TFLOPS: 95.40150000485298, Tokens per sec: 77955.0215070543, Loss: 2.2333874702453613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19482 , TFLOPS: 97.90112138917722, Tokens per sec: 79997.52648616445, Loss: 2.2426090240478516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19483 , TFLOPS: 97.79245725181852, Tokens per sec: 79908.7342222649, Loss: 2.2306299209594727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19484 , TFLOPS: 98.36728057271232, Tokens per sec: 80378.43715503592, Loss: 2.2804744243621826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19485 , TFLOPS: 97.8307122615629, Tokens per sec: 79939.9933754986, Loss: 2.2717695236206055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19486 , TFLOPS: 96.57178992120295, Tokens per sec: 78911.29552364639, Loss: 2.2319443225860596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19487 , TFLOPS: 98.47719154501478, Tokens per sec: 80468.24823986439, Loss: 2.2321109771728516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19488 , TFLOPS: 97.05054913918528, Tokens per sec: 79302.50200501842, Loss: 2.254495143890381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19489 , TFLOPS: 97.19679968896082, Tokens per sec: 79422.00709406412, Loss: 2.246953248977661 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19490 , TFLOPS: 95.8797883071093, Tokens per sec: 78345.84319106417, Loss: 2.265007972717285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19491 , TFLOPS: 96.53373981686639, Tokens per sec: 78880.20380389616, Loss: 2.2425007820129395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19492 , TFLOPS: 97.81423625021903, Tokens per sec: 79926.53040249927, Loss: 2.2490508556365967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19493 , TFLOPS: 97.86580780128489, Tokens per sec: 79968.670844445, Loss: 2.2519261837005615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19494 , TFLOPS: 97.78803226347776, Tokens per sec: 79905.11845038235, Loss: 2.2689669132232666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19495 , TFLOPS: 97.1450478062249, Tokens per sec: 79379.7192984686, Loss: 2.2453389167785645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19496 , TFLOPS: 95.42020777030248, Tokens per sec: 77970.30810378383, Loss: 2.282465934753418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19497 , TFLOPS: 98.42179409535987, Tokens per sec: 80422.98155769415, Loss: 2.2626140117645264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19498 , TFLOPS: 97.81515496967678, Tokens per sec: 79927.28111181814, Loss: 2.2543859481811523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19499 , TFLOPS: 96.2385016456523, Tokens per sec: 78638.95709409048, Loss: 2.2513980865478516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19500 , TFLOPS: 96.79553305624715, Tokens per sec: 79094.1217989517, Loss: 2.259824752807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19501 , TFLOPS: 97.91953212798734, Tokens per sec: 80012.57037478079, Loss: 2.263099431991577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19502 , TFLOPS: 96.4563239363718, Tokens per sec: 78816.94529508198, Loss: 2.278141975402832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19503 , TFLOPS: 96.73462279101011, Tokens per sec: 79044.35045325679, Loss: 2.262587070465088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19504 , TFLOPS: 97.83790211428565, Tokens per sec: 79945.86838924164, Loss: 2.2437660694122314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19505 , TFLOPS: 97.14042556156267, Tokens per sec: 79375.94234336793, Loss: 2.259326219558716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19506 , TFLOPS: 95.9190982785984, Tokens per sec: 78377.9643806966, Loss: 2.2481307983398438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19507 , TFLOPS: 97.25463673309119, Tokens per sec: 79469.26723168099, Loss: 2.2265915870666504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19508 , TFLOPS: 97.09296100476149, Tokens per sec: 79337.1578321593, Loss: 2.2571616172790527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19509 , TFLOPS: 94.98799412986695, Tokens per sec: 77617.13521201504, Loss: 2.2501487731933594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19510 , TFLOPS: 97.87196295073934, Tokens per sec: 79973.7003755119, Loss: 2.258643627166748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19511 , TFLOPS: 95.61081360621436, Tokens per sec: 78126.05703893813, Loss: 2.251060962677002 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19512 , TFLOPS: 96.81671917713413, Tokens per sec: 79111.43352370773, Loss: 2.256074905395508 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19513 , TFLOPS: 96.11940296033616, Tokens per sec: 78541.63849244575, Loss: 2.2483482360839844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19514 , TFLOPS: 96.86578794051456, Tokens per sec: 79151.52887340821, Loss: 2.2522990703582764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19515 , TFLOPS: 95.10130253195102, Tokens per sec: 77709.72242417568, Loss: 2.2673017978668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19516 , TFLOPS: 96.13004250835061, Tokens per sec: 78550.33234101471, Loss: 2.27017879486084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19517 , TFLOPS: 97.36588241769215, Tokens per sec: 79560.16894428713, Loss: 2.2716641426086426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19518 , TFLOPS: 96.86338763870876, Tokens per sec: 79149.56752501344, Loss: 2.270887613296509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19519 , TFLOPS: 96.315185431289, Tokens per sec: 78701.61738935047, Loss: 2.260327100753784 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19520 , TFLOPS: 97.08987015459321, Tokens per sec: 79334.63221892125, Loss: 2.2497754096984863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19521 , TFLOPS: 98.38380588169444, Tokens per sec: 80391.94041040451, Loss: 2.2599058151245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19522 , TFLOPS: 97.7148618335655, Tokens per sec: 79845.3290085266, Loss: 2.2633750438690186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19523 , TFLOPS: 97.82720655281715, Tokens per sec: 79937.12877063749, Loss: 2.2382822036743164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19524 , TFLOPS: 97.24300669585878, Tokens per sec: 79459.7640288746, Loss: 2.2496938705444336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19525 , TFLOPS: 97.75501674052383, Tokens per sec: 79878.14061668153, Loss: 2.246702194213867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19526 , TFLOPS: 97.87080587767299, Tokens per sec: 79972.75490132366, Loss: 2.2569446563720703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19527 , TFLOPS: 96.9637479707776, Tokens per sec: 79231.57453585166, Loss: 2.2465314865112305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19528 , TFLOPS: 95.98406404252027, Tokens per sec: 78431.04957876471, Loss: 2.275191068649292 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19529 , TFLOPS: 96.01748967790616, Tokens per sec: 78458.36252589092, Loss: 2.2556638717651367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19530 , TFLOPS: 97.79875564505343, Tokens per sec: 79913.88080151223, Loss: 2.2644784450531006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19531 , TFLOPS: 97.6880214641501, Tokens per sec: 79823.3970517447, Loss: 2.2554545402526855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19532 , TFLOPS: 97.77861482110215, Tokens per sec: 79897.4232157907, Loss: 2.259073257446289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19533 , TFLOPS: 96.5570390027933, Tokens per sec: 78899.24216849144, Loss: 2.2502951622009277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19534 , TFLOPS: 94.9508368131899, Tokens per sec: 77586.77301204379, Loss: 2.281010150909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19535 , TFLOPS: 98.41263421137133, Tokens per sec: 80415.49678069071, Loss: 2.247067928314209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19536 , TFLOPS: 97.77442093123761, Tokens per sec: 79893.99628042206, Loss: 2.2249560356140137 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19537 , TFLOPS: 96.25953426360641, Tokens per sec: 78656.14338764854, Loss: 2.252969741821289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19538 , TFLOPS: 96.7246236898558, Tokens per sec: 79036.17993030386, Loss: 2.258249282836914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19539 , TFLOPS: 97.87572641359539, Tokens per sec: 79976.77559789173, Loss: 2.2660744190216064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19540 , TFLOPS: 96.82876976890725, Tokens per sec: 79121.28037245537, Loss: 2.2294652462005615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19541 , TFLOPS: 96.64391418449436, Tokens per sec: 78970.23011582551, Loss: 2.236175537109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19542 , TFLOPS: 97.85557977231596, Tokens per sec: 79960.3132586816, Loss: 2.2487757205963135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19543 , TFLOPS: 96.50832758863761, Tokens per sec: 78859.43881804148, Loss: 2.245100975036621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19544 , TFLOPS: 96.03522273886699, Tokens per sec: 78472.85266649161, Loss: 2.264946937561035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19545 , TFLOPS: 97.32862760484093, Tokens per sec: 79529.7270776822, Loss: 2.279693126678467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19546 , TFLOPS: 97.82670780831116, Tokens per sec: 79936.72123366281, Loss: 2.253729820251465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19547 , TFLOPS: 94.45960830642944, Tokens per sec: 77185.3775538231, Loss: 2.2605509757995605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19548 , TFLOPS: 97.85613875454582, Tokens per sec: 79960.77001745111, Loss: 2.2445895671844482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19549 , TFLOPS: 95.02692047872985, Tokens per sec: 77648.94293372428, Loss: 2.2433300018310547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19550 , TFLOPS: 96.87498507885574, Tokens per sec: 79159.0440919022, Loss: 2.2547731399536133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19551 , TFLOPS: 96.69901362047355, Tokens per sec: 79015.25328334977, Loss: 2.271939516067505 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19552 , TFLOPS: 96.7519923696171, Tokens per sec: 79058.5435830693, Loss: 2.2773327827453613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19553 , TFLOPS: 94.54248278759219, Tokens per sec: 77253.09642576012, Loss: 2.2425835132598877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19554 , TFLOPS: 96.11149760214481, Tokens per sec: 78535.17882076558, Loss: 2.258410930633545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19555 , TFLOPS: 97.35465964847566, Tokens per sec: 79550.99853066057, Loss: 2.2811691761016846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19556 , TFLOPS: 97.0032969663339, Tokens per sec: 79263.89103820243, Loss: 2.2690844535827637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19557 , TFLOPS: 96.31267260412312, Tokens per sec: 78699.56409358732, Loss: 2.2397260665893555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19558 , TFLOPS: 97.27833497460227, Tokens per sec: 79488.63167486672, Loss: 2.2449920177459717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19559 , TFLOPS: 98.45347361005669, Tokens per sec: 80448.86770465618, Loss: 2.238264322280884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19560 , TFLOPS: 98.42991482895273, Tokens per sec: 80429.61721815915, Loss: 2.233333110809326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19561 , TFLOPS: 97.09627122847547, Tokens per sec: 79339.86270116875, Loss: 2.2483949661254883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19562 , TFLOPS: 97.10020281556277, Tokens per sec: 79343.07530218581, Loss: 2.2343480587005615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19563 , TFLOPS: 98.37110940634363, Tokens per sec: 80381.56579355901, Loss: 2.2558140754699707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19564 , TFLOPS: 97.09520161946742, Tokens per sec: 79338.98869611407, Loss: 2.246490478515625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19565 , TFLOPS: 97.81820130478141, Tokens per sec: 79929.77035065177, Loss: 2.234205484390259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19566 , TFLOPS: 95.32014091177076, Tokens per sec: 77888.54089772746, Loss: 2.2546603679656982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19567 , TFLOPS: 97.21742023163002, Tokens per sec: 79438.8566702991, Loss: 2.2744195461273193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19568 , TFLOPS: 97.8747215259372, Tokens per sec: 79975.9544783181, Loss: 2.258685350418091 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19569 , TFLOPS: 97.68552359629933, Tokens per sec: 79821.355979623, Loss: 2.2605388164520264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19570 , TFLOPS: 97.78679020641839, Tokens per sec: 79904.10353358575, Loss: 2.2583680152893066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19571 , TFLOPS: 96.42627814241003, Tokens per sec: 78792.39410338827, Loss: 2.24576473236084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19572 , TFLOPS: 94.99085952023532, Tokens per sec: 77619.47659624674, Loss: 2.259136199951172 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19573 , TFLOPS: 98.4230768609242, Tokens per sec: 80424.02973845809, Loss: 2.2609381675720215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19574 , TFLOPS: 97.22197804561034, Tokens per sec: 79442.580977431, Loss: 2.2357399463653564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19575 , TFLOPS: 96.21677553850346, Tokens per sec: 78621.2041326594, Loss: 2.245368003845215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19576 , TFLOPS: 96.13066290687964, Tokens per sec: 78550.83928462325, Loss: 2.262094736099243 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19577 , TFLOPS: 98.01275498550787, Tokens per sec: 80088.74517143065, Loss: 2.2673301696777344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19578 , TFLOPS: 97.15389426679056, Tokens per sec: 79386.94796912496, Loss: 2.2662477493286133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19579 , TFLOPS: 96.78151702745261, Tokens per sec: 79082.668941019, Loss: 2.243962287902832 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19580 , TFLOPS: 97.86330644645899, Tokens per sec: 79966.62692302615, Loss: 2.2636911869049072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19581 , TFLOPS: 97.19995841162326, Tokens per sec: 79424.58816766435, Loss: 2.2371859550476074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19582 , TFLOPS: 95.95624250293311, Tokens per sec: 78408.31588257788, Loss: 2.246868133544922 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19583 , TFLOPS: 97.27448952304773, Tokens per sec: 79485.48945741077, Loss: 2.2560362815856934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19584 , TFLOPS: 96.96393651083834, Tokens per sec: 79231.72859678874, Loss: 2.2412538528442383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19585 , TFLOPS: 95.04538560040051, Tokens per sec: 77664.03125997588, Loss: 2.256460189819336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19586 , TFLOPS: 97.6743902921841, Tokens per sec: 79812.2586702337, Loss: 2.2738590240478516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19587 , TFLOPS: 95.5306523801408, Tokens per sec: 78060.55523757989, Loss: 2.253591537475586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19588 , TFLOPS: 96.79300117691243, Tokens per sec: 79092.05293517093, Loss: 2.257599115371704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19589 , TFLOPS: 96.05289675916016, Tokens per sec: 78487.29456344293, Loss: 2.2441060543060303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19590 , TFLOPS: 96.90485443560864, Tokens per sec: 79183.45111221063, Loss: 2.2459847927093506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19591 , TFLOPS: 95.00539532309845, Tokens per sec: 77631.35417495106, Loss: 2.2602710723876953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19592 , TFLOPS: 96.09931549199287, Tokens per sec: 78525.22450496502, Loss: 2.262648820877075 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19593 , TFLOPS: 97.34769583157818, Tokens per sec: 79545.30821660903, Loss: 2.243544816970825 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19594 , TFLOPS: 96.88362131623516, Tokens per sec: 79166.10098378145, Loss: 2.2373712062835693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19595 , TFLOPS: 96.23077632481784, Tokens per sec: 78632.6445355692, Loss: 2.254347562789917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19596 , TFLOPS: 97.16963079830144, Tokens per sec: 79399.80669411685, Loss: 2.25209379196167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19597 , TFLOPS: 98.35260985558452, Tokens per sec: 80366.44933441294, Loss: 2.2412526607513428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19598 , TFLOPS: 97.8074670214483, Tokens per sec: 79920.99909141526, Loss: 2.243647575378418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19599 , TFLOPS: 97.87307391622457, Tokens per sec: 79974.60817400865, Loss: 2.2515711784362793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19600 , TFLOPS: 97.22500395222153, Tokens per sec: 79445.05352361676, Loss: 2.2584235668182373 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/optimizer.pt +[rank0]:[2024-08-30 22:48:13,072] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007695652020629495, 'preprocessing_with_comm': 0.0017153630033135414, 'state_converting': 2.6433394869964104, : 2.6543620610027574}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019600, took 15.02s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017600 + eval ppl=8.026680946350098, eval loss=2.082771062850952 +------------------------------------------------------------------ +iteration: 19601 , TFLOPS: 94.80019607832749, Tokens per sec: 77463.6805897502, Loss: 2.249401569366455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19602 , TFLOPS: 97.13730764744969, Tokens per sec: 79373.39461548397, Loss: 2.25602388381958 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19603 , TFLOPS: 96.57973338821107, Tokens per sec: 78917.78633502175, Loss: 2.2555980682373047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19604 , TFLOPS: 97.3445034325012, Tokens per sec: 79542.69962514356, Loss: 2.262439012527466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19605 , TFLOPS: 95.85184069161893, Tokens per sec: 78323.00647501137, Loss: 2.2458200454711914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19606 , TFLOPS: 96.94323804262751, Tokens per sec: 79214.8153455877, Loss: 2.27530837059021 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19607 , TFLOPS: 97.36988593016198, Tokens per sec: 79563.4403173858, Loss: 2.232863664627075 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19608 , TFLOPS: 96.85614708565214, Tokens per sec: 79143.65108272244, Loss: 2.2394044399261475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19609 , TFLOPS: 98.41485738616078, Tokens per sec: 80417.31339404079, Loss: 2.247736930847168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19610 , TFLOPS: 97.05583956640254, Tokens per sec: 79306.82494928536, Loss: 2.241377115249634 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19611 , TFLOPS: 98.3442791805846, Tokens per sec: 80359.64212541986, Loss: 2.2612946033477783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19612 , TFLOPS: 98.1827482844695, Tokens per sec: 80227.65107202902, Loss: 2.249122142791748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19613 , TFLOPS: 98.26787311102184, Tokens per sec: 80297.20875911291, Loss: 2.240334987640381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19614 , TFLOPS: 97.81298375539058, Tokens per sec: 79925.50695673285, Loss: 2.2639734745025635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19615 , TFLOPS: 97.05486570537957, Tokens per sec: 79306.02918237408, Loss: 2.26362681388855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19616 , TFLOPS: 95.8222482841457, Tokens per sec: 78298.82575708865, Loss: 2.233959674835205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19617 , TFLOPS: 98.42027864403339, Tokens per sec: 80421.7432434041, Loss: 2.263462543487549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19618 , TFLOPS: 98.35926007512572, Tokens per sec: 80371.88339999193, Loss: 2.2803540229797363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19619 , TFLOPS: 97.21115967734147, Tokens per sec: 79433.74101022893, Loss: 2.254737138748169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19620 , TFLOPS: 97.82715528168787, Tokens per sec: 79937.08687567791, Loss: 2.282567262649536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19621 , TFLOPS: 97.7033401455537, Tokens per sec: 79835.91434065753, Loss: 2.250089645385742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19622 , TFLOPS: 95.51641451941015, Tokens per sec: 78048.92111506175, Loss: 2.2592246532440186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19623 , TFLOPS: 97.83657945995606, Tokens per sec: 79944.78761434078, Loss: 2.264761209487915 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19624 , TFLOPS: 98.3730830214359, Tokens per sec: 80383.17848525627, Loss: 2.2662785053253174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19625 , TFLOPS: 96.51644120302103, Tokens per sec: 78866.06866121718, Loss: 2.263172149658203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19626 , TFLOPS: 96.05005210906513, Tokens per sec: 78484.97012662247, Loss: 2.2301177978515625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19627 , TFLOPS: 98.42699332877343, Tokens per sec: 80427.22998515655, Loss: 2.2449822425842285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19628 , TFLOPS: 95.9877392827011, Tokens per sec: 78434.05271212556, Loss: 2.2514429092407227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19629 , TFLOPS: 97.80595241899792, Tokens per sec: 79919.76147076371, Loss: 2.257479190826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19630 , TFLOPS: 96.29368887265387, Tokens per sec: 78684.05199791907, Loss: 2.2622628211975098 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19631 , TFLOPS: 97.21731311273992, Tokens per sec: 79438.76914069652, Loss: 2.2553622722625732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19632 , TFLOPS: 96.66233385441582, Tokens per sec: 78985.28130228283, Loss: 2.263005495071411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19633 , TFLOPS: 95.7948907345101, Tokens per sec: 78276.47119903543, Loss: 2.2542455196380615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19634 , TFLOPS: 96.73608513205198, Tokens per sec: 79045.54536976611, Loss: 2.2577364444732666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19635 , TFLOPS: 94.75859902619253, Tokens per sec: 77429.69056764735, Loss: 2.2346627712249756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19636 , TFLOPS: 97.85745382095241, Tokens per sec: 79961.84459206475, Loss: 2.2517623901367188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19637 , TFLOPS: 95.07708327230259, Tokens per sec: 77689.93224365773, Loss: 2.263519763946533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19638 , TFLOPS: 96.84826348806563, Tokens per sec: 79137.20919219265, Loss: 2.2537753582000732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19639 , TFLOPS: 95.55905333593994, Tokens per sec: 78083.76238966902, Loss: 2.2559359073638916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19640 , TFLOPS: 98.3910962624339, Tokens per sec: 80397.89756817782, Loss: 2.229149341583252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19641 , TFLOPS: 95.97964095900842, Tokens per sec: 78427.43536336688, Loss: 2.2328386306762695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19642 , TFLOPS: 97.76899916724811, Tokens per sec: 79889.56601749761, Loss: 2.249539613723755 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19643 , TFLOPS: 96.73330713971289, Tokens per sec: 79043.27540071396, Loss: 2.2685787677764893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19644 , TFLOPS: 97.0609947401189, Tokens per sec: 79311.03737443498, Loss: 2.2610974311828613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19645 , TFLOPS: 98.48488505827493, Tokens per sec: 80474.53480759791, Loss: 2.268264055252075 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19646 , TFLOPS: 95.4897824222967, Tokens per sec: 78027.15934293917, Loss: 2.250715970993042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19647 , TFLOPS: 98.44093639573273, Tokens per sec: 80438.62322409547, Loss: 2.2422776222229004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19648 , TFLOPS: 97.82959598093846, Tokens per sec: 79939.08123386477, Loss: 2.258131265640259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19649 , TFLOPS: 98.4112522960213, Tokens per sec: 80414.36758208455, Loss: 2.26448917388916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19650 , TFLOPS: 98.41569476650079, Tokens per sec: 80417.9976390727, Loss: 2.246521472930908 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19651 , TFLOPS: 96.5760302480304, Tokens per sec: 78914.76040385282, Loss: 2.2774510383605957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19652 , TFLOPS: 98.4695680528581, Tokens per sec: 80462.01888817702, Loss: 2.2678945064544678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19653 , TFLOPS: 97.77860525212022, Tokens per sec: 79897.41539672922, Loss: 2.2611310482025146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19654 , TFLOPS: 95.58800887859037, Tokens per sec: 78107.42270895066, Loss: 2.2596607208251953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19655 , TFLOPS: 98.36131332556344, Tokens per sec: 80373.56116377997, Loss: 2.2698822021484375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19656 , TFLOPS: 98.36588989099432, Tokens per sec: 80377.30079320482, Loss: 2.2816002368927 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19657 , TFLOPS: 97.12150903008555, Tokens per sec: 79360.48515854315, Loss: 2.2283573150634766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19658 , TFLOPS: 97.0796753981953, Tokens per sec: 79326.30182103213, Loss: 2.249171018600464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19659 , TFLOPS: 97.75922917340148, Tokens per sec: 79881.58270402352, Loss: 2.262166976928711 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19660 , TFLOPS: 95.96264062637219, Tokens per sec: 78413.54395394232, Loss: 2.258265972137451 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19661 , TFLOPS: 97.81858099022102, Tokens per sec: 79930.08060139845, Loss: 2.2717535495758057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19662 , TFLOPS: 96.69434692298867, Tokens per sec: 79011.4400047034, Loss: 2.251260280609131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19663 , TFLOPS: 98.39393090950146, Tokens per sec: 80400.21383126703, Loss: 2.2461466789245605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19664 , TFLOPS: 95.03493982120091, Tokens per sec: 77655.49574489362, Loss: 2.2480807304382324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19665 , TFLOPS: 97.88898841239397, Tokens per sec: 79987.61231850425, Loss: 2.2707712650299072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19666 , TFLOPS: 96.50719169211979, Tokens per sec: 78858.5106477562, Loss: 2.252958297729492 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19667 , TFLOPS: 96.79872653622671, Tokens per sec: 79096.7312736505, Loss: 2.2542898654937744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19668 , TFLOPS: 97.33164713237684, Tokens per sec: 79532.19441136134, Loss: 2.2830679416656494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19669 , TFLOPS: 96.16680546375338, Tokens per sec: 78580.37229823697, Loss: 2.2607429027557373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19670 , TFLOPS: 96.78581885420427, Tokens per sec: 79086.18407439665, Loss: 2.2473928928375244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19671 , TFLOPS: 96.61726494667619, Tokens per sec: 78948.45433759187, Loss: 2.2521026134490967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19672 , TFLOPS: 97.10878945011335, Tokens per sec: 79350.09166231674, Loss: 2.2382683753967285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19673 , TFLOPS: 96.65258023928428, Tokens per sec: 78977.31137227846, Loss: 2.2566823959350586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19674 , TFLOPS: 97.92347024954485, Tokens per sec: 80015.7883152817, Loss: 2.246147394180298 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19675 , TFLOPS: 96.08123873014317, Tokens per sec: 78510.4535175204, Loss: 2.251633882522583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19676 , TFLOPS: 97.89362243236268, Tokens per sec: 79991.39889551097, Loss: 2.2638556957244873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19677 , TFLOPS: 95.42876858413723, Tokens per sec: 77977.30336514315, Loss: 2.249983549118042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19678 , TFLOPS: 97.0097346267652, Tokens per sec: 79269.15141626097, Loss: 2.2439708709716797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19679 , TFLOPS: 95.85396106541748, Tokens per sec: 78324.73908702545, Loss: 2.257983446121216 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19680 , TFLOPS: 98.40123930852103, Tokens per sec: 80406.18571224084, Loss: 2.2543702125549316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19681 , TFLOPS: 96.65471781955407, Tokens per sec: 78979.05804414311, Loss: 2.2417070865631104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19682 , TFLOPS: 95.62676169262153, Tokens per sec: 78139.08863088171, Loss: 2.2551324367523193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19683 , TFLOPS: 98.4441610562121, Tokens per sec: 80441.25817718329, Loss: 2.2521376609802246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19684 , TFLOPS: 96.59157019141571, Tokens per sec: 78927.4584916267, Loss: 2.2476532459259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19685 , TFLOPS: 98.41167588473294, Tokens per sec: 80414.71370732498, Loss: 2.2584614753723145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19686 , TFLOPS: 96.67919596759937, Tokens per sec: 78999.05976903452, Loss: 2.2414047718048096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19687 , TFLOPS: 98.44262537881914, Tokens per sec: 80440.00333565366, Loss: 2.243913412094116 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19688 , TFLOPS: 98.39296821994368, Tokens per sec: 80399.42719284754, Loss: 2.2602035999298096 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19689 , TFLOPS: 97.15495408806818, Tokens per sec: 79387.81397637313, Loss: 2.2483949661254883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19690 , TFLOPS: 98.34959151889144, Tokens per sec: 80363.98297380214, Loss: 2.2603068351745605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19691 , TFLOPS: 96.00369045522717, Tokens per sec: 78447.08682581632, Loss: 2.244645833969116 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19692 , TFLOPS: 97.17076620599197, Tokens per sec: 79400.73446496877, Loss: 2.2633676528930664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19693 , TFLOPS: 97.09258124217753, Tokens per sec: 79336.84751837599, Loss: 2.229529857635498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19694 , TFLOPS: 97.7199212892971, Tokens per sec: 79849.46322004688, Loss: 2.257702589035034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19695 , TFLOPS: 97.81210197210295, Tokens per sec: 79924.78642890941, Loss: 2.2493748664855957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19696 , TFLOPS: 96.65855480147631, Tokens per sec: 78982.19334084447, Loss: 2.250567674636841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19697 , TFLOPS: 97.22530395468651, Tokens per sec: 79445.29866335365, Loss: 2.228757619857788 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19698 , TFLOPS: 97.10090858548706, Tokens per sec: 79343.65200495909, Loss: 2.2588164806365967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19699 , TFLOPS: 95.61063262607203, Tokens per sec: 78125.90915540501, Loss: 2.248608350753784 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19700 , TFLOPS: 97.87952223678145, Tokens per sec: 79979.87726273022, Loss: 2.25557541847229 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19701 , TFLOPS: 97.80015736995739, Tokens per sec: 79915.02618701485, Loss: 2.255760431289673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19702 , TFLOPS: 95.11437772319702, Tokens per sec: 77720.40649952815, Loss: 2.2568094730377197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19703 , TFLOPS: 98.3383752292596, Tokens per sec: 80354.81785481097, Loss: 2.2388458251953125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19704 , TFLOPS: 95.9699444423768, Tokens per sec: 78419.51209001664, Loss: 2.245421886444092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19705 , TFLOPS: 97.76056290360299, Tokens per sec: 79882.67252930444, Loss: 2.2768118381500244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19706 , TFLOPS: 97.25487048728547, Tokens per sec: 79469.45823825052, Loss: 2.2517454624176025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19707 , TFLOPS: 97.13224624220673, Tokens per sec: 79369.25881096777, Loss: 2.252455234527588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19708 , TFLOPS: 97.58665469031428, Tokens per sec: 79740.56765143055, Loss: 2.263673782348633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19709 , TFLOPS: 95.95626362907993, Tokens per sec: 78408.33314529629, Loss: 2.259579658508301 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19710 , TFLOPS: 96.65400966522816, Tokens per sec: 78978.4793930139, Loss: 2.2624106407165527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19711 , TFLOPS: 94.10663546255823, Tokens per sec: 76896.95435676658, Loss: 2.256871461868286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19712 , TFLOPS: 97.24907329644853, Tokens per sec: 79464.72120438468, Loss: 2.269341468811035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19713 , TFLOPS: 97.3031125707079, Tokens per sec: 79508.87808647669, Loss: 2.2394633293151855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19714 , TFLOPS: 96.70710640141766, Tokens per sec: 79021.86610298592, Loss: 2.264482259750366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19715 , TFLOPS: 96.04357725645704, Tokens per sec: 78479.67935787886, Loss: 2.2549304962158203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19716 , TFLOPS: 96.5816243028505, Tokens per sec: 78919.33145005011, Loss: 2.2502377033233643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19717 , TFLOPS: 97.36287508864791, Tokens per sec: 79557.71157830955, Loss: 2.2491438388824463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19718 , TFLOPS: 97.68474435684122, Tokens per sec: 79820.71924300237, Loss: 2.2685999870300293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19719 , TFLOPS: 97.20663829341952, Tokens per sec: 79430.04647103552, Loss: 2.2658071517944336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19720 , TFLOPS: 95.63955872378257, Tokens per sec: 78149.54541446829, Loss: 2.2574727535247803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19721 , TFLOPS: 98.47536332738116, Tokens per sec: 80466.75435617345, Loss: 2.269686460494995 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19722 , TFLOPS: 96.78889530004962, Tokens per sec: 79088.69791749165, Loss: 2.2671709060668945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19723 , TFLOPS: 97.21399860969386, Tokens per sec: 79436.06077493468, Loss: 2.2513108253479004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19724 , TFLOPS: 96.58199922502185, Tokens per sec: 78919.63780860769, Loss: 2.2461166381835938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19725 , TFLOPS: 97.80740179931499, Tokens per sec: 79920.94579673116, Loss: 2.266455888748169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19726 , TFLOPS: 98.38506855885501, Tokens per sec: 80392.97217641662, Loss: 2.2451095581054688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19727 , TFLOPS: 96.1395170877476, Tokens per sec: 78558.07426373735, Loss: 2.25750470161438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19728 , TFLOPS: 98.41242044002364, Tokens per sec: 80415.32210261955, Loss: 2.2375802993774414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19729 , TFLOPS: 97.29036724079357, Tokens per sec: 79498.46354930972, Loss: 2.236724615097046 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19730 , TFLOPS: 97.25398627907414, Tokens per sec: 79468.7357289595, Loss: 2.2629520893096924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19731 , TFLOPS: 96.77837791722408, Tokens per sec: 79080.10389324337, Loss: 2.2579495906829834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19732 , TFLOPS: 97.90966876449276, Tokens per sec: 80004.5107665638, Loss: 2.2408814430236816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19733 , TFLOPS: 97.89225452327966, Tokens per sec: 79990.28114178614, Loss: 2.245873212814331 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19734 , TFLOPS: 95.34877869934668, Tokens per sec: 77911.94157115798, Loss: 2.267688274383545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19735 , TFLOPS: 97.17959734776014, Tokens per sec: 79407.95061824138, Loss: 2.265648603439331 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19736 , TFLOPS: 96.51668566542722, Tokens per sec: 78866.2684177423, Loss: 2.2652251720428467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19737 , TFLOPS: 96.9464976110541, Tokens per sec: 79217.47882285783, Loss: 2.2604594230651855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19738 , TFLOPS: 97.38118961078592, Tokens per sec: 79572.67684580626, Loss: 2.2465481758117676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19739 , TFLOPS: 97.42635784081583, Tokens per sec: 79609.58496929736, Loss: 2.2619130611419678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19740 , TFLOPS: 95.64609419703893, Tokens per sec: 78154.88571790411, Loss: 2.233450412750244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19741 , TFLOPS: 97.1591531722147, Tokens per sec: 79391.2451561233, Loss: 2.248847007751465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19742 , TFLOPS: 97.2395700931029, Tokens per sec: 79456.95589229676, Loss: 2.2446181774139404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19743 , TFLOPS: 97.79940482848191, Tokens per sec: 79914.41126600311, Loss: 2.254962205886841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19744 , TFLOPS: 96.72979936008231, Tokens per sec: 79040.40910367954, Loss: 2.2616748809814453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19745 , TFLOPS: 96.823456983656, Tokens per sec: 79116.93915886332, Loss: 2.2675869464874268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19746 , TFLOPS: 97.85269692336325, Tokens per sec: 79957.95760859121, Loss: 2.232815742492676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19747 , TFLOPS: 95.96243653448745, Tokens per sec: 78413.37718520951, Loss: 2.260310173034668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19748 , TFLOPS: 97.1095173521192, Tokens per sec: 79350.68644978359, Loss: 2.2660837173461914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19749 , TFLOPS: 96.53488548562187, Tokens per sec: 78881.1399593284, Loss: 2.271592140197754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19750 , TFLOPS: 98.33333268695343, Tokens per sec: 80350.69746368601, Loss: 2.254995346069336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19751 , TFLOPS: 97.25518428203877, Tokens per sec: 79469.71464802123, Loss: 2.2621490955352783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19752 , TFLOPS: 97.2668393168958, Tokens per sec: 79479.2382770302, Loss: 2.280404567718506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19753 , TFLOPS: 97.60154826404862, Tokens per sec: 79752.73757392357, Loss: 2.274733304977417 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19754 , TFLOPS: 96.51063404211273, Tokens per sec: 78861.3234805494, Loss: 2.2496941089630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19755 , TFLOPS: 95.9094027043778, Tokens per sec: 78370.04187741467, Loss: 2.243464946746826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19756 , TFLOPS: 98.49988610960474, Tokens per sec: 80486.79255280092, Loss: 2.2736334800720215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19757 , TFLOPS: 97.89373866261583, Tokens per sec: 79991.49387024286, Loss: 2.2762393951416016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19758 , TFLOPS: 95.99730230191312, Tokens per sec: 78441.86690129772, Loss: 2.2522566318511963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19759 , TFLOPS: 97.8019888520238, Tokens per sec: 79916.52273815786, Loss: 2.2383365631103516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19760 , TFLOPS: 97.17775490063985, Tokens per sec: 79406.44510727059, Loss: 2.2481637001037598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19761 , TFLOPS: 97.7235704868277, Tokens per sec: 79852.44507328782, Loss: 2.26484751701355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19762 , TFLOPS: 96.11540673913595, Tokens per sec: 78538.37307723118, Loss: 2.2690200805664062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19763 , TFLOPS: 97.76508665763507, Tokens per sec: 79886.36900517585, Loss: 2.2813026905059814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19764 , TFLOPS: 97.6864131609002, Tokens per sec: 79822.08286575781, Loss: 2.258042335510254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19765 , TFLOPS: 96.13013679289206, Tokens per sec: 78550.40938334062, Loss: 2.2377867698669434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19766 , TFLOPS: 97.65265881834061, Tokens per sec: 79794.50132353828, Loss: 2.2632086277008057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19767 , TFLOPS: 96.5645209573146, Tokens per sec: 78905.35586613345, Loss: 2.2500295639038086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19768 , TFLOPS: 96.54462435771906, Tokens per sec: 78889.09784241988, Loss: 2.2440619468688965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19769 , TFLOPS: 97.3926074797813, Tokens per sec: 79582.00668048448, Loss: 2.2504637241363525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19770 , TFLOPS: 97.00326461700686, Tokens per sec: 79263.86460473457, Loss: 2.2451844215393066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19771 , TFLOPS: 97.83576956333607, Tokens per sec: 79944.125826964, Loss: 2.2536048889160156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19772 , TFLOPS: 96.13623221568275, Tokens per sec: 78555.39011021303, Loss: 2.2476933002471924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19773 , TFLOPS: 97.21790342371199, Tokens per sec: 79439.25149898774, Loss: 2.254962682723999 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19774 , TFLOPS: 96.58175685424095, Tokens per sec: 78919.43976120342, Loss: 2.269718647003174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19775 , TFLOPS: 96.42228308688017, Tokens per sec: 78789.12964067295, Loss: 2.26213002204895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19776 , TFLOPS: 96.23655926837392, Tokens per sec: 78637.36992761509, Loss: 2.2833046913146973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19777 , TFLOPS: 97.39957094111068, Tokens per sec: 79587.69670399216, Loss: 2.2566580772399902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19778 , TFLOPS: 95.4550149722015, Tokens per sec: 77998.7499644726, Loss: 2.2431082725524902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19779 , TFLOPS: 97.88983719473062, Tokens per sec: 79988.30588040114, Loss: 2.2500483989715576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19780 , TFLOPS: 97.56820228256834, Tokens per sec: 79725.48971405411, Loss: 2.2407796382904053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19781 , TFLOPS: 97.75525707242969, Tokens per sec: 79878.33699806848, Loss: 2.2589468955993652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19782 , TFLOPS: 96.655391026717, Tokens per sec: 78979.60813904583, Loss: 2.2697718143463135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19783 , TFLOPS: 97.30001736517536, Tokens per sec: 79506.34891435837, Loss: 2.2496695518493652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19784 , TFLOPS: 98.32675438835652, Tokens per sec: 80345.32216656201, Loss: 2.2583982944488525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19785 , TFLOPS: 96.72002871160417, Tokens per sec: 79032.42525528918, Loss: 2.2531003952026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19786 , TFLOPS: 97.73429674879172, Tokens per sec: 79861.20977805718, Loss: 2.2663238048553467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19787 , TFLOPS: 95.86440319254734, Tokens per sec: 78333.27161790752, Loss: 2.266961097717285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19788 , TFLOPS: 98.32803132973905, Tokens per sec: 80346.36558823707, Loss: 2.2511963844299316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19789 , TFLOPS: 97.26694849546836, Tokens per sec: 79479.32748965234, Loss: 2.2515828609466553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19790 , TFLOPS: 97.76508418726597, Tokens per sec: 79886.36698657366, Loss: 2.2532808780670166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19791 , TFLOPS: 95.88871976129964, Tokens per sec: 78353.1413121992, Loss: 2.26733136177063 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19792 , TFLOPS: 97.8141015099697, Tokens per sec: 79926.42030277303, Loss: 2.2709319591522217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19793 , TFLOPS: 97.30702572616686, Tokens per sec: 79512.07562653578, Loss: 2.259046792984009 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19794 , TFLOPS: 96.90092229464176, Tokens per sec: 79180.23805860429, Loss: 2.254359245300293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19795 , TFLOPS: 97.25255056195986, Tokens per sec: 79467.56256754677, Loss: 2.2624874114990234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19796 , TFLOPS: 97.90225346253763, Tokens per sec: 79998.45153244941, Loss: 2.236478805541992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19797 , TFLOPS: 96.41460831979502, Tokens per sec: 78782.8583909216, Loss: 2.2547898292541504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19798 , TFLOPS: 97.2329892930968, Tokens per sec: 79451.5785512069, Loss: 2.264045238494873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19799 , TFLOPS: 97.16389066813593, Tokens per sec: 79395.11628599453, Loss: 2.230651378631592 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19800 , TFLOPS: 96.61478811404814, Tokens per sec: 78946.43045389214, Loss: 2.2728521823883057 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/model.pt +[rank0]:[2024-08-30 23:43:41,603] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.0077224079868756235, 'preprocessing_with_comm': 0.0017020160157699138, 'state_converting': 2.6637111969757825, : 2.6747822430043016}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019800, took 15.05s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0017800 + eval ppl=7.568350315093994, eval loss=2.023975133895874 +------------------------------------------------------------------ +iteration: 19801 , TFLOPS: 97.57753691276562, Tokens per sec: 79733.11728068307, Loss: 2.2420265674591064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19802 , TFLOPS: 96.1628661989314, Tokens per sec: 78577.15342354521, Loss: 2.270245313644409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19803 , TFLOPS: 96.85934214731235, Tokens per sec: 79146.26184985321, Loss: 2.231825351715088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19804 , TFLOPS: 96.79412757235974, Tokens per sec: 79092.97334188693, Loss: 2.2554450035095215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19805 , TFLOPS: 96.27894786960108, Tokens per sec: 78672.00674485747, Loss: 2.2570221424102783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19806 , TFLOPS: 98.41470682228694, Tokens per sec: 80417.19036442362, Loss: 2.2367992401123047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19807 , TFLOPS: 97.46963806346368, Tokens per sec: 79644.95035335499, Loss: 2.257761001586914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19808 , TFLOPS: 97.03431777967188, Tokens per sec: 79289.23894332767, Loss: 2.2465555667877197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19809 , TFLOPS: 97.72038201838423, Tokens per sec: 79849.83969364419, Loss: 2.2499523162841797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19810 , TFLOPS: 97.63330270870674, Tokens per sec: 79778.68494808587, Loss: 2.2550466060638428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19811 , TFLOPS: 97.17514041654186, Tokens per sec: 79404.30874501131, Loss: 2.2554237842559814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19812 , TFLOPS: 98.36522946537136, Tokens per sec: 80376.76114242742, Loss: 2.2426233291625977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19813 , TFLOPS: 96.90159880734585, Tokens per sec: 79180.79085454978, Loss: 2.2428736686706543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19814 , TFLOPS: 96.99446741468445, Tokens per sec: 79256.6761842568, Loss: 2.2518656253814697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19815 , TFLOPS: 97.74291245114742, Tokens per sec: 79868.24989023994, Loss: 2.2540531158447266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19816 , TFLOPS: 97.21141362186992, Tokens per sec: 79433.94851484023, Loss: 2.2592904567718506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19817 , TFLOPS: 97.07889631495607, Tokens per sec: 79325.66521206197, Loss: 2.2446682453155518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19818 , TFLOPS: 97.02985216387182, Tokens per sec: 79285.58997370227, Loss: 2.2778713703155518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19819 , TFLOPS: 97.07573566004373, Tokens per sec: 79323.08255957061, Loss: 2.2553534507751465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19820 , TFLOPS: 98.35772359297059, Tokens per sec: 80370.6279008704, Loss: 2.25821590423584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19821 , TFLOPS: 95.4502572307884, Tokens per sec: 77994.86229148878, Loss: 2.2477738857269287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19822 , TFLOPS: 97.75721977199835, Tokens per sec: 79879.94077041042, Loss: 2.2568492889404297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19823 , TFLOPS: 96.265036720557, Tokens per sec: 78660.63958686873, Loss: 2.262305974960327 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19824 , TFLOPS: 95.9628254883071, Tokens per sec: 78413.6950093881, Loss: 2.2630772590637207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19825 , TFLOPS: 97.76792100756327, Tokens per sec: 79888.68502546485, Loss: 2.243516683578491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19826 , TFLOPS: 97.39004996981798, Tokens per sec: 79579.91687325729, Loss: 2.2457361221313477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19827 , TFLOPS: 95.45449209647194, Tokens per sec: 77998.32270925395, Loss: 2.245889663696289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19828 , TFLOPS: 97.87035730101364, Tokens per sec: 79972.38835778792, Loss: 2.231407642364502 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19829 , TFLOPS: 96.05813733247982, Tokens per sec: 78491.57677080677, Loss: 2.243107318878174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19830 , TFLOPS: 97.23007383384392, Tokens per sec: 79449.19625440078, Loss: 2.2690513134002686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19831 , TFLOPS: 97.53028204607597, Tokens per sec: 79694.50411266247, Loss: 2.273259401321411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19832 , TFLOPS: 96.58622423816199, Tokens per sec: 78923.090175606, Loss: 2.2476329803466797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19833 , TFLOPS: 96.93498122480851, Tokens per sec: 79208.0684871984, Loss: 2.266629934310913 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19834 , TFLOPS: 96.18654665950194, Tokens per sec: 78596.5033374667, Loss: 2.236593008041382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19835 , TFLOPS: 97.80479437416777, Tokens per sec: 79918.81520252227, Loss: 2.27103853225708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19836 , TFLOPS: 95.48486006781195, Tokens per sec: 78023.13716037701, Loss: 2.2691752910614014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19837 , TFLOPS: 97.18918577597425, Tokens per sec: 79415.78556976322, Loss: 2.2562785148620605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19838 , TFLOPS: 96.65363542904892, Tokens per sec: 78978.17359499814, Loss: 2.2386534214019775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19839 , TFLOPS: 97.95473932542696, Tokens per sec: 80041.33908212259, Loss: 2.2450942993164062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19840 , TFLOPS: 96.28476522588042, Tokens per sec: 78676.76025642539, Loss: 2.235168695449829 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19841 , TFLOPS: 97.29960962464233, Tokens per sec: 79506.01573907265, Loss: 2.2574238777160645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19842 , TFLOPS: 96.61855850124908, Tokens per sec: 78949.51133433245, Loss: 2.274383544921875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19843 , TFLOPS: 96.97459307222262, Tokens per sec: 79240.43634742014, Loss: 2.2541208267211914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19844 , TFLOPS: 98.38457867494186, Tokens per sec: 80392.57187966045, Loss: 2.2647011280059814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19845 , TFLOPS: 96.04065399670989, Tokens per sec: 78477.29068708807, Loss: 2.2723050117492676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19846 , TFLOPS: 97.2960561204638, Tokens per sec: 79503.11207933305, Loss: 2.2566184997558594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19847 , TFLOPS: 98.39888100793354, Tokens per sec: 80404.25868412275, Loss: 2.2473080158233643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19848 , TFLOPS: 97.74618025818025, Tokens per sec: 79870.92009948728, Loss: 2.2493112087249756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19849 , TFLOPS: 97.2722090401268, Tokens per sec: 79483.62601611123, Loss: 2.2620787620544434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19850 , TFLOPS: 97.81659909892933, Tokens per sec: 79928.4611470056, Loss: 2.2442197799682617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19851 , TFLOPS: 97.10512297229593, Tokens per sec: 79347.0956889086, Loss: 2.246845006942749 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19852 , TFLOPS: 97.19696679548211, Tokens per sec: 79422.14364110437, Loss: 2.2551686763763428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19853 , TFLOPS: 97.76209583893653, Tokens per sec: 79883.92513022675, Loss: 2.2340476512908936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19854 , TFLOPS: 97.17755867623654, Tokens per sec: 79406.28476725936, Loss: 2.2679367065429688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19855 , TFLOPS: 97.18901941992914, Tokens per sec: 79415.64963595638, Loss: 2.2700307369232178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19856 , TFLOPS: 97.12907517354633, Tokens per sec: 79366.66764913479, Loss: 2.254380702972412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19857 , TFLOPS: 97.20483893323717, Tokens per sec: 79428.57616751088, Loss: 2.276906967163086 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19858 , TFLOPS: 97.89613154487238, Tokens per sec: 79993.44915593295, Loss: 2.242997407913208 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19859 , TFLOPS: 96.04995942938108, Tokens per sec: 78484.89439566684, Loss: 2.27642560005188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19860 , TFLOPS: 97.10317639248123, Tokens per sec: 79345.50508843258, Loss: 2.237372875213623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19861 , TFLOPS: 96.62790651067333, Tokens per sec: 78957.14983347217, Loss: 2.2233877182006836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19862 , TFLOPS: 97.3068884109056, Tokens per sec: 79511.96342270101, Loss: 2.260303020477295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19863 , TFLOPS: 97.11289138851178, Tokens per sec: 79353.44346177539, Loss: 2.253382682800293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19864 , TFLOPS: 97.58742166588952, Tokens per sec: 79741.1943669168, Loss: 2.266176462173462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19865 , TFLOPS: 96.19963763488511, Tokens per sec: 78607.20031044396, Loss: 2.2748680114746094 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19866 , TFLOPS: 97.69939985376773, Tokens per sec: 79832.69462680709, Loss: 2.2547099590301514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19867 , TFLOPS: 97.12821955579984, Tokens per sec: 79365.96850184859, Loss: 2.2536373138427734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19868 , TFLOPS: 96.56790072321895, Tokens per sec: 78908.11755985682, Loss: 2.263786792755127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19869 , TFLOPS: 97.81873306686352, Tokens per sec: 79930.20486713784, Loss: 2.2353971004486084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19870 , TFLOPS: 96.0088923921993, Tokens per sec: 78451.33746242602, Loss: 2.2517805099487305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19871 , TFLOPS: 97.78389803374002, Tokens per sec: 79901.7402648391, Loss: 2.260655164718628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19872 , TFLOPS: 96.09113266955897, Tokens per sec: 78518.5381101097, Loss: 2.2571020126342773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19873 , TFLOPS: 97.83351465701806, Tokens per sec: 79942.28328496509, Loss: 2.248598337173462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19874 , TFLOPS: 95.48851676345843, Tokens per sec: 78026.12514052147, Loss: 2.2608835697174072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19875 , TFLOPS: 97.14829573178393, Tokens per sec: 79382.37326205193, Loss: 2.2506251335144043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19876 , TFLOPS: 96.19459246521879, Tokens per sec: 78603.07777243541, Loss: 2.2225563526153564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19877 , TFLOPS: 97.02391832061402, Tokens per sec: 79280.7412776256, Loss: 2.2421226501464844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19878 , TFLOPS: 96.16230429588899, Tokens per sec: 78576.69427810461, Loss: 2.264814615249634 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19879 , TFLOPS: 97.3557437890084, Tokens per sec: 79551.88440979812, Loss: 2.2448792457580566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19880 , TFLOPS: 96.7359720250001, Tokens per sec: 79045.45294708239, Loss: 2.2772738933563232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19881 , TFLOPS: 97.01741725474166, Tokens per sec: 79275.42908934897, Loss: 2.254462957382202 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19882 , TFLOPS: 98.34013335090992, Tokens per sec: 80356.25446126953, Loss: 2.238828420639038 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19883 , TFLOPS: 96.67641478600552, Tokens per sec: 78996.7871939602, Loss: 2.2618136405944824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19884 , TFLOPS: 97.29541159184275, Tokens per sec: 79502.58541840514, Loss: 2.217569589614868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19885 , TFLOPS: 98.41537790212924, Tokens per sec: 80417.7387210379, Loss: 2.2524499893188477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19886 , TFLOPS: 96.9590828542422, Tokens per sec: 79227.7625490398, Loss: 2.264791965484619 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19887 , TFLOPS: 97.22747246574976, Tokens per sec: 79447.07060956587, Loss: 2.24774432182312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19888 , TFLOPS: 97.80299393253931, Tokens per sec: 79917.34401532017, Loss: 2.268474578857422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19889 , TFLOPS: 97.75770702954965, Tokens per sec: 79880.33892109872, Loss: 2.234907627105713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19890 , TFLOPS: 97.25623407858629, Tokens per sec: 79470.57246380438, Loss: 2.2282330989837646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19891 , TFLOPS: 97.8531063306124, Tokens per sec: 79958.29214579359, Loss: 2.2697761058807373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19892 , TFLOPS: 97.24913425943944, Tokens per sec: 79464.77101881389, Loss: 2.2677061557769775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19893 , TFLOPS: 95.8140476353669, Tokens per sec: 78292.12479586802, Loss: 2.2769315242767334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19894 , TFLOPS: 97.69387453731052, Tokens per sec: 79828.17974849559, Loss: 2.233973503112793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19895 , TFLOPS: 97.80863970315154, Tokens per sec: 79921.95731982248, Loss: 2.2501323223114014 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19896 , TFLOPS: 97.22627788315695, Tokens per sec: 79446.094485378, Loss: 2.273263454437256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19897 , TFLOPS: 95.5131928668926, Tokens per sec: 78046.28861985693, Loss: 2.2452352046966553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19898 , TFLOPS: 97.82441410847409, Tokens per sec: 79934.84699248086, Loss: 2.245037317276001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19899 , TFLOPS: 96.60746332044077, Tokens per sec: 78940.4451764786, Loss: 2.2680771350860596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19900 , TFLOPS: 97.31825252139743, Tokens per sec: 79521.24932992211, Loss: 2.2380788326263428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19901 , TFLOPS: 97.1795066221193, Tokens per sec: 79407.87648398476, Loss: 2.2461440563201904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19902 , TFLOPS: 97.4000921584758, Tokens per sec: 79588.12260411853, Loss: 2.257000684738159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19903 , TFLOPS: 96.65445314281149, Tokens per sec: 78978.84176996337, Loss: 2.2609190940856934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19904 , TFLOPS: 97.08431824374132, Tokens per sec: 79330.09560964527, Loss: 2.264730930328369 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19905 , TFLOPS: 97.2247378148917, Tokens per sec: 79444.83605595356, Loss: 2.2264556884765625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19906 , TFLOPS: 97.26793812494161, Tokens per sec: 79480.13614137027, Loss: 2.267446279525757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19907 , TFLOPS: 97.02989746018827, Tokens per sec: 79285.62698648845, Loss: 2.2772092819213867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19908 , TFLOPS: 96.63217930426136, Tokens per sec: 78960.64124310444, Loss: 2.237074136734009 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19909 , TFLOPS: 97.31222647431723, Tokens per sec: 79516.32529172863, Loss: 2.2531440258026123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19910 , TFLOPS: 96.5068686161295, Tokens per sec: 78858.2466540478, Loss: 2.2534477710723877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19911 , TFLOPS: 97.24216427606996, Tokens per sec: 79459.07566597934, Loss: 2.275331497192383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19912 , TFLOPS: 95.57776282900511, Tokens per sec: 78099.05039807777, Loss: 2.253861665725708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19913 , TFLOPS: 97.31380599525964, Tokens per sec: 79517.61595895115, Loss: 2.2732625007629395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19914 , TFLOPS: 96.64922643022983, Tokens per sec: 78974.57088856534, Loss: 2.246603012084961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19915 , TFLOPS: 97.86178123276969, Tokens per sec: 79965.38063165815, Loss: 2.2549538612365723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19916 , TFLOPS: 95.9759664768518, Tokens per sec: 78424.43284940711, Loss: 2.2605459690093994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19917 , TFLOPS: 97.13627399723734, Tokens per sec: 79372.54999328694, Loss: 2.2473535537719727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19918 , TFLOPS: 96.65728517268275, Tokens per sec: 78981.1558944741, Loss: 2.2526121139526367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19919 , TFLOPS: 96.8168763273688, Tokens per sec: 79111.56193520989, Loss: 2.2540884017944336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19920 , TFLOPS: 97.75290866660829, Tokens per sec: 79876.41805521848, Loss: 2.2453248500823975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19921 , TFLOPS: 96.80776999052947, Tokens per sec: 79104.12091296048, Loss: 2.2538628578186035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19922 , TFLOPS: 97.2228773783707, Tokens per sec: 79443.31584538036, Loss: 2.273364782333374 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19923 , TFLOPS: 98.44778527222903, Tokens per sec: 80444.21961738635, Loss: 2.2398109436035156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19924 , TFLOPS: 97.85242370662823, Tokens per sec: 79957.7343561638, Loss: 2.2531189918518066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19925 , TFLOPS: 96.69898637700928, Tokens per sec: 79015.2310220138, Loss: 2.233788013458252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19926 , TFLOPS: 97.83340229529766, Tokens per sec: 79942.19147131091, Loss: 2.2332253456115723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19927 , TFLOPS: 97.6754724861895, Tokens per sec: 79813.14295881367, Loss: 2.229038953781128 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19928 , TFLOPS: 96.66295656830351, Tokens per sec: 78985.79013783055, Loss: 2.270336866378784 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19929 , TFLOPS: 97.82228190089953, Tokens per sec: 79933.10471078378, Loss: 2.24983811378479 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19930 , TFLOPS: 97.25628415825878, Tokens per sec: 79470.61338519394, Loss: 2.271415948867798 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19931 , TFLOPS: 96.46998364454934, Tokens per sec: 78828.10699426603, Loss: 2.245424747467041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19932 , TFLOPS: 96.97297828726305, Tokens per sec: 79239.11686506141, Loss: 2.2544021606445312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19933 , TFLOPS: 97.82656560095218, Tokens per sec: 79936.60503236904, Loss: 2.2634479999542236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19934 , TFLOPS: 97.20428987372335, Tokens per sec: 79428.12751684808, Loss: 2.248462677001953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19935 , TFLOPS: 96.18414966361274, Tokens per sec: 78594.54469042148, Loss: 2.2625110149383545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19936 , TFLOPS: 96.47187745728162, Tokens per sec: 78829.65447740065, Loss: 2.271571159362793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19937 , TFLOPS: 97.22065275991271, Tokens per sec: 79441.49805237156, Loss: 2.2390055656433105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19938 , TFLOPS: 96.46640608167284, Tokens per sec: 78825.18367554498, Loss: 2.2653467655181885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19939 , TFLOPS: 96.22259820843564, Tokens per sec: 78625.96198614986, Loss: 2.2460780143737793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19940 , TFLOPS: 97.33446515593647, Tokens per sec: 79534.49709095429, Loss: 2.262878656387329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19941 , TFLOPS: 96.14831107524708, Tokens per sec: 78565.26005730055, Loss: 2.2594969272613525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19942 , TFLOPS: 97.75161683323932, Tokens per sec: 79875.36246491795, Loss: 2.2437684535980225 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19943 , TFLOPS: 96.77007536125154, Tokens per sec: 79073.31966103142, Loss: 2.256622076034546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19944 , TFLOPS: 96.63920824332739, Tokens per sec: 78966.38477015629, Loss: 2.255859613418579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19945 , TFLOPS: 97.85789166324993, Tokens per sec: 79962.20236427712, Loss: 2.2583227157592773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19946 , TFLOPS: 94.93320501855143, Tokens per sec: 77572.36561876193, Loss: 2.250004291534424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19947 , TFLOPS: 97.81859229518012, Tokens per sec: 79930.08983897154, Loss: 2.266798496246338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19948 , TFLOPS: 96.56059780443367, Tokens per sec: 78902.1501569235, Loss: 2.258547306060791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19949 , TFLOPS: 97.80083619094175, Tokens per sec: 79915.58086911558, Loss: 2.2331089973449707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19950 , TFLOPS: 95.14373027584666, Tokens per sec: 77744.39122590011, Loss: 2.264791488647461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19951 , TFLOPS: 97.8557589444635, Tokens per sec: 79960.45966485566, Loss: 2.2450079917907715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19952 , TFLOPS: 96.01717981722852, Tokens per sec: 78458.1093307547, Loss: 2.2514891624450684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19953 , TFLOPS: 96.74942339908989, Tokens per sec: 79056.44441112033, Loss: 2.2382218837738037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19954 , TFLOPS: 96.78006623946268, Tokens per sec: 79081.48346480583, Loss: 2.2698466777801514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19955 , TFLOPS: 98.45259172318785, Tokens per sec: 80448.14709219386, Loss: 2.249129056930542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19956 , TFLOPS: 96.08423906000937, Tokens per sec: 78512.9051642894, Loss: 2.2504384517669678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19957 , TFLOPS: 96.71250581137272, Tokens per sec: 79026.27809985339, Loss: 2.2623510360717773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19958 , TFLOPS: 97.73643243417409, Tokens per sec: 79862.95490156054, Loss: 2.25526762008667 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19959 , TFLOPS: 97.36045648819216, Tokens per sec: 79555.73527761683, Loss: 2.2664449214935303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19960 , TFLOPS: 96.28207528136089, Tokens per sec: 78674.56223351313, Loss: 2.243821620941162 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19961 , TFLOPS: 97.95402466860493, Tokens per sec: 80040.7551176364, Loss: 2.280285120010376 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19962 , TFLOPS: 98.38673759369871, Tokens per sec: 80394.33598775217, Loss: 2.261720895767212 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19963 , TFLOPS: 95.82726206625236, Tokens per sec: 78302.92264751423, Loss: 2.2732880115509033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19964 , TFLOPS: 97.69400291340287, Tokens per sec: 79828.28464790536, Loss: 2.244215965270996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19965 , TFLOPS: 97.84229239181903, Tokens per sec: 79949.45579802996, Loss: 2.25288987159729 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19966 , TFLOPS: 97.20327902454746, Tokens per sec: 79427.30152596488, Loss: 2.246614456176758 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19967 , TFLOPS: 97.82236841930117, Tokens per sec: 79933.17540719695, Loss: 2.2675013542175293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19968 , TFLOPS: 96.81289829577707, Tokens per sec: 79108.31138319263, Loss: 2.274563789367676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19969 , TFLOPS: 95.49792286151212, Tokens per sec: 78033.81110537564, Loss: 2.256279468536377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19970 , TFLOPS: 97.80530138506833, Tokens per sec: 79919.22949418073, Loss: 2.252441167831421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19971 , TFLOPS: 97.26109001180514, Tokens per sec: 79474.54037184034, Loss: 2.2329468727111816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19972 , TFLOPS: 98.37956792800296, Tokens per sec: 80388.47746934857, Loss: 2.255021810531616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19973 , TFLOPS: 95.41048429076189, Tokens per sec: 77962.36279834657, Loss: 2.2439775466918945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19974 , TFLOPS: 97.78238570362261, Tokens per sec: 79900.50450096933, Loss: 2.257587432861328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19975 , TFLOPS: 97.19110101882009, Tokens per sec: 79417.35056399531, Loss: 2.26465106010437 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19976 , TFLOPS: 96.84195492313408, Tokens per sec: 79132.05430139018, Loss: 2.2393877506256104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19977 , TFLOPS: 97.1201843556747, Tokens per sec: 79359.40273298188, Loss: 2.2591726779937744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19978 , TFLOPS: 97.21435616160042, Tokens per sec: 79436.35293980174, Loss: 2.2605628967285156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19979 , TFLOPS: 96.68076563492707, Tokens per sec: 79000.3423846148, Loss: 2.2464840412139893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19980 , TFLOPS: 97.50054270050305, Tokens per sec: 79670.20332783597, Loss: 2.2367746829986572 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19981 , TFLOPS: 97.27691650769553, Tokens per sec: 79487.47260904263, Loss: 2.2620394229888916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19982 , TFLOPS: 97.84435119100992, Tokens per sec: 79951.13809584704, Loss: 2.238866090774536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19983 , TFLOPS: 97.04972799469981, Tokens per sec: 79301.83102672122, Loss: 2.245090961456299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19984 , TFLOPS: 97.24829547358895, Tokens per sec: 79464.08562530318, Loss: 2.2347071170806885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19985 , TFLOPS: 97.25559793580499, Tokens per sec: 79470.052655162, Loss: 2.258375406265259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19986 , TFLOPS: 96.47918142012684, Tokens per sec: 78835.6227334622, Loss: 2.252387046813965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19987 , TFLOPS: 97.90300206569367, Tokens per sec: 79999.06323535898, Loss: 2.2691564559936523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19988 , TFLOPS: 95.5115515523019, Tokens per sec: 78044.94745945367, Loss: 2.264003276824951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19989 , TFLOPS: 97.80126846323066, Tokens per sec: 79915.93408993054, Loss: 2.2246241569519043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19990 , TFLOPS: 96.04095049169301, Tokens per sec: 78477.53296077123, Loss: 2.2467939853668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19991 , TFLOPS: 97.34850235388602, Tokens per sec: 79545.9672467485, Loss: 2.2708539962768555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19992 , TFLOPS: 96.16510947056365, Tokens per sec: 78578.98645854289, Loss: 2.2470638751983643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19993 , TFLOPS: 97.76324240808704, Tokens per sec: 79884.86202139495, Loss: 2.255739212036133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19994 , TFLOPS: 96.11614111368996, Tokens per sec: 78538.97315358375, Loss: 2.2590622901916504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19995 , TFLOPS: 96.19996432293816, Tokens per sec: 78607.46725566182, Loss: 2.2979061603546143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19996 , TFLOPS: 97.76547138393303, Tokens per sec: 79886.68337493764, Loss: 2.258730173110962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19997 , TFLOPS: 97.31224811353424, Tokens per sec: 79516.3429736899, Loss: 2.2662458419799805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19998 , TFLOPS: 96.75228891761756, Tokens per sec: 79058.7859000743, Loss: 2.2513656616210938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 19999 , TFLOPS: 97.85857058813257, Tokens per sec: 79962.7571312758, Loss: 2.263601541519165 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20000 , TFLOPS: 98.2988242593169, Tokens per sec: 80322.49973913823, Loss: 2.2492587566375732 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/model.pt +[rank0]:[2024-08-31 00:39:10,980] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007877502997871488, 'preprocessing_with_comm': 0.001755616016453132, 'state_converting': 2.6495150970004033, : 2.660835311980918}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020000, took 14.99s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018000 + eval ppl=7.286187171936035, eval loss=1.9859803915023804 +------------------------------------------------------------------ +iteration: 20001 , TFLOPS: 95.44234331512517, Tokens per sec: 77988.39562727805, Loss: 2.2566018104553223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20002 , TFLOPS: 95.70532407474082, Tokens per sec: 78203.28397568701, Loss: 2.2551684379577637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20003 , TFLOPS: 97.78408033201036, Tokens per sec: 79901.88922544857, Loss: 2.265177011489868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20004 , TFLOPS: 96.14403854053222, Tokens per sec: 78561.76885920021, Loss: 2.251593589782715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20005 , TFLOPS: 96.82077576296635, Tokens per sec: 79114.74826441704, Loss: 2.248049259185791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20006 , TFLOPS: 97.66191500012103, Tokens per sec: 79802.06478794638, Loss: 2.2757134437561035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20007 , TFLOPS: 96.25967003536128, Tokens per sec: 78656.25433024453, Loss: 2.255305290222168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20008 , TFLOPS: 96.45712921735577, Tokens per sec: 78817.60331090375, Loss: 2.2664856910705566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20009 , TFLOPS: 97.64074982243987, Tokens per sec: 79784.77017642297, Loss: 2.2249655723571777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20010 , TFLOPS: 97.13514224557959, Tokens per sec: 79371.62520987351, Loss: 2.2616238594055176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20011 , TFLOPS: 97.74136112504864, Tokens per sec: 79866.98226174993, Loss: 2.257966995239258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20012 , TFLOPS: 97.19896635790573, Tokens per sec: 79423.77753503401, Loss: 2.225034236907959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20013 , TFLOPS: 97.24551742222879, Tokens per sec: 79461.81560802337, Loss: 2.260854482650757 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20014 , TFLOPS: 97.68047849329949, Tokens per sec: 79817.23349608933, Loss: 2.2715744972229004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20015 , TFLOPS: 97.09028465172764, Tokens per sec: 79334.9709151999, Loss: 2.250976800918579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20016 , TFLOPS: 96.57697174593038, Tokens per sec: 78915.52972602322, Loss: 2.252849817276001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20017 , TFLOPS: 97.2146891128522, Tokens per sec: 79436.62500284049, Loss: 2.2513844966888428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20018 , TFLOPS: 98.39443164393815, Tokens per sec: 80400.6229942653, Loss: 2.246913433074951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20019 , TFLOPS: 96.74795564119809, Tokens per sec: 79055.24506836348, Loss: 2.216829538345337 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20020 , TFLOPS: 97.94795653377027, Tokens per sec: 80035.79668845532, Loss: 2.2146942615509033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20021 , TFLOPS: 97.81362870026301, Tokens per sec: 79926.03395779048, Loss: 2.2489612102508545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20022 , TFLOPS: 97.09500137745738, Tokens per sec: 79338.82507321297, Loss: 2.2395451068878174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20023 , TFLOPS: 96.71713627104015, Tokens per sec: 79030.06176764627, Loss: 2.2536609172821045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20024 , TFLOPS: 97.78983448307852, Tokens per sec: 79906.5910904114, Loss: 2.2500903606414795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20025 , TFLOPS: 98.44083505539015, Tokens per sec: 80438.5404162928, Loss: 2.2513341903686523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20026 , TFLOPS: 96.56777855378908, Tokens per sec: 78908.01773207073, Loss: 2.256436347961426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20027 , TFLOPS: 97.15116949662922, Tokens per sec: 79384.7214892843, Loss: 2.258119821548462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20028 , TFLOPS: 96.52261873150024, Tokens per sec: 78871.11647876128, Loss: 2.240889072418213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20029 , TFLOPS: 97.7514804028426, Tokens per sec: 79875.25098412875, Loss: 2.2379660606384277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20030 , TFLOPS: 95.57209803967366, Tokens per sec: 78094.42155289039, Loss: 2.2717881202697754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20031 , TFLOPS: 98.39386850874217, Tokens per sec: 80400.16284200027, Loss: 2.266300678253174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20032 , TFLOPS: 96.64762193333362, Tokens per sec: 78973.25981284802, Loss: 2.2521722316741943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20033 , TFLOPS: 96.6384841060014, Tokens per sec: 78965.79305890627, Loss: 2.2468249797821045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20034 , TFLOPS: 96.6141346162247, Tokens per sec: 78945.8964639981, Loss: 2.242553949356079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20035 , TFLOPS: 97.14589316406203, Tokens per sec: 79380.41006211878, Loss: 2.2549095153808594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20036 , TFLOPS: 96.27934165065227, Tokens per sec: 78672.32851349121, Loss: 2.2573118209838867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20037 , TFLOPS: 96.12363369695453, Tokens per sec: 78545.09553624547, Loss: 2.2317581176757812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20038 , TFLOPS: 97.35002491409931, Tokens per sec: 79547.21136989296, Loss: 2.24004864692688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20039 , TFLOPS: 96.37417490443991, Tokens per sec: 78749.81920638622, Loss: 2.259949207305908 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20040 , TFLOPS: 95.95951232200193, Tokens per sec: 78410.98773591167, Loss: 2.263266086578369 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20041 , TFLOPS: 97.50610612841939, Tokens per sec: 79674.7493480018, Loss: 2.233213424682617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20042 , TFLOPS: 97.27726880581136, Tokens per sec: 79487.76048090204, Loss: 2.2613024711608887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20043 , TFLOPS: 96.63874551382847, Tokens per sec: 78966.006661971, Loss: 2.2579269409179688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20044 , TFLOPS: 98.40880406854083, Tokens per sec: 80412.36707238728, Loss: 2.2730612754821777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20045 , TFLOPS: 96.0855768591153, Tokens per sec: 78513.99831437689, Loss: 2.2618167400360107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20046 , TFLOPS: 97.42278227514824, Tokens per sec: 79606.66328254713, Loss: 2.257617950439453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20047 , TFLOPS: 97.18509231969436, Tokens per sec: 79412.44070126199, Loss: 2.24289870262146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20048 , TFLOPS: 97.14169529644248, Tokens per sec: 79376.97987642411, Loss: 2.2675514221191406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20049 , TFLOPS: 97.86055995449581, Tokens per sec: 79964.38269374207, Loss: 2.259695529937744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20050 , TFLOPS: 97.18794630748032, Tokens per sec: 79414.77276815007, Loss: 2.2280378341674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20051 , TFLOPS: 97.28000758538309, Tokens per sec: 79489.99840819255, Loss: 2.275876045227051 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20052 , TFLOPS: 97.87593844434173, Tokens per sec: 79976.94885367271, Loss: 2.2660255432128906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20053 , TFLOPS: 96.89907198434874, Tokens per sec: 79178.72612243262, Loss: 2.2591309547424316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20054 , TFLOPS: 96.54894523866642, Tokens per sec: 78892.62854547134, Loss: 2.2340238094329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20055 , TFLOPS: 95.56194103779688, Tokens per sec: 78086.12200519239, Loss: 2.240508794784546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20056 , TFLOPS: 98.38776503199642, Tokens per sec: 80395.17553403386, Loss: 2.2477259635925293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20057 , TFLOPS: 97.22461699789407, Tokens per sec: 79444.73733327471, Loss: 2.2628085613250732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20058 , TFLOPS: 97.20047872439372, Tokens per sec: 79425.01332862313, Loss: 2.2430527210235596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20059 , TFLOPS: 97.8557717906528, Tokens per sec: 79960.47016180764, Loss: 2.2596354484558105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20060 , TFLOPS: 97.22979443890334, Tokens per sec: 79448.96795360257, Loss: 2.261455535888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20061 , TFLOPS: 97.26690092672453, Tokens per sec: 79479.28862000724, Loss: 2.2556464672088623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20062 , TFLOPS: 97.21123803739619, Tokens per sec: 79433.80504024679, Loss: 2.2697012424468994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20063 , TFLOPS: 98.42188685356626, Tokens per sec: 80423.05735281241, Loss: 2.2672810554504395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20064 , TFLOPS: 95.9294105619066, Tokens per sec: 78386.39081284952, Loss: 2.2417144775390625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20065 , TFLOPS: 97.1865833891254, Tokens per sec: 79413.65909247762, Loss: 2.258464813232422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20066 , TFLOPS: 97.23771513658863, Tokens per sec: 79455.44015957811, Loss: 2.256648063659668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20067 , TFLOPS: 97.85677763096405, Tokens per sec: 79961.29205981857, Loss: 2.283517837524414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20068 , TFLOPS: 95.65902834897813, Tokens per sec: 78165.45454640832, Loss: 2.252990245819092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20069 , TFLOPS: 97.87313621899717, Tokens per sec: 79974.65908320798, Loss: 2.2416348457336426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20070 , TFLOPS: 96.54104623722525, Tokens per sec: 78886.17406805538, Loss: 2.24281907081604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20071 , TFLOPS: 96.64109859072384, Tokens per sec: 78967.92942167578, Loss: 2.261173725128174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20072 , TFLOPS: 97.28190019242376, Tokens per sec: 79491.54490612552, Loss: 2.271266460418701 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20073 , TFLOPS: 97.18970248505273, Tokens per sec: 79416.20778605253, Loss: 2.244826555252075 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20074 , TFLOPS: 96.78411913594505, Tokens per sec: 79084.79519085241, Loss: 2.234875440597534 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20075 , TFLOPS: 96.77688793746846, Tokens per sec: 79078.88639242957, Loss: 2.2527379989624023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20076 , TFLOPS: 97.23864391635406, Tokens per sec: 79456.19908943336, Loss: 2.2612087726593018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20077 , TFLOPS: 96.7159266461964, Tokens per sec: 79029.07335204793, Loss: 2.252020835876465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20078 , TFLOPS: 96.11810191343125, Tokens per sec: 78540.57537352783, Loss: 2.2601537704467773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20079 , TFLOPS: 97.17822912327082, Tokens per sec: 79406.83260678992, Loss: 2.247666120529175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20080 , TFLOPS: 97.21222543914165, Tokens per sec: 79434.611871631, Loss: 2.254415512084961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20081 , TFLOPS: 97.37526662398892, Tokens per sec: 79567.83702082318, Loss: 2.264821767807007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20082 , TFLOPS: 97.66054001544134, Tokens per sec: 79800.94125256935, Loss: 2.2832140922546387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20083 , TFLOPS: 96.28940896891827, Tokens per sec: 78680.55477840219, Loss: 2.2367236614227295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20084 , TFLOPS: 96.72383341498855, Tokens per sec: 79035.53417636633, Loss: 2.2456653118133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20085 , TFLOPS: 97.79714317522854, Tokens per sec: 79912.56321090965, Loss: 2.2686891555786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20086 , TFLOPS: 97.19801335254444, Tokens per sec: 79422.99880982075, Loss: 2.243727684020996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20087 , TFLOPS: 96.59712372182008, Tokens per sec: 78931.99642428069, Loss: 2.267338991165161 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20088 , TFLOPS: 97.88764900346962, Tokens per sec: 79986.5178529927, Loss: 2.2449827194213867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20089 , TFLOPS: 97.82820793179192, Tokens per sec: 79937.94702317555, Loss: 2.2510337829589844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20090 , TFLOPS: 96.69641959749735, Tokens per sec: 79013.13364039983, Loss: 2.25982666015625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20091 , TFLOPS: 97.10387521728973, Tokens per sec: 79346.07611617296, Loss: 2.239758253097534 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20092 , TFLOPS: 96.0225968600214, Tokens per sec: 78462.53573586784, Loss: 2.2541515827178955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20093 , TFLOPS: 96.59843658362948, Tokens per sec: 78933.06919746123, Loss: 2.255066156387329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20094 , TFLOPS: 97.40008059546386, Tokens per sec: 79588.11315568381, Loss: 2.2603228092193604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20095 , TFLOPS: 96.67777891714302, Tokens per sec: 78997.90186062851, Loss: 2.2601733207702637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20096 , TFLOPS: 96.66684049538105, Tokens per sec: 78988.96379462679, Loss: 2.241678476333618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20097 , TFLOPS: 97.21624590898853, Tokens per sec: 79437.89710103898, Loss: 2.2479546070098877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20098 , TFLOPS: 97.20291892951172, Tokens per sec: 79427.00728304152, Loss: 2.258718252182007 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20099 , TFLOPS: 96.71657590583717, Tokens per sec: 79029.60387881384, Loss: 2.2261922359466553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20100 , TFLOPS: 97.3429504102503, Tokens per sec: 79541.430610684, Loss: 2.245222330093384 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20101 , TFLOPS: 97.80172157784067, Tokens per sec: 79916.30434154258, Loss: 2.2571146488189697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20102 , TFLOPS: 96.72002959724591, Tokens per sec: 79032.42597896984, Loss: 2.2364816665649414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20103 , TFLOPS: 96.7560397140005, Tokens per sec: 79061.85077235289, Loss: 2.2270724773406982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20104 , TFLOPS: 97.19271306313557, Tokens per sec: 79418.66780690319, Loss: 2.2710957527160645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20105 , TFLOPS: 96.51815422385768, Tokens per sec: 78867.4684146399, Loss: 2.253866672515869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20106 , TFLOPS: 96.19244712883285, Tokens per sec: 78601.32476284858, Loss: 2.271361827850342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20107 , TFLOPS: 97.84519181799679, Tokens per sec: 79951.82499379768, Loss: 2.2702560424804688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20108 , TFLOPS: 97.34911558423458, Tokens per sec: 79546.46833305227, Loss: 2.2279837131500244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20109 , TFLOPS: 96.83239631101651, Tokens per sec: 79124.24371336834, Loss: 2.2617251873016357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20110 , TFLOPS: 96.00054921124931, Tokens per sec: 78444.5200344992, Loss: 2.281296491622925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20111 , TFLOPS: 96.5631418202798, Tokens per sec: 78904.22893776016, Loss: 2.246474266052246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20112 , TFLOPS: 96.61626310705341, Tokens per sec: 78947.63570864646, Loss: 2.2640228271484375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20113 , TFLOPS: 96.69353806658026, Tokens per sec: 79010.77906731027, Loss: 2.247511386871338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20114 , TFLOPS: 97.321125224871, Tokens per sec: 79523.59668988033, Loss: 2.2617948055267334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20115 , TFLOPS: 96.29015191220583, Tokens per sec: 78681.16185648751, Loss: 2.2128512859344482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20116 , TFLOPS: 96.85223642886919, Tokens per sec: 79140.45558439584, Loss: 2.242908477783203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20117 , TFLOPS: 97.07962535366687, Tokens per sec: 79326.2609283597, Loss: 2.2596116065979004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20118 , TFLOPS: 96.685190914305, Tokens per sec: 79003.95839431134, Loss: 2.270749092102051 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20119 , TFLOPS: 96.06098337543511, Tokens per sec: 78493.90234576922, Loss: 2.2356557846069336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20120 , TFLOPS: 98.48248803796473, Tokens per sec: 80472.57614059765, Loss: 2.2538232803344727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20121 , TFLOPS: 96.8706124599051, Tokens per sec: 79155.47111239641, Loss: 2.2547333240509033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20122 , TFLOPS: 95.47937722987476, Tokens per sec: 78018.65699235772, Loss: 2.265529155731201 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20123 , TFLOPS: 97.79330550454465, Tokens per sec: 79909.42735140341, Loss: 2.220093011856079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20124 , TFLOPS: 96.49632146418435, Tokens per sec: 78849.62830468558, Loss: 2.2678306102752686 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20125 , TFLOPS: 97.756279149204, Tokens per sec: 79879.17216331138, Loss: 2.2560291290283203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20126 , TFLOPS: 96.22615495919166, Tokens per sec: 78628.8682987515, Loss: 2.240161418914795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20127 , TFLOPS: 98.42962942505173, Tokens per sec: 80429.3840072847, Loss: 2.2686586380004883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20128 , TFLOPS: 96.08903070921502, Tokens per sec: 78516.82054420336, Loss: 2.2368876934051514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20129 , TFLOPS: 97.22293004321648, Tokens per sec: 79443.35887918157, Loss: 2.2628228664398193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20130 , TFLOPS: 95.93248844762005, Tokens per sec: 78388.90583249781, Loss: 2.246218681335449 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20131 , TFLOPS: 96.14246813014203, Tokens per sec: 78560.48563644443, Loss: 2.2596521377563477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20132 , TFLOPS: 97.95425306605762, Tokens per sec: 80040.94174707447, Loss: 2.267333745956421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20133 , TFLOPS: 96.9110024058684, Tokens per sec: 79188.47477697271, Loss: 2.262009620666504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20134 , TFLOPS: 96.73465895385328, Tokens per sec: 79044.38000284687, Loss: 2.2605106830596924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20135 , TFLOPS: 96.49551863134225, Tokens per sec: 78848.97228930354, Loss: 2.2775909900665283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20136 , TFLOPS: 97.81253726954778, Tokens per sec: 79925.14212165713, Loss: 2.2595033645629883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20137 , TFLOPS: 97.86256458770036, Tokens per sec: 79966.02073113734, Loss: 2.2392592430114746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20138 , TFLOPS: 97.26610848878592, Tokens per sec: 79478.64109856843, Loss: 2.2394967079162598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20139 , TFLOPS: 97.34370225805932, Tokens per sec: 79542.04496488304, Loss: 2.2535624504089355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20140 , TFLOPS: 97.31271969781344, Tokens per sec: 79516.72831734408, Loss: 2.2498538494110107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20141 , TFLOPS: 96.6635694557024, Tokens per sec: 78986.2909439013, Loss: 2.2440900802612305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20142 , TFLOPS: 97.30634131178326, Tokens per sec: 79511.51637392455, Loss: 2.2542810440063477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20143 , TFLOPS: 96.50735360503306, Tokens per sec: 78858.64295096564, Loss: 2.2601513862609863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20144 , TFLOPS: 97.20859911113318, Tokens per sec: 79431.6487056653, Loss: 2.246502637863159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20145 , TFLOPS: 98.3690635512029, Tokens per sec: 80379.89407265851, Loss: 2.2444028854370117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20146 , TFLOPS: 96.0882758924038, Tokens per sec: 78516.20376395622, Loss: 2.254908561706543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20147 , TFLOPS: 97.28771966623798, Tokens per sec: 79496.3001479857, Loss: 2.244844913482666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20148 , TFLOPS: 96.9975493809108, Tokens per sec: 79259.1945381972, Loss: 2.267425060272217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20149 , TFLOPS: 97.19935794470977, Tokens per sec: 79424.09751069194, Loss: 2.271423578262329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20150 , TFLOPS: 96.48734287499018, Tokens per sec: 78842.29166832558, Loss: 2.26112699508667 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20151 , TFLOPS: 96.48975737815786, Tokens per sec: 78844.26462101874, Loss: 2.244642972946167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20152 , TFLOPS: 98.4318913295023, Tokens per sec: 80431.23226763787, Loss: 2.2501041889190674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20153 , TFLOPS: 96.20641237019085, Tokens per sec: 78612.7361210595, Loss: 2.2618463039398193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20154 , TFLOPS: 97.31085977918782, Tokens per sec: 79515.20852995652, Loss: 2.262437343597412 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20155 , TFLOPS: 97.73140500358969, Tokens per sec: 79858.84685861245, Loss: 2.2404096126556396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20156 , TFLOPS: 96.49504893197062, Tokens per sec: 78848.58848585584, Loss: 2.2516355514526367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20157 , TFLOPS: 97.79217836771204, Tokens per sec: 79908.50633888239, Loss: 2.2368900775909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20158 , TFLOPS: 97.11886433436352, Tokens per sec: 79358.32410958815, Loss: 2.256457805633545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20159 , TFLOPS: 98.41305133368708, Tokens per sec: 80415.8376220726, Loss: 2.2543082237243652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20160 , TFLOPS: 96.62751993902336, Tokens per sec: 78956.83395582574, Loss: 2.2457022666931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20161 , TFLOPS: 97.04559649653129, Tokens per sec: 79298.4550732135, Loss: 2.248601198196411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20162 , TFLOPS: 95.93195987532562, Tokens per sec: 78388.47392246938, Loss: 2.270528793334961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20163 , TFLOPS: 97.05061119117524, Tokens per sec: 79302.55270929678, Loss: 2.248649835586548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20164 , TFLOPS: 96.83959187401263, Tokens per sec: 79130.12339312273, Loss: 2.228973627090454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20165 , TFLOPS: 97.23772909887323, Tokens per sec: 79455.45156852028, Loss: 2.260688543319702 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20166 , TFLOPS: 97.25332570777846, Tokens per sec: 79468.19595914918, Loss: 2.241502285003662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20167 , TFLOPS: 95.74180887373763, Tokens per sec: 78233.09664414957, Loss: 2.2512941360473633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20168 , TFLOPS: 96.62524655836103, Tokens per sec: 78954.97631796452, Loss: 2.24542236328125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20169 , TFLOPS: 96.68774137150568, Tokens per sec: 79006.04243854462, Loss: 2.2381350994110107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20170 , TFLOPS: 97.34967940184777, Tokens per sec: 79546.92904293795, Loss: 2.2582244873046875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20171 , TFLOPS: 96.71614062604483, Tokens per sec: 79029.24820049042, Loss: 2.2736902236938477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20172 , TFLOPS: 97.20690197780743, Tokens per sec: 79430.26193433673, Loss: 2.242644786834717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20173 , TFLOPS: 95.53093327545436, Tokens per sec: 78060.78476437149, Loss: 2.2419538497924805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20174 , TFLOPS: 97.85772621264027, Tokens per sec: 79962.0671703249, Loss: 2.2593202590942383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20175 , TFLOPS: 97.19675357157945, Tokens per sec: 79421.96941036462, Loss: 2.2527410984039307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20176 , TFLOPS: 96.71069977237897, Tokens per sec: 79024.80233890002, Loss: 2.254464626312256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20177 , TFLOPS: 97.28008171886219, Tokens per sec: 79490.0589845667, Loss: 2.2556304931640625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20178 , TFLOPS: 97.4160967457677, Tokens per sec: 79601.20036438946, Loss: 2.2484543323516846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20179 , TFLOPS: 96.01957712281481, Tokens per sec: 78460.06823086139, Loss: 2.2571537494659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20180 , TFLOPS: 96.83732293515004, Tokens per sec: 79128.26938477044, Loss: 2.2728378772735596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20181 , TFLOPS: 97.15178674468387, Tokens per sec: 79385.22585855915, Loss: 2.2537665367126465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20182 , TFLOPS: 93.29051053206021, Tokens per sec: 76230.07766713193, Loss: 2.2348275184631348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20183 , TFLOPS: 97.12455140494441, Tokens per sec: 79362.97116135801, Loss: 2.234675645828247 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20184 , TFLOPS: 96.84064199293975, Tokens per sec: 79130.98147233056, Loss: 2.279417037963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20185 , TFLOPS: 96.21987244004133, Tokens per sec: 78623.73469062735, Loss: 2.275425434112549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20186 , TFLOPS: 96.6911159220874, Tokens per sec: 79008.7998706936, Loss: 2.2533583641052246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20187 , TFLOPS: 97.89470112192473, Tokens per sec: 79992.28032052006, Loss: 2.2630114555358887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20188 , TFLOPS: 96.55085050726952, Tokens per sec: 78894.18538949292, Loss: 2.2647502422332764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20189 , TFLOPS: 96.70543082242833, Tokens per sec: 79020.49694426049, Loss: 2.259835720062256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20190 , TFLOPS: 97.85580367346824, Tokens per sec: 79960.49621407686, Loss: 2.2702951431274414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20191 , TFLOPS: 97.46649916949633, Tokens per sec: 79642.38548229194, Loss: 2.2565572261810303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20192 , TFLOPS: 95.77295562825898, Tokens per sec: 78258.54745905778, Loss: 2.2620813846588135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20193 , TFLOPS: 97.70324154875695, Tokens per sec: 79835.8337746768, Loss: 2.27258563041687 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20194 , TFLOPS: 95.92696536872953, Tokens per sec: 78384.39278255968, Loss: 2.2502095699310303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20195 , TFLOPS: 98.47214755961558, Tokens per sec: 80464.12666955097, Loss: 2.2654941082000732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20196 , TFLOPS: 96.12357278448509, Tokens per sec: 78545.04576309868, Loss: 2.2320234775543213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20197 , TFLOPS: 97.79502006062083, Tokens per sec: 79910.82835930985, Loss: 2.258005380630493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20198 , TFLOPS: 97.85167797002089, Tokens per sec: 79957.12499558496, Loss: 2.2721784114837646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20199 , TFLOPS: 96.11616172923466, Tokens per sec: 78538.98999907602, Loss: 2.264134407043457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20200 , TFLOPS: 96.61149146066785, Tokens per sec: 78943.73667355171, Loss: 2.2474753856658936 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/optimizer.pt +[rank0]:[2024-08-31 01:34:42,520] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007930002000648528, 'preprocessing_with_comm': 0.0015773200138937682, 'state_converting': 2.6855735790159088, : 2.696747752983356}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020200, took 14.97s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018200 + eval ppl=7.885667324066162, eval loss=2.065046787261963 +------------------------------------------------------------------ +iteration: 20201 , TFLOPS: 96.8134443668373, Tokens per sec: 79108.75759191306, Loss: 2.2522761821746826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20202 , TFLOPS: 94.80591991002817, Tokens per sec: 77468.35767997739, Loss: 2.2603635787963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20203 , TFLOPS: 95.95809882762131, Tokens per sec: 78409.83273326668, Loss: 2.252312421798706 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20204 , TFLOPS: 96.82114397899348, Tokens per sec: 79115.04914321152, Loss: 2.2769196033477783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20205 , TFLOPS: 96.82329375701323, Tokens per sec: 79116.80578217168, Loss: 2.2423477172851562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20206 , TFLOPS: 97.20564682731379, Tokens per sec: 79429.23631855792, Loss: 2.2536771297454834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20207 , TFLOPS: 97.8314412443964, Tokens per sec: 79940.58904613755, Loss: 2.270228147506714 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20208 , TFLOPS: 97.32587095482666, Tokens per sec: 79527.47454799304, Loss: 2.248720407485962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20209 , TFLOPS: 97.83240458352859, Tokens per sec: 79941.376215341, Loss: 2.2557120323181152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20210 , TFLOPS: 97.73151450180585, Tokens per sec: 79858.93633242362, Loss: 2.264413356781006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20211 , TFLOPS: 97.67046621992019, Tokens per sec: 79809.05220977226, Loss: 2.252465009689331 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20212 , TFLOPS: 96.47748363163814, Tokens per sec: 78834.23542678313, Loss: 2.255476951599121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20213 , TFLOPS: 96.71205010398829, Tokens per sec: 79025.90572961861, Loss: 2.2517025470733643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20214 , TFLOPS: 97.19241114969044, Tokens per sec: 79418.42110565523, Loss: 2.235339641571045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20215 , TFLOPS: 97.83497718256083, Tokens per sec: 79943.47835223486, Loss: 2.261775255203247 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20216 , TFLOPS: 97.32578734189501, Tokens per sec: 79527.40622571422, Loss: 2.25706148147583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20217 , TFLOPS: 96.02432549082269, Tokens per sec: 78463.9482445945, Loss: 2.245392322540283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20218 , TFLOPS: 98.40217558886695, Tokens per sec: 80406.9507710135, Loss: 2.2506508827209473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20219 , TFLOPS: 96.46111050019893, Tokens per sec: 78820.85651960224, Loss: 2.2678005695343018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20220 , TFLOPS: 97.74643672026424, Tokens per sec: 79871.12966125805, Loss: 2.2550578117370605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20221 , TFLOPS: 96.50493638836483, Tokens per sec: 78856.66778100132, Loss: 2.269139289855957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20222 , TFLOPS: 98.3888669624602, Tokens per sec: 80396.07594978201, Loss: 2.258223533630371 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20223 , TFLOPS: 96.6642414650887, Tokens per sec: 78986.84006006991, Loss: 2.251365900039673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20224 , TFLOPS: 96.7212446967002, Tokens per sec: 79033.41886801338, Loss: 2.267223596572876 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20225 , TFLOPS: 96.98513444856054, Tokens per sec: 79249.04997738502, Loss: 2.226712465286255 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20226 , TFLOPS: 97.2403161401363, Tokens per sec: 79457.5655065327, Loss: 2.2406721115112305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20227 , TFLOPS: 94.7905919502914, Tokens per sec: 77455.83280949968, Loss: 2.2588765621185303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20228 , TFLOPS: 97.92335188223839, Tokens per sec: 80015.69159430855, Loss: 2.2284438610076904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20229 , TFLOPS: 96.76645546260728, Tokens per sec: 79070.36174866471, Loss: 2.2648138999938965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20230 , TFLOPS: 95.41486261234489, Tokens per sec: 77965.94043762007, Loss: 2.258533239364624 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20231 , TFLOPS: 97.32661971316938, Tokens per sec: 79528.08637770964, Loss: 2.2464847564697266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20232 , TFLOPS: 95.67442231347503, Tokens per sec: 78178.03335107463, Loss: 2.253035068511963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20233 , TFLOPS: 96.4716708109751, Tokens per sec: 78829.48562138404, Loss: 2.259099245071411 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20234 , TFLOPS: 97.25627676494027, Tokens per sec: 79470.60734392305, Loss: 2.250927448272705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20235 , TFLOPS: 95.46771264879568, Tokens per sec: 78009.12556288498, Loss: 2.243802070617676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20236 , TFLOPS: 97.30574769124833, Tokens per sec: 79511.03131130431, Loss: 2.279581308364868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20237 , TFLOPS: 96.32278706632063, Tokens per sec: 78707.82888101833, Loss: 2.2478139400482178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20238 , TFLOPS: 95.67504509445953, Tokens per sec: 78178.54224144887, Loss: 2.239506721496582 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20239 , TFLOPS: 97.23834203541307, Tokens per sec: 79455.95241474536, Loss: 2.2648191452026367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20240 , TFLOPS: 95.60654957927575, Tokens per sec: 78122.57279276078, Loss: 2.2590816020965576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20241 , TFLOPS: 97.91238891187288, Tokens per sec: 80006.73346901093, Loss: 2.249133348464966 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20242 , TFLOPS: 96.40956490821557, Tokens per sec: 78778.73728949107, Loss: 2.2296500205993652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20243 , TFLOPS: 97.36721226797344, Tokens per sec: 79561.25559918537, Loss: 2.2349817752838135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20244 , TFLOPS: 97.26432221625576, Tokens per sec: 79477.18148930128, Loss: 2.2901177406311035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20245 , TFLOPS: 97.24121505838728, Tokens per sec: 79458.3000357758, Loss: 2.250584602355957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20246 , TFLOPS: 97.27122512812265, Tokens per sec: 79482.82203628449, Loss: 2.237626314163208 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20247 , TFLOPS: 96.70184873400318, Tokens per sec: 79017.56992759714, Loss: 2.258028984069824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20248 , TFLOPS: 98.45658787839706, Tokens per sec: 80451.4124534855, Loss: 2.2547647953033447 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20249 , TFLOPS: 97.29941550813008, Tokens per sec: 79505.8571214735, Loss: 2.255641222000122 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20250 , TFLOPS: 97.82738551977087, Tokens per sec: 79937.2750091424, Loss: 2.233245849609375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20251 , TFLOPS: 96.56080581872382, Tokens per sec: 78902.32013075474, Loss: 2.2406229972839355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20252 , TFLOPS: 98.35815517449213, Tokens per sec: 80370.98055724155, Loss: 2.2608208656311035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20253 , TFLOPS: 98.34290064785536, Tokens per sec: 80358.51569084021, Loss: 2.250061511993408 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20254 , TFLOPS: 97.21235209310042, Tokens per sec: 79434.71536384107, Loss: 2.2491326332092285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20255 , TFLOPS: 95.4937174342362, Tokens per sec: 78030.37474249116, Loss: 2.25471830368042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20256 , TFLOPS: 98.40664871067287, Tokens per sec: 80410.60587398951, Loss: 2.2620162963867188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20257 , TFLOPS: 97.18509071965643, Tokens per sec: 79412.4393938298, Loss: 2.2450714111328125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20258 , TFLOPS: 97.2504192545304, Tokens per sec: 79465.82102138142, Loss: 2.252007246017456 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20259 , TFLOPS: 95.7282469617983, Tokens per sec: 78222.01485678929, Loss: 2.2469706535339355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20260 , TFLOPS: 98.33753821867465, Tokens per sec: 80354.13391191544, Loss: 2.2661571502685547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20261 , TFLOPS: 97.0950876357025, Tokens per sec: 79338.89555704556, Loss: 2.275634765625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20262 , TFLOPS: 95.87031839014747, Tokens per sec: 78338.10507813732, Loss: 2.248478889465332 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20263 , TFLOPS: 97.26074104175468, Tokens per sec: 79474.25521942883, Loss: 2.2473597526550293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20264 , TFLOPS: 96.03153201873393, Tokens per sec: 78469.83688406329, Loss: 2.2595913410186768 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20265 , TFLOPS: 95.9524208851013, Tokens per sec: 78405.19314027032, Loss: 2.2679531574249268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20266 , TFLOPS: 97.37297236679046, Tokens per sec: 79565.96232420689, Loss: 2.253650665283203 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20267 , TFLOPS: 96.85000600295395, Tokens per sec: 79138.63304596422, Loss: 2.2525744438171387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20268 , TFLOPS: 95.34846247193919, Tokens per sec: 77911.68317360288, Loss: 2.246865749359131 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20269 , TFLOPS: 97.38147914925088, Tokens per sec: 79572.91343513934, Loss: 2.2604897022247314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20270 , TFLOPS: 95.13688863765496, Tokens per sec: 77738.80074721423, Loss: 2.245872735977173 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20271 , TFLOPS: 97.10112488886374, Tokens per sec: 79343.82875201633, Loss: 2.2652456760406494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20272 , TFLOPS: 96.18064286335986, Tokens per sec: 78591.67919366178, Loss: 2.2513184547424316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20273 , TFLOPS: 96.06290114769777, Tokens per sec: 78495.46940685288, Loss: 2.2569797039031982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20274 , TFLOPS: 96.79985130244357, Tokens per sec: 79097.65034908034, Loss: 2.2472639083862305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20275 , TFLOPS: 96.22447698623436, Tokens per sec: 78627.49718385324, Loss: 2.235496759414673 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20276 , TFLOPS: 96.63306075018683, Tokens per sec: 78961.36149526057, Loss: 2.2632296085357666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20277 , TFLOPS: 97.29247945943195, Tokens per sec: 79500.18949753248, Loss: 2.256211042404175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20278 , TFLOPS: 96.28607607773837, Tokens per sec: 78677.83138722285, Loss: 2.254589796066284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20279 , TFLOPS: 97.8024927360138, Tokens per sec: 79916.93447473722, Loss: 2.2657084465026855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20280 , TFLOPS: 96.66510544389745, Tokens per sec: 78987.54603939533, Loss: 2.2619643211364746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20281 , TFLOPS: 98.35703760987717, Tokens per sec: 80370.067366426, Loss: 2.2672407627105713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20282 , TFLOPS: 97.36108928629196, Tokens per sec: 79556.25235323385, Loss: 2.254401922225952 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20283 , TFLOPS: 96.04641152626485, Tokens per sec: 78481.99531269942, Loss: 2.264946222305298 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20284 , TFLOPS: 97.30449281139322, Tokens per sec: 79510.00591667122, Loss: 2.2591137886047363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20285 , TFLOPS: 97.39157956583382, Tokens per sec: 79581.16674553714, Loss: 2.2524325847625732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20286 , TFLOPS: 98.3439226292728, Tokens per sec: 80359.35077816453, Loss: 2.238443374633789 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20287 , TFLOPS: 96.80863768073084, Tokens per sec: 79104.82992496037, Loss: 2.257544994354248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20288 , TFLOPS: 97.91353020817212, Tokens per sec: 80007.6660515966, Loss: 2.260234832763672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20289 , TFLOPS: 96.43199879395034, Tokens per sec: 78797.06859502454, Loss: 2.2474262714385986 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20290 , TFLOPS: 98.28786309282827, Tokens per sec: 80313.54308783486, Loss: 2.2419052124023438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20291 , TFLOPS: 97.79879654528466, Tokens per sec: 79913.91422214403, Loss: 2.262450695037842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20292 , TFLOPS: 97.94361634115528, Tokens per sec: 80032.2502053431, Loss: 2.271103858947754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20293 , TFLOPS: 96.04684548104285, Tokens per sec: 78482.3499083195, Loss: 2.254122018814087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20294 , TFLOPS: 97.12800361356959, Tokens per sec: 79365.7920498934, Loss: 2.2364182472229004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20295 , TFLOPS: 97.18588749679336, Tokens per sec: 79413.09046093926, Loss: 2.254837989807129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20296 , TFLOPS: 97.29230316951804, Tokens per sec: 79500.04544650571, Loss: 2.250612258911133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20297 , TFLOPS: 96.05648596508554, Tokens per sec: 78490.22739599893, Loss: 2.2435708045959473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20298 , TFLOPS: 97.77542390314004, Tokens per sec: 79894.81583458232, Loss: 2.255488634109497 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20299 , TFLOPS: 97.16224894109881, Tokens per sec: 79393.77478857068, Loss: 2.2575838565826416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20300 , TFLOPS: 96.55593741951373, Tokens per sec: 78898.34203643644, Loss: 2.251955270767212 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20301 , TFLOPS: 96.39200451645054, Tokens per sec: 78764.38824133517, Loss: 2.256950616836548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20302 , TFLOPS: 96.13182270637304, Tokens per sec: 78551.78698664521, Loss: 2.264104127883911 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20303 , TFLOPS: 95.88531949693085, Tokens per sec: 78350.36286865285, Loss: 2.273130416870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20304 , TFLOPS: 96.79912771433116, Tokens per sec: 79097.05908660688, Loss: 2.2687175273895264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20305 , TFLOPS: 96.9311612731588, Tokens per sec: 79204.94710637112, Loss: 2.270811080932617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20306 , TFLOPS: 95.53342281825911, Tokens per sec: 78062.81903388379, Loss: 2.2504284381866455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20307 , TFLOPS: 97.36981653139958, Tokens per sec: 79563.38360987057, Loss: 2.2735018730163574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20308 , TFLOPS: 95.09767792731404, Tokens per sec: 77706.76066642118, Loss: 2.2398934364318848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20309 , TFLOPS: 97.8161871842304, Tokens per sec: 79928.12456090146, Loss: 2.2443978786468506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20310 , TFLOPS: 96.09637891718566, Tokens per sec: 78522.8249540959, Loss: 2.2530298233032227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20311 , TFLOPS: 96.73246485028169, Tokens per sec: 79042.58714433726, Loss: 2.2623114585876465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20312 , TFLOPS: 96.62622749364195, Tokens per sec: 78955.77786543408, Loss: 2.2592740058898926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20313 , TFLOPS: 96.7459345616399, Tokens per sec: 79053.59359222927, Loss: 2.245741367340088 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20314 , TFLOPS: 96.70138643533988, Tokens per sec: 79017.19217145875, Loss: 2.223783016204834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20315 , TFLOPS: 96.078581515229, Tokens per sec: 78508.28223881121, Loss: 2.2581160068511963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20316 , TFLOPS: 96.87368189024382, Tokens per sec: 79157.97922294034, Loss: 2.2360427379608154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20317 , TFLOPS: 97.86725345628604, Tokens per sec: 79969.85212636091, Loss: 2.2733585834503174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20318 , TFLOPS: 97.17746255525002, Tokens per sec: 79406.2062243269, Loss: 2.2631795406341553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20319 , TFLOPS: 97.78702259498259, Tokens per sec: 79904.29342426376, Loss: 2.2409462928771973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20320 , TFLOPS: 97.1938374020447, Tokens per sec: 79419.5865331689, Loss: 2.261134147644043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20321 , TFLOPS: 96.6255499011262, Tokens per sec: 78955.2241871467, Loss: 2.244380474090576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20322 , TFLOPS: 97.3749179739742, Tokens per sec: 79567.55212992111, Loss: 2.2533230781555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20323 , TFLOPS: 96.8258109188313, Tokens per sec: 79118.86261989071, Loss: 2.2688467502593994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20324 , TFLOPS: 97.71924978795542, Tokens per sec: 79848.91451901461, Loss: 2.2695064544677734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20325 , TFLOPS: 96.20674867339896, Tokens per sec: 78613.01092306805, Loss: 2.243501901626587 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20326 , TFLOPS: 97.19180460459658, Tokens per sec: 79417.92548204518, Loss: 2.2461764812469482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20327 , TFLOPS: 98.46105850977945, Tokens per sec: 80455.06552147234, Loss: 2.2409815788269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20328 , TFLOPS: 97.83925229469322, Tokens per sec: 79946.9716564094, Loss: 2.2585933208465576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20329 , TFLOPS: 97.2446709774207, Tokens per sec: 79461.12395618127, Loss: 2.258619546890259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20330 , TFLOPS: 96.65641594009405, Tokens per sec: 78980.44562214984, Loss: 2.248114824295044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20331 , TFLOPS: 97.14560237387221, Tokens per sec: 79380.17244996906, Loss: 2.2499799728393555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20332 , TFLOPS: 96.63136778869053, Tokens per sec: 78959.97813284124, Loss: 2.252323865890503 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20333 , TFLOPS: 97.1121316601108, Tokens per sec: 79352.82266814179, Loss: 2.2626678943634033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20334 , TFLOPS: 96.64839308589887, Tokens per sec: 78973.88994146032, Loss: 2.2271413803100586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20335 , TFLOPS: 97.08004120753134, Tokens per sec: 79326.60073325738, Loss: 2.260075330734253 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20336 , TFLOPS: 96.73395489604458, Tokens per sec: 79043.80469908731, Loss: 2.249274492263794 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20337 , TFLOPS: 97.30144254947386, Tokens per sec: 79507.51346913619, Loss: 2.25762939453125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20338 , TFLOPS: 96.75473132183953, Tokens per sec: 79060.78165143715, Loss: 2.2550504207611084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20339 , TFLOPS: 95.51805850131709, Tokens per sec: 78050.26445499776, Loss: 2.251387119293213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20340 , TFLOPS: 96.6546686368937, Tokens per sec: 78979.01785572525, Loss: 2.2573587894439697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20341 , TFLOPS: 96.63819645627277, Tokens per sec: 78965.55801290824, Loss: 2.2448768615722656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20342 , TFLOPS: 96.61751717583611, Tokens per sec: 78948.6604405314, Loss: 2.2659332752227783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20343 , TFLOPS: 96.48205583621142, Tokens per sec: 78837.97149283896, Loss: 2.269324779510498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20344 , TFLOPS: 97.21970725872876, Tokens per sec: 79440.7254590148, Loss: 2.2576966285705566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20345 , TFLOPS: 96.22267433724438, Tokens per sec: 78626.02419295922, Loss: 2.269223690032959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20346 , TFLOPS: 96.81921962677734, Tokens per sec: 79113.47670547849, Loss: 2.2583446502685547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20347 , TFLOPS: 96.52324233320459, Tokens per sec: 78871.62603976685, Loss: 2.267707109451294 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20348 , TFLOPS: 97.82854444537625, Tokens per sec: 79938.22199708794, Loss: 2.282031774520874 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20349 , TFLOPS: 97.1064429487765, Tokens per sec: 79348.17427567008, Loss: 2.236081600189209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20350 , TFLOPS: 97.23530163135767, Tokens per sec: 79453.46802232474, Loss: 2.2650203704833984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20351 , TFLOPS: 97.16437309295549, Tokens per sec: 79395.5104877333, Loss: 2.255204677581787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20352 , TFLOPS: 96.63497351033665, Tokens per sec: 78962.92446081783, Loss: 2.2360522747039795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20353 , TFLOPS: 96.00720586215446, Tokens per sec: 78449.95935531125, Loss: 2.2402193546295166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20354 , TFLOPS: 96.63826696234246, Tokens per sec: 78965.61562523276, Loss: 2.248080015182495 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20355 , TFLOPS: 96.77157753088106, Tokens per sec: 79074.54712250498, Loss: 2.2421116828918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20356 , TFLOPS: 97.38617040609441, Tokens per sec: 79576.74678186991, Loss: 2.2528812885284424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20357 , TFLOPS: 97.16448925636364, Tokens per sec: 79395.60540784443, Loss: 2.2767446041107178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20358 , TFLOPS: 96.55046629384542, Tokens per sec: 78893.87143881356, Loss: 2.2557902336120605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20359 , TFLOPS: 97.24809689702492, Tokens per sec: 79463.92336328089, Loss: 2.2581241130828857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20360 , TFLOPS: 96.7362863499813, Tokens per sec: 79045.70979011597, Loss: 2.262030839920044 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20361 , TFLOPS: 96.76871761164468, Tokens per sec: 79072.21020887606, Loss: 2.245062828063965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20362 , TFLOPS: 97.74161045144952, Tokens per sec: 79867.18599277032, Loss: 2.252319812774658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20363 , TFLOPS: 96.70252478803457, Tokens per sec: 79018.12234874938, Loss: 2.2719461917877197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20364 , TFLOPS: 96.66794364449956, Tokens per sec: 78989.8652061691, Loss: 2.240767240524292 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20365 , TFLOPS: 98.43151341491607, Tokens per sec: 80430.92346390107, Loss: 2.2450714111328125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20366 , TFLOPS: 96.63068126116832, Tokens per sec: 78959.41715353006, Loss: 2.254537582397461 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20367 , TFLOPS: 97.81863951705672, Tokens per sec: 79930.12842518253, Loss: 2.252290725708008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20368 , TFLOPS: 96.71534833414039, Tokens per sec: 79028.60079837989, Loss: 2.2400424480438232 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20369 , TFLOPS: 96.51716611168398, Tokens per sec: 78866.66100274648, Loss: 2.2488784790039062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20370 , TFLOPS: 97.14763021785288, Tokens per sec: 79381.82945348701, Loss: 2.253304958343506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20371 , TFLOPS: 96.6723172048409, Tokens per sec: 78993.43895490945, Loss: 2.2507591247558594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20372 , TFLOPS: 96.4811309715375, Tokens per sec: 78837.21576210577, Loss: 2.2616000175476074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20373 , TFLOPS: 97.22284902663434, Tokens per sec: 79443.29267844674, Loss: 2.2540721893310547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20374 , TFLOPS: 97.38252252524681, Tokens per sec: 79573.76600452451, Loss: 2.240121841430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20375 , TFLOPS: 97.32049223815679, Tokens per sec: 79523.07946014161, Loss: 2.248333215713501 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20376 , TFLOPS: 97.41899881267562, Tokens per sec: 79603.57171796578, Loss: 2.2562406063079834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20377 , TFLOPS: 96.15315464669662, Tokens per sec: 78569.21786421658, Loss: 2.2565371990203857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20378 , TFLOPS: 97.28117997151739, Tokens per sec: 79490.9563950828, Loss: 2.2711191177368164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20379 , TFLOPS: 95.92986160593347, Tokens per sec: 78386.75937253478, Loss: 2.237154483795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20380 , TFLOPS: 97.22238103853456, Tokens per sec: 79442.91027332333, Loss: 2.234292507171631 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20381 , TFLOPS: 96.73960047289991, Tokens per sec: 79048.41784526588, Loss: 2.2422292232513428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20382 , TFLOPS: 97.22783872424739, Tokens per sec: 79447.36988881261, Loss: 2.2479357719421387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20383 , TFLOPS: 97.45305146693798, Tokens per sec: 79631.39701835686, Loss: 2.273740530014038 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20384 , TFLOPS: 96.10680848367184, Tokens per sec: 78531.34722135267, Loss: 2.2544949054718018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20385 , TFLOPS: 96.47821631473232, Tokens per sec: 78834.83412100031, Loss: 2.266526937484741 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20386 , TFLOPS: 97.05317448139729, Tokens per sec: 79304.6472397222, Loss: 2.269279718399048 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20387 , TFLOPS: 97.81805652405717, Tokens per sec: 79929.65204659502, Loss: 2.2591092586517334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20388 , TFLOPS: 97.20479064269247, Tokens per sec: 79428.53670806368, Loss: 2.259371757507324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20389 , TFLOPS: 96.5677652536537, Tokens per sec: 78908.00686418774, Loss: 2.2617387771606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20390 , TFLOPS: 96.32797170528508, Tokens per sec: 78712.06538298067, Loss: 2.2598650455474854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20391 , TFLOPS: 96.57480634627555, Tokens per sec: 78913.76032222291, Loss: 2.2590694427490234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20392 , TFLOPS: 96.70176784399928, Tokens per sec: 79017.50383029267, Loss: 2.2547173500061035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20393 , TFLOPS: 96.19533721966879, Tokens per sec: 78603.68633046819, Loss: 2.2714123725891113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20394 , TFLOPS: 97.32676803960103, Tokens per sec: 79528.2075790552, Loss: 2.2588374614715576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20395 , TFLOPS: 97.2164737395862, Tokens per sec: 79438.08326728521, Loss: 2.2663674354553223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20396 , TFLOPS: 94.8352960972222, Tokens per sec: 77492.36171874395, Loss: 2.2537124156951904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20397 , TFLOPS: 97.87823863020274, Tokens per sec: 79978.82839475179, Loss: 2.260629177093506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20398 , TFLOPS: 96.01580697414444, Tokens per sec: 78456.98754533057, Loss: 2.2414121627807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20399 , TFLOPS: 96.81024124184522, Tokens per sec: 79106.14023602626, Loss: 2.235685110092163 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20400 , TFLOPS: 96.72415461252069, Tokens per sec: 79035.7966351382, Loss: 2.2683541774749756 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/optimizer.pt +[rank0]:[2024-08-31 02:30:17,575] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007712081016507, 'preprocessing_with_comm': 0.0014388860145118088, 'state_converting': 2.6408104010042734, : 2.651564982981654}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020400, took 14.81s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018400 + eval ppl=7.523248195648193, eval loss=2.017997980117798 +------------------------------------------------------------------ +iteration: 20401 , TFLOPS: 96.53517191727931, Tokens per sec: 78881.37401000912, Loss: 2.2310147285461426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20402 , TFLOPS: 95.11444538112025, Tokens per sec: 77720.4617845589, Loss: 2.250154495239258 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20403 , TFLOPS: 98.2934066668154, Tokens per sec: 80318.07288484403, Loss: 2.263009548187256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20404 , TFLOPS: 96.23122094443998, Tokens per sec: 78633.00784570798, Loss: 2.25876522064209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20405 , TFLOPS: 95.94673183797354, Tokens per sec: 78400.54447341315, Loss: 2.2458224296569824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20406 , TFLOPS: 97.79837242596969, Tokens per sec: 79913.56766333396, Loss: 2.250335216522217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20407 , TFLOPS: 96.71245422244485, Tokens per sec: 79026.2359452124, Loss: 2.24221134185791 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20408 , TFLOPS: 96.66480617086587, Tokens per sec: 78987.30149569725, Loss: 2.2246077060699463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20409 , TFLOPS: 96.55643648906849, Tokens per sec: 78898.74983901682, Loss: 2.2663660049438477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20410 , TFLOPS: 97.69606152818875, Tokens per sec: 79829.96679504035, Loss: 2.2601258754730225 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20411 , TFLOPS: 96.41125458163847, Tokens per sec: 78780.11796514098, Loss: 2.2528483867645264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20412 , TFLOPS: 97.45454015840518, Tokens per sec: 79632.61346647702, Loss: 2.2246251106262207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20413 , TFLOPS: 97.41768215249118, Tokens per sec: 79602.49584103521, Loss: 2.2569687366485596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20414 , TFLOPS: 95.39441408837108, Tokens per sec: 77949.23142228923, Loss: 2.2369728088378906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20415 , TFLOPS: 97.68635184441503, Tokens per sec: 79822.03276247934, Loss: 2.24300217628479 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20416 , TFLOPS: 97.84627861717603, Tokens per sec: 79952.71304538373, Loss: 2.253732681274414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20417 , TFLOPS: 95.97653049544822, Tokens per sec: 78424.89372352138, Loss: 2.2662816047668457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20418 , TFLOPS: 97.12119546901287, Tokens per sec: 79360.22893971889, Loss: 2.255619525909424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20419 , TFLOPS: 97.06225572179983, Tokens per sec: 79312.06775502703, Loss: 2.2461588382720947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20420 , TFLOPS: 96.5725265756607, Tokens per sec: 78911.897462967, Loss: 2.25744366645813 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20421 , TFLOPS: 97.18678162655259, Tokens per sec: 79413.82107738247, Loss: 2.251115560531616 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20422 , TFLOPS: 97.45686553683825, Tokens per sec: 79634.51359305525, Loss: 2.267042636871338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20423 , TFLOPS: 91.73670555638877, Tokens per sec: 74960.42362301236, Loss: 2.259199619293213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20424 , TFLOPS: 96.80231988498497, Tokens per sec: 79099.66749142189, Loss: 2.2559385299682617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20425 , TFLOPS: 97.27269844604965, Tokens per sec: 79484.0259222892, Loss: 2.252943992614746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20426 , TFLOPS: 98.30997301977112, Tokens per sec: 80331.60967830002, Loss: 2.242537021636963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20427 , TFLOPS: 94.77410658388685, Tokens per sec: 77442.36219223944, Loss: 2.2561492919921875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20428 , TFLOPS: 98.4238475562242, Tokens per sec: 80424.65949342719, Loss: 2.2397727966308594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20429 , TFLOPS: 97.06593544653572, Tokens per sec: 79315.07455283326, Loss: 2.255858898162842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20430 , TFLOPS: 96.71268355402793, Tokens per sec: 79026.42333795247, Loss: 2.2498302459716797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20431 , TFLOPS: 96.6605323480177, Tokens per sec: 78983.80924503002, Loss: 2.2440733909606934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20432 , TFLOPS: 96.83606888298131, Tokens per sec: 79127.24466646119, Loss: 2.246225118637085 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20433 , TFLOPS: 96.1482195648098, Tokens per sec: 78565.18528176656, Loss: 2.235959529876709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20434 , TFLOPS: 96.36133163610678, Tokens per sec: 78739.32464121608, Loss: 2.2564313411712646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20435 , TFLOPS: 97.47964725052869, Tokens per sec: 79653.12911776519, Loss: 2.223550796508789 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20436 , TFLOPS: 97.2450824921895, Tokens per sec: 79461.46021549219, Loss: 2.254826068878174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20437 , TFLOPS: 97.1527119671444, Tokens per sec: 79385.981881649, Loss: 2.2411952018737793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20438 , TFLOPS: 96.09805669884898, Tokens per sec: 78524.19591268292, Loss: 2.2650904655456543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20439 , TFLOPS: 97.80078143865936, Tokens per sec: 79915.5361296162, Loss: 2.242489814758301 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20440 , TFLOPS: 95.80621115087678, Tokens per sec: 78285.72140266208, Loss: 2.272920846939087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20441 , TFLOPS: 98.2667740154511, Tokens per sec: 80296.31065982883, Loss: 2.2426199913024902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20442 , TFLOPS: 94.51026103853258, Tokens per sec: 77226.76720514189, Loss: 2.2621936798095703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20443 , TFLOPS: 96.60464976301697, Tokens per sec: 78938.14614628021, Loss: 2.2503364086151123 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20444 , TFLOPS: 97.19365393271048, Tokens per sec: 79419.43661565297, Loss: 2.2520880699157715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20445 , TFLOPS: 97.48193503795747, Tokens per sec: 79654.99852776615, Loss: 2.2516658306121826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20446 , TFLOPS: 95.01390059228787, Tokens per sec: 77638.30404935083, Loss: 2.265380382537842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20447 , TFLOPS: 97.19095043473877, Tokens per sec: 79417.22751786606, Loss: 2.249143362045288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20448 , TFLOPS: 98.35798278002048, Tokens per sec: 80370.83968928094, Loss: 2.2427122592926025 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20449 , TFLOPS: 95.97348024472942, Tokens per sec: 78422.4012851386, Loss: 2.2391862869262695 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20450 , TFLOPS: 97.85410648080473, Tokens per sec: 79959.10939426188, Loss: 2.257977247238159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20451 , TFLOPS: 97.7947501323223, Tokens per sec: 79910.60779394855, Loss: 2.2510945796966553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20452 , TFLOPS: 96.02006466255278, Tokens per sec: 78460.46661213168, Loss: 2.2403619289398193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20453 , TFLOPS: 97.77272214248218, Tokens per sec: 79892.60815639928, Loss: 2.2580888271331787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20454 , TFLOPS: 97.89713636478699, Tokens per sec: 79994.27022015155, Loss: 2.2737464904785156 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20455 , TFLOPS: 95.39105837870181, Tokens per sec: 77946.48938553482, Loss: 2.2685744762420654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20456 , TFLOPS: 97.90227018917719, Tokens per sec: 79998.46520021716, Loss: 2.2782297134399414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20457 , TFLOPS: 96.22203005986412, Tokens per sec: 78625.49773732663, Loss: 2.274958610534668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20458 , TFLOPS: 96.92704705470838, Tokens per sec: 79201.58527256588, Loss: 2.251569986343384 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20459 , TFLOPS: 95.04396606858961, Tokens per sec: 77662.8713239912, Loss: 2.2532458305358887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20460 , TFLOPS: 97.89274363527389, Tokens per sec: 79990.68080778746, Loss: 2.2384824752807617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20461 , TFLOPS: 96.86114481990018, Tokens per sec: 79147.73486002964, Loss: 2.238175868988037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20462 , TFLOPS: 95.68512760898957, Tokens per sec: 78186.78092361838, Loss: 2.2547483444213867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20463 , TFLOPS: 97.40231992059326, Tokens per sec: 79589.94296589239, Loss: 2.254878520965576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20464 , TFLOPS: 96.7947547397497, Tokens per sec: 79093.48581650598, Loss: 2.2649998664855957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20465 , TFLOPS: 96.6102805412673, Tokens per sec: 78942.74720013769, Loss: 2.2485580444335938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20466 , TFLOPS: 97.2743135516491, Tokens per sec: 79485.34566665103, Loss: 2.2453572750091553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20467 , TFLOPS: 98.40589240367163, Tokens per sec: 80409.98787606973, Loss: 2.247462034225464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20468 , TFLOPS: 96.53986103850136, Tokens per sec: 78885.20561166837, Loss: 2.2462258338928223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20469 , TFLOPS: 97.70564383761673, Tokens per sec: 79837.79674674456, Loss: 2.243443012237549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20470 , TFLOPS: 96.72124536107536, Tokens per sec: 79033.41941089144, Loss: 2.2515790462493896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20471 , TFLOPS: 97.1684673560538, Tokens per sec: 79398.85601550665, Loss: 2.2651991844177246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20472 , TFLOPS: 97.14603193557706, Tokens per sec: 79380.523455896, Loss: 2.26008939743042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20473 , TFLOPS: 97.24427267644259, Tokens per sec: 79460.79849419887, Loss: 2.2664530277252197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20474 , TFLOPS: 96.08971079225327, Tokens per sec: 78517.37625756083, Loss: 2.2555863857269287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20475 , TFLOPS: 97.24091011078623, Tokens per sec: 79458.05085524093, Loss: 2.256643772125244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20476 , TFLOPS: 95.96460886451777, Tokens per sec: 78415.15225199804, Loss: 2.238764762878418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20477 , TFLOPS: 96.72688565499072, Tokens per sec: 79038.02824024376, Loss: 2.254058837890625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20478 , TFLOPS: 96.62649630160254, Tokens per sec: 78955.99751533843, Loss: 2.2855124473571777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20479 , TFLOPS: 97.20759275080451, Tokens per sec: 79430.82638273471, Loss: 2.256899118423462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20480 , TFLOPS: 96.64685694797292, Tokens per sec: 78972.63472361732, Loss: 2.259345054626465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20481 , TFLOPS: 96.64488788988362, Tokens per sec: 78971.0257555645, Loss: 2.2393112182617188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20482 , TFLOPS: 97.86501340684988, Tokens per sec: 79968.02172430263, Loss: 2.2562599182128906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20483 , TFLOPS: 97.72422613694707, Tokens per sec: 79852.9808218783, Loss: 2.250977039337158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20484 , TFLOPS: 96.18720051673655, Tokens per sec: 78597.03762104489, Loss: 2.2507283687591553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20485 , TFLOPS: 97.65286966989459, Tokens per sec: 79794.67361577084, Loss: 2.2570712566375732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20486 , TFLOPS: 98.40799456785713, Tokens per sec: 80411.70560854021, Loss: 2.26487398147583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20487 , TFLOPS: 94.28925500361079, Tokens per sec: 77046.17748480583, Loss: 2.261168956756592 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20488 , TFLOPS: 98.35908841172085, Tokens per sec: 80371.7431294048, Loss: 2.246776819229126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20489 , TFLOPS: 96.6165870262203, Tokens per sec: 78947.90039133615, Loss: 2.253425121307373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20490 , TFLOPS: 96.06759769100451, Tokens per sec: 78499.3070732886, Loss: 2.255887985229492 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20491 , TFLOPS: 96.11326747055693, Tokens per sec: 78536.6250258056, Loss: 2.2621614933013916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20492 , TFLOPS: 97.96384372788016, Tokens per sec: 80048.77852373527, Loss: 2.247499704360962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20493 , TFLOPS: 96.5128694467605, Tokens per sec: 78863.15008723152, Loss: 2.2527365684509277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20494 , TFLOPS: 97.21605834075152, Tokens per sec: 79437.74383420408, Loss: 2.2411787509918213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20495 , TFLOPS: 95.7322714761651, Tokens per sec: 78225.30339107862, Loss: 2.2481887340545654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20496 , TFLOPS: 97.27090975080912, Tokens per sec: 79482.56433336306, Loss: 2.2635157108306885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20497 , TFLOPS: 95.49543879786073, Tokens per sec: 78031.7813130206, Loss: 2.235186815261841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20498 , TFLOPS: 97.33298639076946, Tokens per sec: 79533.28875386954, Loss: 2.269050121307373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20499 , TFLOPS: 96.52153939620763, Tokens per sec: 78870.23452610912, Loss: 2.2531049251556396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20500 , TFLOPS: 95.15836296820004, Tokens per sec: 77756.34797550105, Loss: 2.268862009048462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20501 , TFLOPS: 98.37064811367169, Tokens per sec: 80381.18885944203, Loss: 2.232431173324585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20502 , TFLOPS: 96.78413205242025, Tokens per sec: 79084.80574523678, Loss: 2.2669005393981934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20503 , TFLOPS: 97.34956762945575, Tokens per sec: 79546.83771083916, Loss: 2.271250009536743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20504 , TFLOPS: 96.59171211952084, Tokens per sec: 78927.57446473493, Loss: 2.2435507774353027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20505 , TFLOPS: 98.39339472627337, Tokens per sec: 80399.77570214878, Loss: 2.240450143814087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20506 , TFLOPS: 96.04871566102274, Tokens per sec: 78483.87808052379, Loss: 2.24387788772583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20507 , TFLOPS: 97.82900128784158, Tokens per sec: 79938.59529482653, Loss: 2.255560874938965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20508 , TFLOPS: 96.64450568491, Tokens per sec: 78970.71344604171, Loss: 2.265838146209717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20509 , TFLOPS: 96.15094895017306, Tokens per sec: 78567.4155328075, Loss: 2.2572202682495117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20510 , TFLOPS: 97.33235413496794, Tokens per sec: 79532.7721213784, Loss: 2.2390151023864746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20511 , TFLOPS: 96.65046531518891, Tokens per sec: 78975.58321335714, Loss: 2.256533622741699 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20512 , TFLOPS: 97.79789737360522, Tokens per sec: 79913.17948581802, Loss: 2.2639105319976807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20513 , TFLOPS: 97.75729768780774, Tokens per sec: 79880.004437424, Loss: 2.240244150161743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20514 , TFLOPS: 96.01672240521859, Tokens per sec: 78457.73556762654, Loss: 2.255167007446289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20515 , TFLOPS: 97.7838369105961, Tokens per sec: 79901.69031954475, Loss: 2.255434989929199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20516 , TFLOPS: 96.07774816852749, Tokens per sec: 78507.60128976915, Loss: 2.2552490234375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20517 , TFLOPS: 98.41629009288468, Tokens per sec: 80418.48409558604, Loss: 2.2542243003845215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20518 , TFLOPS: 96.66980515985205, Tokens per sec: 78991.38629828328, Loss: 2.2512850761413574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20519 , TFLOPS: 95.46723835910849, Tokens per sec: 78008.73800857227, Loss: 2.2404017448425293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20520 , TFLOPS: 97.01473307731042, Tokens per sec: 79273.23577887303, Loss: 2.2544727325439453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20521 , TFLOPS: 97.80693956421179, Tokens per sec: 79920.56809252934, Loss: 2.2501883506774902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20522 , TFLOPS: 96.10339554187976, Tokens per sec: 78528.55841875743, Loss: 2.245344877243042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20523 , TFLOPS: 97.8459847576488, Tokens per sec: 79952.47292519941, Loss: 2.2428641319274902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20524 , TFLOPS: 97.7733040533787, Tokens per sec: 79893.08365077255, Loss: 2.2430431842803955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20525 , TFLOPS: 94.67265259663529, Tokens per sec: 77359.4615275982, Loss: 2.2613723278045654 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20526 , TFLOPS: 97.79605143922481, Tokens per sec: 79911.67112531721, Loss: 2.257070541381836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20527 , TFLOPS: 96.79676854042717, Tokens per sec: 79095.13134487938, Loss: 2.222346782684326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20528 , TFLOPS: 96.62583955750965, Tokens per sec: 78955.46087283401, Loss: 2.238308906555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20529 , TFLOPS: 96.18749216651811, Tokens per sec: 78597.27593558913, Loss: 2.2750606536865234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20530 , TFLOPS: 97.39988143981006, Tokens per sec: 79587.95042047236, Loss: 2.2586655616760254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20531 , TFLOPS: 97.1305632083148, Tokens per sec: 79367.88356064954, Loss: 2.253147602081299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20532 , TFLOPS: 97.11026639145048, Tokens per sec: 79351.29850910316, Loss: 2.2473926544189453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20533 , TFLOPS: 96.0448489167252, Tokens per sec: 78480.71846421942, Loss: 2.2615761756896973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20534 , TFLOPS: 97.11836494989284, Tokens per sec: 79357.91604968185, Loss: 2.2277040481567383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20535 , TFLOPS: 95.42674602287458, Tokens per sec: 77975.65067827007, Loss: 2.2668707370758057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20536 , TFLOPS: 97.23213819700086, Tokens per sec: 79450.88309867775, Loss: 2.2548482418060303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20537 , TFLOPS: 97.85600042200491, Tokens per sec: 79960.65698237093, Loss: 2.242281913757324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20538 , TFLOPS: 95.08580889052712, Tokens per sec: 77697.06217092641, Loss: 2.2559447288513184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20539 , TFLOPS: 98.4125977544052, Tokens per sec: 80415.46699076521, Loss: 2.22255802154541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20540 , TFLOPS: 97.31177071242081, Tokens per sec: 79515.9528769507, Loss: 2.256908416748047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20541 , TFLOPS: 97.10184162426077, Tokens per sec: 79344.41441495965, Loss: 2.2458670139312744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20542 , TFLOPS: 97.14727062926461, Tokens per sec: 79381.53562439492, Loss: 2.265169382095337 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20543 , TFLOPS: 98.49447581153134, Tokens per sec: 80482.37165897168, Loss: 2.251347303390503 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20544 , TFLOPS: 95.49603460082945, Tokens per sec: 78032.26815896368, Loss: 2.2587673664093018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20545 , TFLOPS: 97.76046686981584, Tokens per sec: 79882.59405762484, Loss: 2.2761423587799072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20546 , TFLOPS: 96.61868716557571, Tokens per sec: 78949.61646926585, Loss: 2.247999906539917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20547 , TFLOPS: 96.27465682454812, Tokens per sec: 78668.50042148336, Loss: 2.232573986053467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20548 , TFLOPS: 96.79725861848176, Tokens per sec: 79095.53180027353, Loss: 2.286653518676758 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20549 , TFLOPS: 96.58960639525225, Tokens per sec: 78925.85382322891, Loss: 2.2518792152404785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20550 , TFLOPS: 97.74432285182763, Tokens per sec: 79869.40236494267, Loss: 2.2567272186279297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20551 , TFLOPS: 97.84395628599111, Tokens per sec: 79950.81540879041, Loss: 2.2644503116607666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20552 , TFLOPS: 96.61854339654145, Tokens per sec: 78949.49899188701, Loss: 2.2601828575134277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20553 , TFLOPS: 97.13486498253607, Tokens per sec: 79371.39865110326, Loss: 2.264902114868164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20554 , TFLOPS: 96.69198233770094, Tokens per sec: 79009.50784119495, Loss: 2.2581381797790527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20555 , TFLOPS: 97.73331525738008, Tokens per sec: 79860.4077761589, Loss: 2.212906837463379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20556 , TFLOPS: 97.30541651030431, Tokens per sec: 79510.76069482959, Loss: 2.2462046146392822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20557 , TFLOPS: 95.56407483700785, Tokens per sec: 78087.86558745654, Loss: 2.2414751052856445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20558 , TFLOPS: 97.12164353754075, Tokens per sec: 79360.59506804732, Loss: 2.255601167678833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20559 , TFLOPS: 97.8899705801329, Tokens per sec: 79988.41487304698, Loss: 2.253174304962158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20560 , TFLOPS: 95.98212841107153, Tokens per sec: 78429.46792448036, Loss: 2.263227939605713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20561 , TFLOPS: 97.27813502558591, Tokens per sec: 79488.4682913783, Loss: 2.2709250450134277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20562 , TFLOPS: 97.77507359611666, Tokens per sec: 79894.52958969581, Loss: 2.262446403503418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20563 , TFLOPS: 95.4704925505931, Tokens per sec: 78011.39709220469, Loss: 2.2549941539764404 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20564 , TFLOPS: 97.17013478567024, Tokens per sec: 79400.2185151697, Loss: 2.2587521076202393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20565 , TFLOPS: 96.24274227312716, Tokens per sec: 78642.42221996366, Loss: 2.2586045265197754 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20566 , TFLOPS: 96.73646518270087, Tokens per sec: 79045.8559189347, Loss: 2.2403125762939453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20567 , TFLOPS: 95.56833424540689, Tokens per sec: 78091.34605970632, Loss: 2.2363617420196533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20568 , TFLOPS: 97.38918219281824, Tokens per sec: 79579.20779033222, Loss: 2.2462520599365234 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20569 , TFLOPS: 97.23558039848619, Tokens per sec: 79453.69581012157, Loss: 2.2377336025238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20570 , TFLOPS: 97.31919039051698, Tokens per sec: 79522.01568692259, Loss: 2.279540538787842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20571 , TFLOPS: 96.07909330684754, Tokens per sec: 78508.70043691742, Loss: 2.2407684326171875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20572 , TFLOPS: 96.76862476469334, Tokens per sec: 79072.13434124204, Loss: 2.2528045177459717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20573 , TFLOPS: 96.67212446585573, Tokens per sec: 78993.28146292314, Loss: 2.2327880859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20574 , TFLOPS: 96.69405777004543, Tokens per sec: 79011.20373039001, Loss: 2.23439359664917 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20575 , TFLOPS: 97.9555403166882, Tokens per sec: 80041.99359270149, Loss: 2.2584095001220703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20576 , TFLOPS: 94.39070076767366, Tokens per sec: 77129.0714300679, Loss: 2.233126401901245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20577 , TFLOPS: 98.44192296532384, Tokens per sec: 80439.42937550491, Loss: 2.252591133117676 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20578 , TFLOPS: 97.27761691025616, Tokens per sec: 79488.04492600488, Loss: 2.269678831100464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20579 , TFLOPS: 97.38506347335468, Tokens per sec: 79575.84227863328, Loss: 2.2526590824127197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20580 , TFLOPS: 97.74758313637014, Tokens per sec: 79872.06642736954, Loss: 2.2602388858795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20581 , TFLOPS: 97.84434286170277, Tokens per sec: 79951.13128975575, Loss: 2.2528748512268066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20582 , TFLOPS: 95.06221638984378, Tokens per sec: 77677.78413129304, Loss: 2.252075672149658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20583 , TFLOPS: 98.35182234078361, Tokens per sec: 80365.80583579668, Loss: 2.2924537658691406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20584 , TFLOPS: 97.28810956935013, Tokens per sec: 79496.61874785564, Loss: 2.234719753265381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20585 , TFLOPS: 96.68041061749382, Tokens per sec: 79000.05229073114, Loss: 2.2647409439086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20586 , TFLOPS: 97.33719146695333, Tokens per sec: 79536.72482986766, Loss: 2.233987331390381 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20587 , TFLOPS: 96.65054483931188, Tokens per sec: 78975.64819456515, Loss: 2.25319504737854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20588 , TFLOPS: 98.38718547847799, Tokens per sec: 80394.70196593492, Loss: 2.244342088699341 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20589 , TFLOPS: 97.04111270512895, Tokens per sec: 79294.79125183559, Loss: 2.261094808578491 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20590 , TFLOPS: 96.12617098012527, Tokens per sec: 78547.16881563977, Loss: 2.2397115230560303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20591 , TFLOPS: 97.24047942131897, Tokens per sec: 79457.69892779029, Loss: 2.280162811279297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20592 , TFLOPS: 97.25282090439545, Tokens per sec: 79467.78347131012, Loss: 2.2537171840667725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20593 , TFLOPS: 97.7726943086781, Tokens per sec: 79892.58541268147, Loss: 2.2479357719421387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20594 , TFLOPS: 96.63083503431935, Tokens per sec: 78959.54280553023, Loss: 2.2587828636169434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20595 , TFLOPS: 94.9667908599155, Tokens per sec: 77599.80947431698, Loss: 2.2633838653564453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20596 , TFLOPS: 98.41701043113244, Tokens per sec: 80419.07270251145, Loss: 2.2604105472564697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20597 , TFLOPS: 96.76082543981272, Tokens per sec: 79065.76131210961, Loss: 2.2454161643981934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20598 , TFLOPS: 97.25343147468205, Tokens per sec: 79468.28238400878, Loss: 2.2177798748016357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20599 , TFLOPS: 96.03238409239799, Tokens per sec: 78470.53313538854, Loss: 2.2515649795532227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20600 , TFLOPS: 98.37773457521445, Tokens per sec: 80386.97938959037, Loss: 2.2662816047668457 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/optimizer.pt +[rank0]:[2024-08-31 03:25:53,557] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007670792023418471, 'preprocessing_with_comm': 0.0016859019815456122, 'state_converting': 2.639180141995894, : 2.6501752980111632}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020600, took 14.92s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018600 + eval ppl=7.271195411682129, eval loss=1.983920693397522 +------------------------------------------------------------------ +iteration: 20601 , TFLOPS: 94.5300180794344, Tokens per sec: 77242.91119185425, Loss: 2.2492220401763916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20602 , TFLOPS: 97.29911417553063, Tokens per sec: 79505.6108948495, Loss: 2.2506141662597656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20603 , TFLOPS: 97.22677578177512, Tokens per sec: 79446.50133115595, Loss: 2.237523078918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20604 , TFLOPS: 96.50866763020608, Tokens per sec: 78859.71667476052, Loss: 2.251434803009033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20605 , TFLOPS: 96.65091492683857, Tokens per sec: 78975.95060261012, Loss: 2.25408935546875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20606 , TFLOPS: 97.81395319609378, Tokens per sec: 79926.29911168705, Loss: 2.2776927947998047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20607 , TFLOPS: 97.25015171169183, Tokens per sec: 79465.60240524086, Loss: 2.2417056560516357 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20608 , TFLOPS: 97.89226274766682, Tokens per sec: 79990.28786214459, Loss: 2.2452023029327393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20609 , TFLOPS: 96.56697845753814, Tokens per sec: 78907.3639528278, Loss: 2.264847993850708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20610 , TFLOPS: 97.84889758686008, Tokens per sec: 79954.85307293084, Loss: 2.23903751373291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20611 , TFLOPS: 96.5498732384369, Tokens per sec: 78893.38683797265, Loss: 2.2520813941955566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20612 , TFLOPS: 97.81725511585897, Tokens per sec: 79928.99719532617, Loss: 2.2427945137023926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20613 , TFLOPS: 96.60581339618014, Tokens per sec: 78939.09698089241, Loss: 2.254239082336426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20614 , TFLOPS: 98.35429685170824, Tokens per sec: 80367.82782237278, Loss: 2.2701032161712646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20615 , TFLOPS: 95.4821945935886, Tokens per sec: 78020.9591327737, Loss: 2.249741554260254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20616 , TFLOPS: 97.3953108534146, Tokens per sec: 79584.21567667127, Loss: 2.253697633743286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20617 , TFLOPS: 97.30399995446037, Tokens per sec: 79509.60319058411, Loss: 2.2535152435302734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20618 , TFLOPS: 96.81875789244182, Tokens per sec: 79113.09941046684, Loss: 2.238123893737793 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20619 , TFLOPS: 96.61952592830302, Tokens per sec: 78950.301843882, Loss: 2.261375904083252 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20620 , TFLOPS: 98.36079339939108, Tokens per sec: 80373.13631872044, Loss: 2.258617877960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20621 , TFLOPS: 97.25138621137665, Tokens per sec: 79466.61114671218, Loss: 2.2351653575897217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20622 , TFLOPS: 97.00205979685458, Tokens per sec: 79262.8801151733, Loss: 2.2607128620147705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20623 , TFLOPS: 97.27311814191772, Tokens per sec: 79484.36886658694, Loss: 2.2716193199157715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20624 , TFLOPS: 97.5486708926228, Tokens per sec: 79709.53011254691, Loss: 2.253045082092285 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20625 , TFLOPS: 96.24900193140869, Tokens per sec: 78647.53714788343, Loss: 2.2550368309020996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20626 , TFLOPS: 96.01745950429331, Tokens per sec: 78458.33787025513, Loss: 2.2519447803497314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20627 , TFLOPS: 96.55207982865207, Tokens per sec: 78895.18989964014, Loss: 2.2733347415924072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20628 , TFLOPS: 96.46645790704918, Tokens per sec: 78825.22602339409, Loss: 2.2564597129821777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20629 , TFLOPS: 97.82876324563968, Tokens per sec: 79938.4007844156, Loss: 2.2697603702545166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20630 , TFLOPS: 95.51873631089289, Tokens per sec: 78050.81831065052, Loss: 2.253004312515259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20631 , TFLOPS: 97.75980270388123, Tokens per sec: 79882.0513505425, Loss: 2.2599685192108154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20632 , TFLOPS: 94.78067670254407, Tokens per sec: 77447.73080532414, Loss: 2.272048234939575 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20633 , TFLOPS: 96.69008038024728, Tokens per sec: 79007.95370279883, Loss: 2.249922513961792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20634 , TFLOPS: 96.0051296381174, Tokens per sec: 78448.26281920378, Loss: 2.2670884132385254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20635 , TFLOPS: 97.20024803860883, Tokens per sec: 79424.82482932991, Loss: 2.2613072395324707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20636 , TFLOPS: 95.83692498256589, Tokens per sec: 78310.81846517962, Loss: 2.2524805068969727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20637 , TFLOPS: 95.15954467260218, Tokens per sec: 77757.31357658783, Loss: 2.2603232860565186 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20638 , TFLOPS: 97.80837282751159, Tokens per sec: 79921.73924886709, Loss: 2.2537012100219727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20639 , TFLOPS: 95.78159007283323, Tokens per sec: 78265.60288598976, Loss: 2.2347350120544434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20640 , TFLOPS: 97.79132280872442, Tokens per sec: 79907.80723960957, Loss: 2.2367818355560303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20641 , TFLOPS: 97.26685169064483, Tokens per sec: 79479.24838793905, Loss: 2.264171600341797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20642 , TFLOPS: 97.78157095743305, Tokens per sec: 79899.8387508844, Loss: 2.278611898422241 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20643 , TFLOPS: 97.02248544259669, Tokens per sec: 79279.57043611207, Loss: 2.2632486820220947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20644 , TFLOPS: 97.83985268069333, Tokens per sec: 79947.46224726539, Loss: 2.257605791091919 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20645 , TFLOPS: 96.59784622815457, Tokens per sec: 78932.58680280557, Loss: 2.2566821575164795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20646 , TFLOPS: 97.13088463902108, Tokens per sec: 79368.14620995401, Loss: 2.253464937210083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20647 , TFLOPS: 96.12688004174726, Tokens per sec: 78547.74820814397, Loss: 2.2425243854522705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20648 , TFLOPS: 97.83865782392589, Tokens per sec: 79946.48589904238, Loss: 2.244216203689575 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20649 , TFLOPS: 97.1841372454266, Tokens per sec: 79411.660285492, Loss: 2.2258217334747314 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20650 , TFLOPS: 97.2155773683143, Tokens per sec: 79437.35081924431, Loss: 2.2283334732055664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20651 , TFLOPS: 97.20139435929849, Tokens per sec: 79425.76151747436, Loss: 2.244328022003174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20652 , TFLOPS: 97.62758274759955, Tokens per sec: 79774.01102062028, Loss: 2.233311414718628 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20653 , TFLOPS: 95.66334853750166, Tokens per sec: 78168.98468366243, Loss: 2.2399580478668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20654 , TFLOPS: 97.32002819941226, Tokens per sec: 79522.70028213804, Loss: 2.2497975826263428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20655 , TFLOPS: 97.19888453391735, Tokens per sec: 79423.71067454676, Loss: 2.2471909523010254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20656 , TFLOPS: 96.7356211654764, Tokens per sec: 79045.16625073366, Loss: 2.242682933807373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20657 , TFLOPS: 96.41128760854201, Tokens per sec: 78780.14495227406, Loss: 2.23748779296875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20658 , TFLOPS: 98.40380128127538, Tokens per sec: 80408.27916612948, Loss: 2.266982316970825 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20659 , TFLOPS: 97.10074631889468, Tokens per sec: 79343.51941274935, Loss: 2.2685539722442627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20660 , TFLOPS: 97.54137129749107, Tokens per sec: 79703.56542545647, Loss: 2.260460376739502 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20661 , TFLOPS: 93.10489246223419, Tokens per sec: 76078.4043640429, Loss: 2.262752056121826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20662 , TFLOPS: 98.40446778552383, Tokens per sec: 80408.82378390823, Loss: 2.2217209339141846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20663 , TFLOPS: 97.13816344293781, Tokens per sec: 79374.09390800746, Loss: 2.269029378890991 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20664 , TFLOPS: 95.83712113288296, Tokens per sec: 78310.97874465305, Loss: 2.224996328353882 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20665 , TFLOPS: 97.22809791361502, Tokens per sec: 79447.58167911702, Loss: 2.24037504196167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20666 , TFLOPS: 96.03323118391826, Tokens per sec: 78471.22531567582, Loss: 2.264108180999756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20667 , TFLOPS: 97.75383067998989, Tokens per sec: 79877.17145608732, Loss: 2.249781370162964 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20668 , TFLOPS: 95.63440703551815, Tokens per sec: 78145.33583737117, Loss: 2.2142202854156494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20669 , TFLOPS: 97.1357711838633, Tokens per sec: 79372.13913153544, Loss: 2.258653163909912 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20670 , TFLOPS: 95.0349874254291, Tokens per sec: 77655.5346435339, Loss: 2.244127035140991 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20671 , TFLOPS: 97.28529662828703, Tokens per sec: 79494.32022130143, Loss: 2.2527966499328613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20672 , TFLOPS: 96.71145234151822, Tokens per sec: 79025.41728251654, Loss: 2.2674312591552734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20673 , TFLOPS: 97.11932540508961, Tokens per sec: 79358.70086234745, Loss: 2.258983850479126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20674 , TFLOPS: 96.20086938964876, Tokens per sec: 78608.20680896955, Loss: 2.259094476699829 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20675 , TFLOPS: 95.7888138085673, Tokens per sec: 78271.50558641358, Loss: 2.2258427143096924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20676 , TFLOPS: 97.78162905281835, Tokens per sec: 79899.88622211922, Loss: 2.270801544189453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20677 , TFLOPS: 94.51233826589979, Tokens per sec: 77228.46456109623, Loss: 2.2604544162750244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20678 , TFLOPS: 97.10697208066793, Tokens per sec: 79348.60664295964, Loss: 2.2466535568237305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20679 , TFLOPS: 97.2119350521684, Tokens per sec: 79434.37458895992, Loss: 2.2358195781707764 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20680 , TFLOPS: 97.77690460323988, Tokens per sec: 79896.02575275052, Loss: 2.247196912765503 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20681 , TFLOPS: 97.31800507877202, Tokens per sec: 79521.04713818314, Loss: 2.249040126800537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20682 , TFLOPS: 97.36115838945321, Tokens per sec: 79556.30881920579, Loss: 2.2662997245788574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20683 , TFLOPS: 97.2547484055533, Tokens per sec: 79469.3584821245, Loss: 2.2587504386901855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20684 , TFLOPS: 97.81618491974218, Tokens per sec: 79928.12271052986, Loss: 2.241427421569824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20685 , TFLOPS: 96.21178961249856, Tokens per sec: 78617.13000417166, Loss: 2.2385342121124268 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20686 , TFLOPS: 97.97590954296815, Tokens per sec: 80058.63781184537, Loss: 2.255005121231079 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20687 , TFLOPS: 96.50406358296162, Tokens per sec: 78855.95458923836, Loss: 2.2288811206817627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20688 , TFLOPS: 97.30305518394862, Tokens per sec: 79508.83119427842, Loss: 2.2605695724487305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20689 , TFLOPS: 97.33142613667381, Tokens per sec: 79532.01383008347, Loss: 2.27350115776062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20690 , TFLOPS: 98.46805812923846, Tokens per sec: 80460.78509071852, Loss: 2.2704484462738037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20691 , TFLOPS: 95.52794163728846, Tokens per sec: 78058.34021981452, Loss: 2.2507376670837402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20692 , TFLOPS: 97.33698853792788, Tokens per sec: 79536.55901133711, Loss: 2.250164747238159 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20693 , TFLOPS: 96.74921551555799, Tokens per sec: 79056.27454413503, Loss: 2.25740122795105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20694 , TFLOPS: 96.70874841735365, Tokens per sec: 79023.20783647647, Loss: 2.258100748062134 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20695 , TFLOPS: 96.61218709801997, Tokens per sec: 78944.30509673942, Loss: 2.2629430294036865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20696 , TFLOPS: 98.39149815889198, Tokens per sec: 80398.22596811946, Loss: 2.241502285003662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20697 , TFLOPS: 97.20998205705699, Tokens per sec: 79432.77874637983, Loss: 2.2696938514709473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20698 , TFLOPS: 96.42646283199826, Tokens per sec: 78792.54501800516, Loss: 2.2363498210906982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20699 , TFLOPS: 97.33804232013395, Tokens per sec: 79537.42008390445, Loss: 2.2566285133361816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20700 , TFLOPS: 98.00677023366914, Tokens per sec: 80083.85487664201, Loss: 2.252748966217041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20701 , TFLOPS: 97.22792295204275, Tokens per sec: 79447.43871351241, Loss: 2.2565691471099854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20702 , TFLOPS: 95.43172021977426, Tokens per sec: 77979.71522260402, Loss: 2.2768146991729736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20703 , TFLOPS: 97.74397161323985, Tokens per sec: 79869.1153588509, Loss: 2.243281364440918 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20704 , TFLOPS: 96.52232754084199, Tokens per sec: 78870.87853937852, Loss: 2.247746467590332 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20705 , TFLOPS: 97.6321126123034, Tokens per sec: 79777.71248967889, Loss: 2.2289371490478516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20706 , TFLOPS: 96.19030406639736, Tokens per sec: 78599.57361136521, Loss: 2.2731378078460693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20707 , TFLOPS: 98.36738304535658, Tokens per sec: 80378.52088807142, Loss: 2.2645986080169678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20708 , TFLOPS: 94.82824473577826, Tokens per sec: 77486.59986978992, Loss: 2.264436721801758 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20709 , TFLOPS: 97.69870197235063, Tokens per sec: 79832.12436993611, Loss: 2.233078718185425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20710 , TFLOPS: 95.34272334783425, Tokens per sec: 77906.99358755855, Loss: 2.2400383949279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20711 , TFLOPS: 97.77282558299834, Tokens per sec: 79892.69268030784, Loss: 2.280348300933838 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20712 , TFLOPS: 96.19823444626749, Tokens per sec: 78606.05372890322, Loss: 2.2604005336761475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20713 , TFLOPS: 95.7785462696557, Tokens per sec: 78263.11571605894, Loss: 2.236215591430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20714 , TFLOPS: 97.87667218843141, Tokens per sec: 79977.54841485665, Loss: 2.2777087688446045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20715 , TFLOPS: 94.44971080129466, Tokens per sec: 77177.29004759301, Loss: 2.247001886367798 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20716 , TFLOPS: 97.13605898293169, Tokens per sec: 79372.37429956283, Loss: 2.273440361022949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20717 , TFLOPS: 97.39689367519776, Tokens per sec: 79585.50904109575, Loss: 2.2311041355133057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20718 , TFLOPS: 97.22928797206625, Tokens per sec: 79448.55410651235, Loss: 2.247333526611328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20719 , TFLOPS: 97.36503975092373, Tokens per sec: 79559.48037957848, Loss: 2.2541356086730957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20720 , TFLOPS: 96.65231942562255, Tokens per sec: 78977.09825472151, Loss: 2.2309162616729736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20721 , TFLOPS: 97.24606660578934, Tokens per sec: 79462.26436004795, Loss: 2.262519598007202 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20722 , TFLOPS: 97.886023569719, Tokens per sec: 79985.1896692327, Loss: 2.2458407878875732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20723 , TFLOPS: 96.205588339317, Tokens per sec: 78612.06278422003, Loss: 2.27673602104187 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20724 , TFLOPS: 97.86913850497932, Tokens per sec: 79971.39244817368, Loss: 2.2332732677459717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20725 , TFLOPS: 97.21380977273337, Tokens per sec: 79435.90647139316, Loss: 2.2670540809631348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20726 , TFLOPS: 97.08615305195516, Tokens per sec: 79331.59487866896, Loss: 2.255584955215454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20727 , TFLOPS: 96.63661050112825, Tokens per sec: 78964.26208813353, Loss: 2.264874219894409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20728 , TFLOPS: 98.3727097653199, Tokens per sec: 80382.87348807542, Loss: 2.2309587001800537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20729 , TFLOPS: 95.52169824269218, Tokens per sec: 78053.23858137062, Loss: 2.2459716796875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20730 , TFLOPS: 96.7589916480467, Tokens per sec: 79064.26287365159, Loss: 2.2591683864593506 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20731 , TFLOPS: 96.84105639158058, Tokens per sec: 79131.32008812754, Loss: 2.2535438537597656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20732 , TFLOPS: 97.24745971217304, Tokens per sec: 79463.40270313581, Loss: 2.2656657695770264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20733 , TFLOPS: 96.6116287733939, Tokens per sec: 78943.84887531491, Loss: 2.2512197494506836 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20734 , TFLOPS: 97.84670848270443, Tokens per sec: 79953.06429957272, Loss: 2.2559125423431396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20735 , TFLOPS: 96.59061322545739, Tokens per sec: 78926.67653010765, Loss: 2.2445201873779297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20736 , TFLOPS: 97.2774267711761, Tokens per sec: 79487.88955846796, Loss: 2.2524685859680176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20737 , TFLOPS: 97.32829861019391, Tokens per sec: 79529.45824768704, Loss: 2.257659912109375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20738 , TFLOPS: 97.34781458177238, Tokens per sec: 79545.40525044962, Loss: 2.266569137573242 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20739 , TFLOPS: 97.36803857536688, Tokens per sec: 79561.93079622758, Loss: 2.232978582382202 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20740 , TFLOPS: 95.37046033096189, Tokens per sec: 77929.65819048561, Loss: 2.250105857849121 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20741 , TFLOPS: 97.77621463470317, Tokens per sec: 79895.46196169779, Loss: 2.256901502609253 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20742 , TFLOPS: 96.57657406868492, Tokens per sec: 78915.20477370883, Loss: 2.2429375648498535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20743 , TFLOPS: 97.83335686539033, Tokens per sec: 79942.15434936415, Loss: 2.2490158081054688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20744 , TFLOPS: 96.01208868391642, Tokens per sec: 78453.94923466796, Loss: 2.257075786590576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20745 , TFLOPS: 97.30592492045629, Tokens per sec: 79511.17612985242, Loss: 2.2319650650024414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20746 , TFLOPS: 96.02242166791892, Tokens per sec: 78462.39258189106, Loss: 2.2551426887512207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20747 , TFLOPS: 96.68028124164633, Tokens per sec: 78999.94657439573, Loss: 2.2608203887939453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20748 , TFLOPS: 96.62311042104079, Tokens per sec: 78953.2308251711, Loss: 2.2401559352874756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20749 , TFLOPS: 97.87005491523384, Tokens per sec: 79972.14127058313, Loss: 2.2451744079589844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20750 , TFLOPS: 95.59220768185433, Tokens per sec: 78110.85365918446, Loss: 2.24261736869812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20751 , TFLOPS: 97.3338410411118, Tokens per sec: 79533.98711066491, Loss: 2.269752264022827 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20752 , TFLOPS: 96.19534518437445, Tokens per sec: 78603.6928386342, Loss: 2.248307228088379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20753 , TFLOPS: 96.4194260116806, Tokens per sec: 78786.79505097972, Loss: 2.2402896881103516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20754 , TFLOPS: 97.15940636907499, Tokens per sec: 79391.45204979571, Loss: 2.252460241317749 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20755 , TFLOPS: 96.74932869447161, Tokens per sec: 79056.36702553883, Loss: 2.247631549835205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20756 , TFLOPS: 97.14689095884867, Tokens per sec: 79381.22538592447, Loss: 2.2510111331939697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20757 , TFLOPS: 97.75926044576059, Tokens per sec: 79881.60825747317, Loss: 2.2447080612182617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20758 , TFLOPS: 97.25084410667468, Tokens per sec: 79466.16817900518, Loss: 2.2360167503356934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20759 , TFLOPS: 96.61264598047995, Tokens per sec: 78944.68006141017, Loss: 2.239701271057129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20760 , TFLOPS: 97.74375465004917, Tokens per sec: 79868.93807264268, Loss: 2.2572338581085205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20761 , TFLOPS: 96.23924334776457, Tokens per sec: 78639.56315797953, Loss: 2.255995035171509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20762 , TFLOPS: 96.72789830367113, Tokens per sec: 79038.85570154841, Loss: 2.2649130821228027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20763 , TFLOPS: 96.73539166657939, Tokens per sec: 79044.97872127708, Loss: 2.2355663776397705 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20764 , TFLOPS: 97.79638696917795, Tokens per sec: 79911.94529547922, Loss: 2.2601332664489746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20765 , TFLOPS: 95.55761396058733, Tokens per sec: 78082.58623901558, Loss: 2.269503116607666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20766 , TFLOPS: 98.33976491011268, Tokens per sec: 80355.95339880958, Loss: 2.2642667293548584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20767 , TFLOPS: 96.66531277982276, Tokens per sec: 78987.7154589172, Loss: 2.2646937370300293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20768 , TFLOPS: 96.6648720078945, Tokens per sec: 78987.35529282813, Loss: 2.2475268840789795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20769 , TFLOPS: 96.31851065985926, Tokens per sec: 78704.33451921424, Loss: 2.261197566986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20770 , TFLOPS: 95.70787175507333, Tokens per sec: 78205.36575086982, Loss: 2.2530689239501953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20771 , TFLOPS: 98.42755085750358, Tokens per sec: 80427.685556234, Loss: 2.2624495029449463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20772 , TFLOPS: 96.12293942619415, Tokens per sec: 78544.52822973506, Loss: 2.2630367279052734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20773 , TFLOPS: 97.84232230433994, Tokens per sec: 79949.48024032082, Loss: 2.273390293121338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20774 , TFLOPS: 97.28453954620947, Tokens per sec: 79493.70159004684, Loss: 2.247772455215454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20775 , TFLOPS: 97.12056448707027, Tokens per sec: 79359.71334813065, Loss: 2.2300403118133545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20776 , TFLOPS: 97.76574702612808, Tokens per sec: 79886.90860927089, Loss: 2.2400360107421875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20777 , TFLOPS: 96.64616554068253, Tokens per sec: 78972.06975692193, Loss: 2.239093065261841 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20778 , TFLOPS: 96.0569906081012, Tokens per sec: 78490.6397527977, Loss: 2.243983030319214 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20779 , TFLOPS: 97.7829845405903, Tokens per sec: 79900.99382607106, Loss: 2.261276960372925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20780 , TFLOPS: 96.6337087641847, Tokens per sec: 78961.8910041796, Loss: 2.270420551300049 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20781 , TFLOPS: 97.04505613823736, Tokens per sec: 79298.01353254146, Loss: 2.254558801651001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20782 , TFLOPS: 97.17204126223213, Tokens per sec: 79401.77634624549, Loss: 2.260375499725342 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20783 , TFLOPS: 97.37194287748373, Tokens per sec: 79565.12110199295, Loss: 2.2578299045562744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20784 , TFLOPS: 96.54529757062357, Tokens per sec: 78889.64794201421, Loss: 2.2441723346710205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20785 , TFLOPS: 96.67887149430679, Tokens per sec: 78998.79463355447, Loss: 2.2406251430511475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20786 , TFLOPS: 97.153268091776, Tokens per sec: 79386.4363054014, Loss: 2.2386510372161865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20787 , TFLOPS: 97.28428485974516, Tokens per sec: 79493.49347918064, Loss: 2.26213002204895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20788 , TFLOPS: 95.91806724269925, Tokens per sec: 78377.12189472218, Loss: 2.239792823791504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20789 , TFLOPS: 97.92374293744105, Tokens per sec: 80016.01113558128, Loss: 2.2542884349823 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20790 , TFLOPS: 96.85362901234568, Tokens per sec: 79141.59350020277, Loss: 2.238994598388672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20791 , TFLOPS: 96.26316343993139, Tokens per sec: 78659.10888104698, Loss: 2.257373094558716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20792 , TFLOPS: 97.77119643493339, Tokens per sec: 79891.36146148595, Loss: 2.2519261837005615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20793 , TFLOPS: 95.32974322283546, Tokens per sec: 77896.38719328388, Loss: 2.2441818714141846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20794 , TFLOPS: 97.75158405406351, Tokens per sec: 79875.33568020958, Loss: 2.249192476272583 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20795 , TFLOPS: 97.79266673013473, Tokens per sec: 79908.90539238954, Loss: 2.232295513153076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20796 , TFLOPS: 97.04399631452246, Tokens per sec: 79297.14752330173, Loss: 2.250239133834839 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20797 , TFLOPS: 96.5659685192328, Tokens per sec: 78906.53870624036, Loss: 2.2532670497894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20798 , TFLOPS: 97.79160119340126, Tokens per sec: 79908.03471489527, Loss: 2.2563533782958984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20799 , TFLOPS: 95.4729367983754, Tokens per sec: 78013.39434998814, Loss: 2.2668957710266113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20800 , TFLOPS: 97.25796753995233, Tokens per sec: 79471.98891970984, Loss: 2.2425925731658936 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/model.pt +[rank0]:[2024-08-31 04:21:27,868] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007675549015402794, 'preprocessing_with_comm': 0.001611721992958337, 'state_converting': 2.5999442700122017, : 2.6108370869769715}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0020800, took 14.72s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0018800 + eval ppl=7.840056896209717, eval loss=2.059246063232422 +------------------------------------------------------------------ +iteration: 20801 , TFLOPS: 97.19183990379562, Tokens per sec: 79417.9543259294, Loss: 2.2485930919647217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20802 , TFLOPS: 94.36255907529471, Tokens per sec: 77106.07612879344, Loss: 2.2484536170959473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20803 , TFLOPS: 96.8360433029267, Tokens per sec: 79127.22376434009, Loss: 2.2682597637176514 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20804 , TFLOPS: 97.82034484814274, Tokens per sec: 79931.52189511203, Loss: 2.263887643814087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20805 , TFLOPS: 95.66725124063888, Tokens per sec: 78172.17368286038, Loss: 2.2556233406066895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20806 , TFLOPS: 97.7762087759669, Tokens per sec: 79895.45717437357, Loss: 2.2377769947052 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20807 , TFLOPS: 97.78601320350985, Tokens per sec: 79903.46862450734, Loss: 2.244354248046875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20808 , TFLOPS: 97.10407179610668, Tokens per sec: 79346.23674578467, Loss: 2.260932683944702 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20809 , TFLOPS: 97.73170330292318, Tokens per sec: 79859.09060667676, Loss: 2.23068904876709 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20810 , TFLOPS: 96.58598102690522, Tokens per sec: 78922.89144142727, Loss: 2.2413594722747803 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20811 , TFLOPS: 97.8174151558239, Tokens per sec: 79929.12796810131, Loss: 2.2744860649108887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20812 , TFLOPS: 97.268501314923, Tokens per sec: 79480.59633840185, Loss: 2.268073081970215 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20813 , TFLOPS: 95.61049881541327, Tokens per sec: 78125.79981527114, Loss: 2.2681920528411865 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20814 , TFLOPS: 96.43140797493112, Tokens per sec: 78796.58582159488, Loss: 2.2521610260009766 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20815 , TFLOPS: 96.95005206932706, Tokens per sec: 79220.38326221255, Loss: 2.263242721557617 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20816 , TFLOPS: 96.45938293612429, Tokens per sec: 78819.44488252541, Loss: 2.276911735534668 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20817 , TFLOPS: 97.21550607626159, Tokens per sec: 79437.29256467283, Loss: 2.259964942932129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20818 , TFLOPS: 96.71995577890212, Tokens per sec: 79032.36566010088, Loss: 2.25045108795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20819 , TFLOPS: 96.05635102997944, Tokens per sec: 78490.1171370502, Loss: 2.2542662620544434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20820 , TFLOPS: 96.07075936781376, Tokens per sec: 78501.89056079825, Loss: 2.2770884037017822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20821 , TFLOPS: 96.77057725356165, Tokens per sec: 79073.72977015786, Loss: 2.260183334350586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20822 , TFLOPS: 98.38771067563862, Tokens per sec: 80395.13111805463, Loss: 2.2555899620056152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20823 , TFLOPS: 96.17938949183744, Tokens per sec: 78590.65503152616, Loss: 2.262014627456665 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20824 , TFLOPS: 96.75617191646973, Tokens per sec: 79061.9587983937, Loss: 2.2575337886810303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20825 , TFLOPS: 98.48405995287895, Tokens per sec: 80473.86059273875, Loss: 2.23372483253479 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20826 , TFLOPS: 97.34822955535469, Tokens per sec: 79545.74433604609, Loss: 2.250854253768921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20827 , TFLOPS: 96.7003760190288, Tokens per sec: 79016.36653428053, Loss: 2.252789258956909 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20828 , TFLOPS: 98.33856836400236, Tokens per sec: 80354.97567018101, Loss: 2.240267753601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20829 , TFLOPS: 96.51526115104991, Tokens per sec: 78865.10441037435, Loss: 2.2766451835632324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20830 , TFLOPS: 96.6739171494949, Tokens per sec: 78994.74631087249, Loss: 2.2522904872894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20831 , TFLOPS: 97.22916802692664, Tokens per sec: 79448.45609625112, Loss: 2.2661914825439453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20832 , TFLOPS: 97.17224348627236, Tokens per sec: 79401.94158871447, Loss: 2.2540440559387207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20833 , TFLOPS: 97.19323859924685, Tokens per sec: 79419.09723598821, Loss: 2.2519257068634033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20834 , TFLOPS: 96.19410486890592, Tokens per sec: 78602.6793449363, Loss: 2.263219118118286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20835 , TFLOPS: 96.56589287392111, Tokens per sec: 78906.4768945089, Loss: 2.247488021850586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20836 , TFLOPS: 97.31927604464092, Tokens per sec: 79522.08567711219, Loss: 2.258464813232422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20837 , TFLOPS: 96.08343602033659, Tokens per sec: 78512.24897990072, Loss: 2.2394142150878906 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20838 , TFLOPS: 96.7311208307541, Tokens per sec: 79041.48891138313, Loss: 2.2546794414520264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20839 , TFLOPS: 97.7140289988112, Tokens per sec: 79844.6484778098, Loss: 2.248412847518921 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20840 , TFLOPS: 94.47199610377434, Tokens per sec: 77195.49994192389, Loss: 2.229992628097534 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20841 , TFLOPS: 96.90475185856748, Tokens per sec: 79183.36729386973, Loss: 2.2544729709625244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20842 , TFLOPS: 97.82904702943128, Tokens per sec: 79938.63267145697, Loss: 2.2566237449645996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20843 , TFLOPS: 95.61947267325156, Tokens per sec: 78133.13258551846, Loss: 2.266207695007324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20844 , TFLOPS: 97.77390086782576, Tokens per sec: 79893.57132322062, Loss: 2.230196952819824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20845 , TFLOPS: 97.40468617962848, Tokens per sec: 79591.876497063, Loss: 2.251561403274536 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20846 , TFLOPS: 98.4216756664844, Tokens per sec: 80422.88478641138, Loss: 2.2452569007873535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20847 , TFLOPS: 96.54132344095862, Tokens per sec: 78886.40057836178, Loss: 2.260690212249756 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20848 , TFLOPS: 97.24633749533562, Tokens per sec: 79462.48571086953, Loss: 2.259476900100708 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20849 , TFLOPS: 97.09846073985003, Tokens per sec: 79341.65180727592, Loss: 2.2619855403900146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20850 , TFLOPS: 97.74166122271787, Tokens per sec: 79867.22747928066, Loss: 2.253599166870117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20851 , TFLOPS: 96.11411990789162, Tokens per sec: 78537.32157429545, Loss: 2.24491024017334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20852 , TFLOPS: 96.16811638882446, Tokens per sec: 78581.44348885876, Loss: 2.264646530151367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20853 , TFLOPS: 97.70745181548577, Tokens per sec: 79839.27409200283, Loss: 2.261112689971924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20854 , TFLOPS: 96.1007120463716, Tokens per sec: 78526.36566549838, Loss: 2.2552013397216797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20855 , TFLOPS: 97.78021529447494, Tokens per sec: 79898.73100378345, Loss: 2.2581329345703125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20856 , TFLOPS: 97.7127938098681, Tokens per sec: 79843.6391731278, Loss: 2.24755859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20857 , TFLOPS: 96.00921210236986, Tokens per sec: 78451.59870583646, Loss: 2.270132303237915 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20858 , TFLOPS: 96.63818567845536, Tokens per sec: 78965.54920607618, Loss: 2.2482142448425293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20859 , TFLOPS: 96.59071713168406, Tokens per sec: 78926.76143456025, Loss: 2.254317045211792 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20860 , TFLOPS: 98.50264316411752, Tokens per sec: 80489.0454130162, Loss: 2.256288766860962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20861 , TFLOPS: 95.62731604445595, Tokens per sec: 78139.5416060359, Loss: 2.269888401031494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20862 , TFLOPS: 96.20984645731541, Tokens per sec: 78615.54220205023, Loss: 2.24452543258667 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20863 , TFLOPS: 98.39653522980969, Tokens per sec: 80402.34188843204, Loss: 2.2508795261383057 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20864 , TFLOPS: 97.37800429236373, Tokens per sec: 79570.07404012608, Loss: 2.2574520111083984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20865 , TFLOPS: 96.58798173980999, Tokens per sec: 78924.52627544469, Loss: 2.2380294799804688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20866 , TFLOPS: 98.37427541125722, Tokens per sec: 80384.15281767413, Loss: 2.2579050064086914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20867 , TFLOPS: 97.15580801919289, Tokens per sec: 79388.51174547727, Loss: 2.255645513534546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20868 , TFLOPS: 96.56673856657254, Tokens per sec: 78907.16793174445, Loss: 2.261145830154419 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20869 , TFLOPS: 96.42540945778543, Tokens per sec: 78791.6842788196, Loss: 2.2564330101013184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20870 , TFLOPS: 97.17805834047431, Tokens per sec: 79406.69305577055, Loss: 2.2597885131835938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20871 , TFLOPS: 97.19677115242175, Tokens per sec: 79421.98377612344, Loss: 2.2501416206359863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20872 , TFLOPS: 96.14465217075022, Tokens per sec: 78562.27027224758, Loss: 2.261983633041382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20873 , TFLOPS: 96.59088234078033, Tokens per sec: 78926.89643116567, Loss: 2.2524361610412598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20874 , TFLOPS: 96.45241440210955, Tokens per sec: 78813.75071399593, Loss: 2.2607150077819824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20875 , TFLOPS: 96.78870419032751, Tokens per sec: 79088.54175681811, Loss: 2.263399362564087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20876 , TFLOPS: 95.96200916906793, Tokens per sec: 78413.02797392385, Loss: 2.2503774166107178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20877 , TFLOPS: 97.78302817863509, Tokens per sec: 79901.0294838408, Loss: 2.2569713592529297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20878 , TFLOPS: 94.5892000101141, Tokens per sec: 77291.27027089105, Loss: 2.2575736045837402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20879 , TFLOPS: 96.71348051505278, Tokens per sec: 79027.07455532147, Loss: 2.243408679962158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20880 , TFLOPS: 97.88978284402651, Tokens per sec: 79988.2614690417, Loss: 2.2534210681915283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20881 , TFLOPS: 95.57371095438049, Tokens per sec: 78095.73950701745, Loss: 2.2495081424713135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20882 , TFLOPS: 97.18084700829424, Tokens per sec: 79408.97174803291, Loss: 2.2375707626342773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20883 , TFLOPS: 97.79122817958527, Tokens per sec: 79907.729915704, Loss: 2.248518943786621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20884 , TFLOPS: 97.14502219780789, Tokens per sec: 79379.69837317185, Loss: 2.245734691619873 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20885 , TFLOPS: 97.87976279940385, Tokens per sec: 79980.07383264162, Loss: 2.230419158935547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20886 , TFLOPS: 95.99472608725367, Tokens per sec: 78439.76180998182, Loss: 2.2442686557769775 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20887 , TFLOPS: 97.7877293082556, Tokens per sec: 79904.87089787157, Loss: 2.2638728618621826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20888 , TFLOPS: 97.13310738395232, Tokens per sec: 79369.96247205586, Loss: 2.254373073577881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20889 , TFLOPS: 96.1732805824444, Tokens per sec: 78585.66328440378, Loss: 2.261606216430664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20890 , TFLOPS: 96.80706496151258, Tokens per sec: 79103.54481560174, Loss: 2.2341275215148926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20891 , TFLOPS: 96.58659383901707, Tokens per sec: 78923.39218597906, Loss: 2.245058298110962 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20892 , TFLOPS: 96.65782168089308, Tokens per sec: 78981.59428914372, Loss: 2.245615005493164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20893 , TFLOPS: 96.75993286245654, Tokens per sec: 79065.03196417488, Loss: 2.2381703853607178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20894 , TFLOPS: 97.85077570885514, Tokens per sec: 79956.38773476018, Loss: 2.25921630859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20895 , TFLOPS: 96.39313484426224, Tokens per sec: 78765.31186128731, Loss: 2.2645723819732666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20896 , TFLOPS: 93.91517348590607, Tokens per sec: 76740.50584697435, Loss: 2.274751663208008 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20897 , TFLOPS: 97.89272145658907, Tokens per sec: 79990.66268501319, Loss: 2.245283603668213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20898 , TFLOPS: 98.42247113595808, Tokens per sec: 80423.53478499551, Loss: 2.2759921550750732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20899 , TFLOPS: 96.74347271876881, Tokens per sec: 79051.58195704584, Loss: 2.250308036804199 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20900 , TFLOPS: 96.33224234013935, Tokens per sec: 78715.555028656, Loss: 2.2472734451293945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20901 , TFLOPS: 97.83773451956384, Tokens per sec: 79945.73144328016, Loss: 2.2431564331054688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20902 , TFLOPS: 97.32624381337433, Tokens per sec: 79527.7792203106, Loss: 2.2496018409729004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20903 , TFLOPS: 96.75730340224992, Tokens per sec: 79062.88336455172, Loss: 2.2446060180664062 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20904 , TFLOPS: 97.89822436007675, Tokens per sec: 79995.15924911031, Loss: 2.254253625869751 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20905 , TFLOPS: 97.05464395874016, Tokens per sec: 79305.84798748676, Loss: 2.2558209896087646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20906 , TFLOPS: 96.08314364979196, Tokens per sec: 78512.0100764024, Loss: 2.2702648639678955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20907 , TFLOPS: 97.27762928674746, Tokens per sec: 79488.05503915453, Loss: 2.2531793117523193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20908 , TFLOPS: 97.19294710792187, Tokens per sec: 79418.85905092291, Loss: 2.26562237739563 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20909 , TFLOPS: 96.63993821964965, Tokens per sec: 78966.98125260045, Loss: 2.261859655380249 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20910 , TFLOPS: 96.7155537477965, Tokens per sec: 79028.76864716611, Loss: 2.2490532398223877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20911 , TFLOPS: 95.41032625852883, Tokens per sec: 77962.23366614083, Loss: 2.2451159954071045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20912 , TFLOPS: 97.28992051449876, Tokens per sec: 79498.09851775446, Loss: 2.2401018142700195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20913 , TFLOPS: 96.61424600258474, Tokens per sec: 78945.98748066022, Loss: 2.2661986351013184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20914 , TFLOPS: 96.89723842225185, Tokens per sec: 79177.22787164318, Loss: 2.2488369941711426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20915 , TFLOPS: 97.84869718180234, Tokens per sec: 79954.68931679928, Loss: 2.2710373401641846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20916 , TFLOPS: 94.8365727082245, Tokens per sec: 77493.40487045681, Loss: 2.2509891986846924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20917 , TFLOPS: 97.28047631711924, Tokens per sec: 79490.38142096039, Loss: 2.2803444862365723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20918 , TFLOPS: 97.91914595550476, Tokens per sec: 80012.25482330429, Loss: 2.2500829696655273 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20919 , TFLOPS: 96.09800408392327, Tokens per sec: 78524.1529196727, Loss: 2.2717316150665283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20920 , TFLOPS: 96.66429103569577, Tokens per sec: 78986.880565489, Loss: 2.2735307216644287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20921 , TFLOPS: 97.83427066235349, Tokens per sec: 79942.90103638599, Loss: 2.2384450435638428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20922 , TFLOPS: 97.79020145892379, Tokens per sec: 79906.89095582136, Loss: 2.242666006088257 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20923 , TFLOPS: 97.17128559475104, Tokens per sec: 79401.15887089408, Loss: 2.2797257900238037 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20924 , TFLOPS: 96.16428752359091, Tokens per sec: 78578.31482451266, Loss: 2.2511889934539795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20925 , TFLOPS: 97.18406239339227, Tokens per sec: 79411.59912196788, Loss: 2.270772933959961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20926 , TFLOPS: 97.90221474402703, Tokens per sec: 79998.4198945577, Loss: 2.2652134895324707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20927 , TFLOPS: 96.70507489593821, Tokens per sec: 79020.20610756306, Loss: 2.262766122817993 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20928 , TFLOPS: 95.57906696107875, Tokens per sec: 78100.11603796648, Loss: 2.230973243713379 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20929 , TFLOPS: 97.31854009897452, Tokens per sec: 79521.48431696325, Loss: 2.248995780944824 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20930 , TFLOPS: 96.59856980885021, Tokens per sec: 78933.17805921861, Loss: 2.231659173965454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20931 , TFLOPS: 96.71551939375246, Tokens per sec: 79028.74057559238, Loss: 2.2405598163604736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20932 , TFLOPS: 97.76854493296383, Tokens per sec: 79889.1948509709, Loss: 2.226400375366211 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20933 , TFLOPS: 96.00508164242243, Tokens per sec: 78448.22360068592, Loss: 2.2608182430267334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20934 , TFLOPS: 97.14258206575484, Tokens per sec: 79377.70447845673, Loss: 2.27026629447937 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20935 , TFLOPS: 97.89996804395845, Tokens per sec: 79996.58405809644, Loss: 2.2603254318237305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20936 , TFLOPS: 97.24695675414887, Tokens per sec: 79462.99172318698, Loss: 2.2555246353149414 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20937 , TFLOPS: 96.72735876795635, Tokens per sec: 79038.41483302697, Loss: 2.260256767272949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20938 , TFLOPS: 95.6311300581215, Tokens per sec: 78142.65813478347, Loss: 2.2393832206726074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20939 , TFLOPS: 97.19993328595085, Tokens per sec: 79424.56763683062, Loss: 2.246901750564575 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20940 , TFLOPS: 97.8838740200311, Tokens per sec: 79983.43321684838, Loss: 2.2704176902770996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20941 , TFLOPS: 96.18234864045911, Tokens per sec: 78593.07302804018, Loss: 2.269939661026001 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20942 , TFLOPS: 98.36666732614071, Tokens per sec: 80377.93605547589, Loss: 2.260877847671509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20943 , TFLOPS: 96.76206865702157, Tokens per sec: 79066.7771768942, Loss: 2.247936964035034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20944 , TFLOPS: 96.22027723000674, Tokens per sec: 78624.0654549283, Loss: 2.2514398097991943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20945 , TFLOPS: 97.90749434310933, Tokens per sec: 80002.7339908768, Loss: 2.2509219646453857 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20946 , TFLOPS: 97.08232780384934, Tokens per sec: 79328.46916997104, Loss: 2.25464129447937 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20947 , TFLOPS: 96.68946789037831, Tokens per sec: 79007.45322156, Loss: 2.2559995651245117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20948 , TFLOPS: 96.65588379379167, Tokens per sec: 78980.01079170751, Loss: 2.257297992706299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20949 , TFLOPS: 96.72104267314656, Tokens per sec: 79033.25378936715, Loss: 2.2145814895629883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20950 , TFLOPS: 97.86377538805074, Tokens per sec: 79967.01010727242, Loss: 2.2696940898895264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20951 , TFLOPS: 95.61384348385455, Tokens per sec: 78128.53282995393, Loss: 2.2677724361419678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20952 , TFLOPS: 96.14008790628777, Tokens per sec: 78558.5406942607, Loss: 2.233241319656372 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20953 , TFLOPS: 97.87058042251067, Tokens per sec: 79972.5706761068, Loss: 2.2431371212005615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20954 , TFLOPS: 94.53824072556361, Tokens per sec: 77249.63012767617, Loss: 2.2571003437042236 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20955 , TFLOPS: 97.89544667906378, Tokens per sec: 79992.88953445073, Loss: 2.2738547325134277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20956 , TFLOPS: 96.85445297636363, Tokens per sec: 79142.26678241255, Loss: 2.2204999923706055 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20957 , TFLOPS: 96.63334326321863, Tokens per sec: 78961.59234393139, Loss: 2.2227463722229004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20958 , TFLOPS: 96.60935178678899, Tokens per sec: 78941.98829094518, Loss: 2.270033597946167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20959 , TFLOPS: 97.85863022445577, Tokens per sec: 79962.80586165068, Loss: 2.2475736141204834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20960 , TFLOPS: 96.54622894885287, Tokens per sec: 78890.40899514117, Loss: 2.2646517753601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20961 , TFLOPS: 97.39550771592315, Tokens per sec: 79584.3765380947, Loss: 2.2400734424591064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20962 , TFLOPS: 97.2487470365565, Tokens per sec: 79464.45460902822, Loss: 2.280010223388672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20963 , TFLOPS: 96.1132171575806, Tokens per sec: 78536.58391377747, Loss: 2.2705206871032715 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20964 , TFLOPS: 98.40980196686648, Tokens per sec: 80413.18248079735, Loss: 2.2757208347320557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20965 , TFLOPS: 94.85927550146054, Tokens per sec: 77511.95590723188, Loss: 2.2288010120391846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20966 , TFLOPS: 97.89798203532743, Tokens per sec: 79994.96123931964, Loss: 2.253192901611328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20967 , TFLOPS: 96.53389201647303, Tokens per sec: 78880.32817011271, Loss: 2.2483646869659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20968 , TFLOPS: 97.27483572160473, Tokens per sec: 79485.77234516364, Loss: 2.2366042137145996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20969 , TFLOPS: 96.13435054483193, Tokens per sec: 78553.8525485223, Loss: 2.245871067047119 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20970 , TFLOPS: 97.75251072627358, Tokens per sec: 79876.0928879271, Loss: 2.24106764793396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20971 , TFLOPS: 96.54380116955082, Tokens per sec: 78888.42519417687, Loss: 2.2537312507629395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20972 , TFLOPS: 97.23897518469391, Tokens per sec: 79456.46977732146, Loss: 2.2501792907714844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20973 , TFLOPS: 96.8212625401074, Tokens per sec: 79115.14602254974, Loss: 2.2454349994659424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20974 , TFLOPS: 97.96090641405956, Tokens per sec: 80046.37836899929, Loss: 2.266003131866455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20975 , TFLOPS: 97.25111075227123, Tokens per sec: 79466.3860619862, Loss: 2.232069969177246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20976 , TFLOPS: 95.62241647667723, Tokens per sec: 78135.53804307782, Loss: 2.242450714111328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20977 , TFLOPS: 98.42250865587296, Tokens per sec: 80423.5654434838, Loss: 2.2610442638397217 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20978 , TFLOPS: 97.2402938264088, Tokens per sec: 79457.54727341156, Loss: 2.2795193195343018 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20979 , TFLOPS: 97.21551118533573, Tokens per sec: 79437.29673942883, Loss: 2.2550430297851562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20980 , TFLOPS: 98.38795987996052, Tokens per sec: 80395.33474932116, Loss: 2.2593460083007812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20981 , TFLOPS: 96.57388242094922, Tokens per sec: 78913.0053590548, Loss: 2.246809959411621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20982 , TFLOPS: 97.15234803181853, Tokens per sec: 79385.68450072569, Loss: 2.229342460632324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20983 , TFLOPS: 97.2788246306823, Tokens per sec: 79489.03178545457, Loss: 2.2667627334594727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20984 , TFLOPS: 96.61782911522414, Tokens per sec: 78948.9153342354, Loss: 2.2554657459259033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20985 , TFLOPS: 97.7351331356956, Tokens per sec: 79861.89321132685, Loss: 2.2641706466674805 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20986 , TFLOPS: 96.16456885733402, Tokens per sec: 78578.54470955633, Loss: 2.2264623641967773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20987 , TFLOPS: 97.11528077597737, Tokens per sec: 79355.39589178182, Loss: 2.2377099990844727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20988 , TFLOPS: 96.74348528611915, Tokens per sec: 79051.59222615132, Loss: 2.269852876663208 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20989 , TFLOPS: 96.78859999779057, Tokens per sec: 79088.45661841403, Loss: 2.236845016479492 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20990 , TFLOPS: 97.2880596044413, Tokens per sec: 79496.57792024243, Loss: 2.235848903656006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20991 , TFLOPS: 98.37290915836508, Tokens per sec: 80383.03641726548, Loss: 2.2446837425231934 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20992 , TFLOPS: 95.03089767920635, Tokens per sec: 77652.19280661587, Loss: 2.259791374206543 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20993 , TFLOPS: 97.7971578169878, Tokens per sec: 79912.57517506805, Loss: 2.249626398086548 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20994 , TFLOPS: 98.3508071053566, Tokens per sec: 80364.97626079478, Loss: 2.255645990371704 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20995 , TFLOPS: 96.67943155076732, Tokens per sec: 78999.2522701055, Loss: 2.2666406631469727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20996 , TFLOPS: 97.19704949266048, Tokens per sec: 79422.21121509762, Loss: 2.2565078735351562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20997 , TFLOPS: 96.70774876275212, Tokens per sec: 79022.39099296815, Loss: 2.259124755859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20998 , TFLOPS: 98.37630437121801, Tokens per sec: 80385.81073308816, Loss: 2.230225086212158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 20999 , TFLOPS: 96.58115539714483, Tokens per sec: 78918.9482951273, Loss: 2.2610833644866943 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21000 , TFLOPS: 97.14750333851542, Tokens per sec: 79381.72577711422, Loss: 2.238650321960449 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/model.pt +[rank0]:[2024-08-31 05:17:01,716] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007679673988604918, 'preprocessing_with_comm': 0.0014638459833804518, 'state_converting': 2.5966296330152545, : 2.607396870997036}) +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/optimizer.pt +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021000, took 14.81s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019000 + eval ppl=7.714747428894043, eval loss=2.0431337356567383 +------------------------------------------------------------------ +iteration: 21001 , TFLOPS: 95.1828690298512, Tokens per sec: 77776.37250931837, Loss: 2.235534191131592 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21002 , TFLOPS: 96.15046914300609, Tokens per sec: 78567.02347002005, Loss: 2.257408618927002 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21003 , TFLOPS: 97.14695686042727, Tokens per sec: 79381.27923580079, Loss: 2.238769292831421 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21004 , TFLOPS: 96.73276777093493, Tokens per sec: 79042.8346686009, Loss: 2.2395882606506348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21005 , TFLOPS: 97.43804456301197, Tokens per sec: 79619.13449084683, Loss: 2.2276368141174316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21006 , TFLOPS: 97.27730746160451, Tokens per sec: 79487.7920675457, Loss: 2.2477128505706787 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21007 , TFLOPS: 97.2097378953638, Tokens per sec: 79432.57923557505, Loss: 2.262916088104248 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21008 , TFLOPS: 97.8261803879451, Tokens per sec: 79936.2902649047, Loss: 2.26566219329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21009 , TFLOPS: 97.22440769128774, Tokens per sec: 79444.56630345865, Loss: 2.250742197036743 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21010 , TFLOPS: 97.7608412205829, Tokens per sec: 79882.89994927325, Loss: 2.261678695678711 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21011 , TFLOPS: 97.12201849288171, Tokens per sec: 79360.90145370862, Loss: 2.2602977752685547 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21012 , TFLOPS: 97.06422897353372, Tokens per sec: 79313.68014981481, Loss: 2.2406113147735596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21013 , TFLOPS: 96.7112332738198, Tokens per sec: 79025.23827666085, Loss: 2.2577414512634277 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21014 , TFLOPS: 97.54895737413793, Tokens per sec: 79709.76420396763, Loss: 2.270460605621338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21015 , TFLOPS: 96.57603646931335, Tokens per sec: 78914.76548742327, Loss: 2.254427909851074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21016 , TFLOPS: 96.70301067383369, Tokens per sec: 79018.51937854364, Loss: 2.256679058074951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21017 , TFLOPS: 97.24336180507831, Tokens per sec: 79460.05419775921, Loss: 2.2589964866638184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21018 , TFLOPS: 96.93925964032277, Tokens per sec: 79211.56449065132, Loss: 2.238635778427124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21019 , TFLOPS: 97.86198038513591, Tokens per sec: 79965.54336418322, Loss: 2.2483396530151367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21020 , TFLOPS: 96.807879955888, Tokens per sec: 79104.21076848572, Loss: 2.2553062438964844 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21021 , TFLOPS: 97.7731231409657, Tokens per sec: 79892.93582258277, Loss: 2.2498857975006104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21022 , TFLOPS: 96.68635569064625, Tokens per sec: 79004.9101630438, Loss: 2.257925510406494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21023 , TFLOPS: 97.11725892754106, Tokens per sec: 79357.01229034667, Loss: 2.2624928951263428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21024 , TFLOPS: 97.87172537361646, Tokens per sec: 79973.506245129, Loss: 2.2488062381744385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21025 , TFLOPS: 97.75485411957402, Tokens per sec: 79878.0077349172, Loss: 2.2511723041534424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21026 , TFLOPS: 95.11118804632484, Tokens per sec: 77717.80013244631, Loss: 2.2651195526123047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21027 , TFLOPS: 97.45663441890397, Tokens per sec: 79634.32474064166, Loss: 2.2272796630859375 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21028 , TFLOPS: 98.3606453811522, Tokens per sec: 80373.01536920712, Loss: 2.2224280834198 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21029 , TFLOPS: 95.47214216156515, Tokens per sec: 78012.74503179471, Loss: 2.266141414642334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21030 , TFLOPS: 97.06940758490164, Tokens per sec: 79317.91172647284, Loss: 2.256891965866089 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21031 , TFLOPS: 96.00044360319518, Tokens per sec: 78444.43373943957, Loss: 2.254913091659546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21032 , TFLOPS: 97.18260738798979, Tokens per sec: 79410.41019959835, Loss: 2.2656478881835938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21033 , TFLOPS: 95.97213960514858, Tokens per sec: 78421.30581402597, Loss: 2.2259433269500732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21034 , TFLOPS: 97.77238310751271, Tokens per sec: 79892.33112219819, Loss: 2.266185760498047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21035 , TFLOPS: 95.46186941710313, Tokens per sec: 78004.35090785082, Loss: 2.253262996673584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21036 , TFLOPS: 96.14119848896715, Tokens per sec: 78559.44817995696, Loss: 2.235387086868286 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21037 , TFLOPS: 96.18932973779785, Tokens per sec: 78598.7774623851, Loss: 2.226486921310425 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21038 , TFLOPS: 96.50350686722781, Tokens per sec: 78855.49968248112, Loss: 2.245171546936035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21039 , TFLOPS: 95.467292681185, Tokens per sec: 78008.78239653936, Loss: 2.240311622619629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21040 , TFLOPS: 96.38844378082226, Tokens per sec: 78761.47867259181, Loss: 2.2229042053222656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21041 , TFLOPS: 97.09071273229988, Tokens per sec: 79335.32071085523, Loss: 2.2408673763275146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21042 , TFLOPS: 96.05353701525125, Tokens per sec: 78487.81773317662, Loss: 2.2510056495666504 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21043 , TFLOPS: 96.90742853213735, Tokens per sec: 79185.55447274738, Loss: 2.2540953159332275 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21044 , TFLOPS: 96.66164061961547, Tokens per sec: 78984.714842282, Loss: 2.274573802947998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21045 , TFLOPS: 97.18857302358984, Tokens per sec: 79415.28487401627, Loss: 2.2670254707336426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21046 , TFLOPS: 97.8666014271989, Tokens per sec: 79969.3193366091, Loss: 2.242863416671753 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21047 , TFLOPS: 97.28566798525375, Tokens per sec: 79494.62366663852, Loss: 2.2232837677001953 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21048 , TFLOPS: 97.1609168522753, Tokens per sec: 79392.68630450199, Loss: 2.2443623542785645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21049 , TFLOPS: 97.23486804116554, Tokens per sec: 79453.11372461716, Loss: 2.2574024200439453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21050 , TFLOPS: 97.64816864653456, Tokens per sec: 79790.83228856824, Loss: 2.2617218494415283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21051 , TFLOPS: 97.2861773268219, Tokens per sec: 79495.03986274547, Loss: 2.255704879760742 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21052 , TFLOPS: 97.86701980932176, Tokens per sec: 79969.6612074118, Loss: 2.2657930850982666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21053 , TFLOPS: 95.3755244629907, Tokens per sec: 77933.79622312871, Loss: 2.243605136871338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21054 , TFLOPS: 97.34490100875294, Tokens per sec: 79543.0244949334, Loss: 2.258305072784424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21055 , TFLOPS: 97.24686428296401, Tokens per sec: 79462.91616260142, Loss: 2.259085178375244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21056 , TFLOPS: 97.7320032580132, Tokens per sec: 79859.33570770234, Loss: 2.248906135559082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21057 , TFLOPS: 97.82710252050771, Tokens per sec: 79937.04376315941, Loss: 2.2518835067749023 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21058 , TFLOPS: 95.58061447377818, Tokens per sec: 78101.3805504297, Loss: 2.260148286819458 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21059 , TFLOPS: 97.7616682105368, Tokens per sec: 79883.57570405319, Loss: 2.2498273849487305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21060 , TFLOPS: 97.2694400343215, Tokens per sec: 79481.36339018717, Loss: 2.2518723011016846 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21061 , TFLOPS: 97.21979466297655, Tokens per sec: 79440.79687927566, Loss: 2.2418711185455322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21062 , TFLOPS: 97.14072993791714, Tokens per sec: 79376.19105712253, Loss: 2.2510812282562256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21063 , TFLOPS: 97.75091681294015, Tokens per sec: 79874.79046031137, Loss: 2.2646870613098145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21064 , TFLOPS: 95.40604800267602, Tokens per sec: 77958.73779315139, Loss: 2.2428479194641113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21065 , TFLOPS: 97.624138081673, Tokens per sec: 79771.19629541809, Loss: 2.252593517303467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21066 , TFLOPS: 98.42354930032529, Tokens per sec: 80424.41578085438, Loss: 2.251220226287842 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21067 , TFLOPS: 95.53519672762306, Tokens per sec: 78064.26854088956, Loss: 2.241269588470459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21068 , TFLOPS: 97.13797532943394, Tokens per sec: 79373.94019562096, Loss: 2.2519454956054688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21069 , TFLOPS: 95.6175456597548, Tokens per sec: 78131.5579731845, Loss: 2.2533488273620605 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21070 , TFLOPS: 97.11763383141843, Tokens per sec: 79357.31863395574, Loss: 2.2354447841644287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21071 , TFLOPS: 96.12840079944439, Tokens per sec: 78548.99085840608, Loss: 2.2503502368927 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21072 , TFLOPS: 97.10961244611978, Tokens per sec: 79350.76415353939, Loss: 2.2616817951202393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21073 , TFLOPS: 96.10774281111216, Tokens per sec: 78532.11068435588, Loss: 2.256664752960205 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21074 , TFLOPS: 95.08662523106557, Tokens per sec: 77697.72922379484, Loss: 2.249155044555664 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21075 , TFLOPS: 97.37188862189642, Tokens per sec: 79565.0767683559, Loss: 2.2424166202545166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21076 , TFLOPS: 95.57376111968625, Tokens per sec: 78095.78049838018, Loss: 2.267752170562744 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21077 , TFLOPS: 96.34285750000181, Tokens per sec: 78724.22894903815, Loss: 2.232159376144409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21078 , TFLOPS: 95.74120893252385, Tokens per sec: 78232.60641673995, Loss: 2.258432149887085 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21079 , TFLOPS: 97.73692785528804, Tokens per sec: 79863.35972290603, Loss: 2.2458534240722656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21080 , TFLOPS: 96.69463971141114, Tokens per sec: 79011.67924966045, Loss: 2.2534217834472656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21081 , TFLOPS: 97.86848356943514, Tokens per sec: 79970.85728348097, Loss: 2.2739810943603516 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21082 , TFLOPS: 97.77894437807338, Tokens per sec: 79897.69250527545, Loss: 2.2676331996917725 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21083 , TFLOPS: 97.27470374231392, Tokens per sec: 79485.66450148767, Loss: 2.242842197418213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21084 , TFLOPS: 98.33289283185009, Tokens per sec: 80350.33804675819, Loss: 2.256453275680542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21085 , TFLOPS: 96.63977727507488, Tokens per sec: 78966.84974064532, Loss: 2.2393391132354736 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21086 , TFLOPS: 97.22405673561136, Tokens per sec: 79444.27952854107, Loss: 2.2430801391601562 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21087 , TFLOPS: 97.16065276484898, Tokens per sec: 79392.47051186771, Loss: 2.2595348358154297 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21088 , TFLOPS: 97.06088333611075, Tokens per sec: 79310.94634335215, Loss: 2.236457586288452 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21089 , TFLOPS: 97.15229007631241, Tokens per sec: 79385.63714378973, Loss: 2.2567992210388184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21090 , TFLOPS: 97.29838358769052, Tokens per sec: 79505.01391271834, Loss: 2.2614476680755615 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21091 , TFLOPS: 96.5700603711225, Tokens per sec: 78909.88226375307, Loss: 2.248544454574585 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21092 , TFLOPS: 97.25214033884652, Tokens per sec: 79467.22736368077, Loss: 2.2187516689300537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21093 , TFLOPS: 96.75098039987074, Tokens per sec: 79057.71667653913, Loss: 2.242713451385498 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21094 , TFLOPS: 97.77627036594745, Tokens per sec: 79895.50750113216, Loss: 2.2734596729278564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21095 , TFLOPS: 97.12233759352551, Tokens per sec: 79361.16219905905, Loss: 2.2461659908294678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21096 , TFLOPS: 96.19194006446409, Tokens per sec: 78600.91042749987, Loss: 2.2477660179138184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21097 , TFLOPS: 97.71291319685034, Tokens per sec: 79843.73672730428, Loss: 2.227592945098877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21098 , TFLOPS: 96.3570931057404, Tokens per sec: 78735.86122894424, Loss: 2.2412493228912354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21099 , TFLOPS: 97.16115894006559, Tokens per sec: 79392.88412066731, Loss: 2.269986867904663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21100 , TFLOPS: 97.86966584574554, Tokens per sec: 79971.82335188871, Loss: 2.2509114742279053 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21101 , TFLOPS: 97.25389620980617, Tokens per sec: 79468.66213104205, Loss: 2.266446590423584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21102 , TFLOPS: 95.88784431176008, Tokens per sec: 78352.42595984433, Loss: 2.2645182609558105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21103 , TFLOPS: 97.02158478349031, Tokens per sec: 79278.83448437123, Loss: 2.2810921669006348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21104 , TFLOPS: 97.81689440271158, Tokens per sec: 79928.70244732786, Loss: 2.276552200317383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21105 , TFLOPS: 96.02895308217487, Tokens per sec: 78467.72956861315, Loss: 2.2251157760620117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21106 , TFLOPS: 96.6232986911504, Tokens per sec: 78953.38466552415, Loss: 2.2542500495910645 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21107 , TFLOPS: 96.11862459281068, Tokens per sec: 78541.00246830373, Loss: 2.2505862712860107 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21108 , TFLOPS: 97.20921957262738, Tokens per sec: 79432.1557007243, Loss: 2.2563300132751465 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21109 , TFLOPS: 95.5202110145216, Tokens per sec: 78052.02332894779, Loss: 2.270040512084961 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21110 , TFLOPS: 97.77455716384628, Tokens per sec: 79894.10759959354, Loss: 2.2246053218841553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21111 , TFLOPS: 96.55602862784589, Tokens per sec: 78898.41656511257, Loss: 2.275339365005493 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21112 , TFLOPS: 95.4228391102781, Tokens per sec: 77972.45823941463, Loss: 2.2613208293914795 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21113 , TFLOPS: 97.27915398616284, Tokens per sec: 79489.30091029608, Loss: 2.2386584281921387 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21114 , TFLOPS: 96.01627812316046, Tokens per sec: 78457.37253332001, Loss: 2.251075267791748 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21115 , TFLOPS: 96.89837275349194, Tokens per sec: 79178.15476289974, Loss: 2.2194440364837646 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21116 , TFLOPS: 95.47276264662595, Tokens per sec: 78013.25204611062, Loss: 2.2615861892700195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21117 , TFLOPS: 98.38561391138612, Tokens per sec: 80393.4177980084, Loss: 2.252230167388916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21118 , TFLOPS: 95.42316712767216, Tokens per sec: 77972.72627087122, Loss: 2.2495172023773193 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21119 , TFLOPS: 97.23985528624578, Tokens per sec: 79457.18893095532, Loss: 2.2442872524261475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21120 , TFLOPS: 97.84817504596708, Tokens per sec: 79954.26266616733, Loss: 2.253629446029663 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21121 , TFLOPS: 97.18594244531474, Tokens per sec: 79413.13536079058, Loss: 2.2612321376800537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21122 , TFLOPS: 98.40350885809912, Tokens per sec: 80408.04021962451, Loss: 2.2517247200012207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21123 , TFLOPS: 96.59157875627585, Tokens per sec: 78927.4654901944, Loss: 2.270094871520996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21124 , TFLOPS: 97.19285212734526, Tokens per sec: 79418.78143984877, Loss: 2.2651612758636475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21125 , TFLOPS: 97.377770164517, Tokens per sec: 79569.88272823555, Loss: 2.2384536266326904 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21126 , TFLOPS: 97.71992797402488, Tokens per sec: 79849.46868231002, Loss: 2.2596311569213867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21127 , TFLOPS: 97.22230052315511, Tokens per sec: 79442.84448213414, Loss: 2.2532031536102295 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21128 , TFLOPS: 96.83840294888743, Tokens per sec: 79129.15189179729, Loss: 2.263089418411255 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21129 , TFLOPS: 96.54191572384866, Tokens per sec: 78886.88454795796, Loss: 2.2538628578186035 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21130 , TFLOPS: 97.07883690346588, Tokens per sec: 79325.61666540394, Loss: 2.2774081230163574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21131 , TFLOPS: 96.2751406541891, Tokens per sec: 78668.89577113789, Loss: 2.239713191986084 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21132 , TFLOPS: 97.7887691032237, Tokens per sec: 79905.7205411067, Loss: 2.2674944400787354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21133 , TFLOPS: 97.93939204789027, Tokens per sec: 80028.7984265733, Loss: 2.2548270225524902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21134 , TFLOPS: 92.5887863656363, Tokens per sec: 75656.68078675988, Loss: 2.249964714050293 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21135 , TFLOPS: 97.82573655913636, Tokens per sec: 79935.9276009599, Loss: 2.24204683303833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21136 , TFLOPS: 96.74926837991507, Tokens per sec: 79056.31774096204, Loss: 2.2393124103546143 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21137 , TFLOPS: 97.25628849974652, Tokens per sec: 79470.61693273533, Loss: 2.2602345943450928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21138 , TFLOPS: 97.70375775707426, Tokens per sec: 79836.25558178125, Loss: 2.2564711570739746 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21139 , TFLOPS: 96.72920152369687, Tokens per sec: 79039.92059617932, Loss: 2.2500247955322266 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21140 , TFLOPS: 96.55538533667637, Tokens per sec: 78897.89091533815, Loss: 2.265120267868042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21141 , TFLOPS: 96.45545029482986, Tokens per sec: 78816.23142008853, Loss: 2.2510898113250732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21142 , TFLOPS: 97.86168334776677, Tokens per sec: 79965.30064730247, Loss: 2.2390336990356445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21143 , TFLOPS: 96.00022213117127, Tokens per sec: 78444.25276894764, Loss: 2.2543509006500244 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21144 , TFLOPS: 96.53205498806267, Tokens per sec: 78878.8270869093, Loss: 2.2525548934936523 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21145 , TFLOPS: 96.22002183492049, Tokens per sec: 78623.85676502886, Loss: 2.263397693634033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21146 , TFLOPS: 96.81214582804394, Tokens per sec: 79107.69652243765, Loss: 2.2517826557159424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21147 , TFLOPS: 95.89135972697912, Tokens per sec: 78355.29849611466, Loss: 2.2577600479125977 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21148 , TFLOPS: 97.30033359744283, Tokens per sec: 79506.60731588467, Loss: 2.2389583587646484 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21149 , TFLOPS: 96.64356872735436, Tokens per sec: 78969.94783390357, Loss: 2.277942419052124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21150 , TFLOPS: 96.18715073709835, Tokens per sec: 78596.99694482106, Loss: 2.257559299468994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21151 , TFLOPS: 97.06820284065489, Tokens per sec: 79316.92729893596, Loss: 2.2303824424743652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21152 , TFLOPS: 97.26646965904793, Tokens per sec: 79478.93622008686, Loss: 2.26198673248291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21153 , TFLOPS: 96.08568348226629, Tokens per sec: 78514.08543889828, Loss: 2.264251947402954 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21154 , TFLOPS: 96.03357776519313, Tokens per sec: 78471.50851615732, Loss: 2.2783432006835938 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21155 , TFLOPS: 97.8317487268359, Tokens per sec: 79940.84029795417, Loss: 2.252748727798462 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21156 , TFLOPS: 95.45134041975105, Tokens per sec: 77995.74739307394, Loss: 2.271975517272949 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21157 , TFLOPS: 97.96545723429723, Tokens per sec: 80050.096961364, Loss: 2.2358932495117188 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21158 , TFLOPS: 97.86254822077476, Tokens per sec: 79966.0073573011, Loss: 2.258087158203125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21159 , TFLOPS: 97.16078716890958, Tokens per sec: 79392.58033688551, Loss: 2.2360281944274902 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21160 , TFLOPS: 97.23903126496691, Tokens per sec: 79456.51560195617, Loss: 2.254948139190674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21161 , TFLOPS: 97.21371228515422, Tokens per sec: 79435.8268117827, Loss: 2.2351090908050537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21162 , TFLOPS: 97.856885561371, Tokens per sec: 79961.38025253249, Loss: 2.2208642959594727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21163 , TFLOPS: 96.06788941473356, Tokens per sec: 78499.54544825724, Loss: 2.2457878589630127 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21164 , TFLOPS: 97.72143384702075, Tokens per sec: 79850.69916989957, Loss: 2.241281747817993 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21165 , TFLOPS: 96.567543508361, Tokens per sec: 78907.82567040084, Loss: 2.2474308013916016 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21166 , TFLOPS: 95.73033028482102, Tokens per sec: 78223.71719366066, Loss: 2.2561843395233154 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21167 , TFLOPS: 97.07128224686409, Tokens per sec: 79319.44356102047, Loss: 2.245910167694092 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21168 , TFLOPS: 97.85521283639433, Tokens per sec: 79960.01342589427, Loss: 2.227020263671875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21169 , TFLOPS: 95.68268355187857, Tokens per sec: 78184.78382163735, Loss: 2.2490296363830566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21170 , TFLOPS: 97.76395509352743, Tokens per sec: 79885.44437501444, Loss: 2.266719341278076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21171 , TFLOPS: 96.1754677056929, Tokens per sec: 78587.45043911168, Loss: 2.2409815788269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21172 , TFLOPS: 97.31209733044396, Tokens per sec: 79516.21976494532, Loss: 2.258692741394043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21173 , TFLOPS: 96.63336110630398, Tokens per sec: 78961.60692397575, Loss: 2.267676591873169 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21174 , TFLOPS: 96.78452194319404, Tokens per sec: 79085.12433502474, Loss: 2.2486448287963867 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21175 , TFLOPS: 96.60484213442037, Tokens per sec: 78938.303337906, Loss: 2.2397918701171875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21176 , TFLOPS: 97.1521075572874, Tokens per sec: 79385.48800279593, Loss: 2.2505850791931152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21177 , TFLOPS: 96.8572406773969, Tokens per sec: 79144.54468468865, Loss: 2.2518703937530518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21178 , TFLOPS: 96.26559136354184, Tokens per sec: 78661.0927999294, Loss: 2.266583204269409 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21179 , TFLOPS: 97.37663095366374, Tokens per sec: 79568.95184972144, Loss: 2.254065990447998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21180 , TFLOPS: 97.30859579570644, Tokens per sec: 79513.35857077377, Loss: 2.2420456409454346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21181 , TFLOPS: 96.71670292047646, Tokens per sec: 79029.70766574524, Loss: 2.2457468509674072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21182 , TFLOPS: 97.8396399834606, Tokens per sec: 79947.28844688123, Loss: 2.230355978012085 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21183 , TFLOPS: 96.08465422108522, Tokens per sec: 78513.24440309168, Loss: 2.2480854988098145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21184 , TFLOPS: 97.24584730626411, Tokens per sec: 79462.08516476062, Loss: 2.2554895877838135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21185 , TFLOPS: 97.82758750633559, Tokens per sec: 79937.44005756402, Loss: 2.268461227416992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21186 , TFLOPS: 96.52144521842557, Tokens per sec: 78870.15757101905, Loss: 2.2492289543151855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21187 , TFLOPS: 97.76676295234837, Tokens per sec: 79887.73874873774, Loss: 2.278792381286621 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21188 , TFLOPS: 94.92852110638837, Tokens per sec: 77568.53827355886, Loss: 2.2595059871673584 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21189 , TFLOPS: 97.899119378308, Tokens per sec: 79995.89059154686, Loss: 2.269247531890869 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21190 , TFLOPS: 97.64197471704685, Tokens per sec: 79785.77106933792, Loss: 2.2515807151794434 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21191 , TFLOPS: 97.16619950331703, Tokens per sec: 79397.00289465571, Loss: 2.2518699169158936 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21192 , TFLOPS: 96.16857731223064, Tokens per sec: 78581.82012123917, Loss: 2.248385429382324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21193 , TFLOPS: 97.73952278606154, Tokens per sec: 79865.48010764051, Loss: 2.265597105026245 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21194 , TFLOPS: 94.7442450231055, Tokens per sec: 77417.96154222003, Loss: 2.2462105751037598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21195 , TFLOPS: 97.84738966725246, Tokens per sec: 79953.62091300216, Loss: 2.252352714538574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21196 , TFLOPS: 97.2817639853741, Tokens per sec: 79491.43360783896, Loss: 2.263744592666626 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21197 , TFLOPS: 96.57999536728873, Tokens per sec: 78918.00040487006, Loss: 2.260791063308716 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21198 , TFLOPS: 96.70777811967801, Tokens per sec: 79022.41498126803, Loss: 2.2683331966400146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21199 , TFLOPS: 97.24937846582105, Tokens per sec: 79464.97056613474, Loss: 2.2620794773101807 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21200 , TFLOPS: 98.44270133719726, Tokens per sec: 80440.06540319975, Loss: 2.243077516555786 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/optimizer.pt +[rank0]:[2024-08-31 06:12:37,256] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007714292994933203, 'preprocessing_with_comm': 0.0016148150025401264, 'state_converting': 2.6293732590274885, : 2.6403167549869977}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021200, took 14.96s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019200 + eval ppl=7.848403453826904, eval loss=2.060310125350952 +------------------------------------------------------------------ +iteration: 21201 , TFLOPS: 95.12293486497346, Tokens per sec: 77727.3987603541, Loss: 2.2643587589263916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21202 , TFLOPS: 95.16285588449402, Tokens per sec: 77760.01925306283, Loss: 2.25209903717041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21203 , TFLOPS: 96.74485170123411, Tokens per sec: 79052.7087591164, Loss: 2.2412703037261963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21204 , TFLOPS: 97.87402487822699, Tokens per sec: 79975.38522954079, Loss: 2.250196933746338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21205 , TFLOPS: 96.61048295691978, Tokens per sec: 78942.91259917797, Loss: 2.267040967941284 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21206 , TFLOPS: 97.86125934509037, Tokens per sec: 79964.95418380079, Loss: 2.264496326446533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21207 , TFLOPS: 97.14114622137217, Tokens per sec: 79376.5312130497, Loss: 2.237987756729126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21208 , TFLOPS: 97.11254662923352, Tokens per sec: 79353.16175009422, Loss: 2.2700307369232178 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21209 , TFLOPS: 97.0836521797046, Tokens per sec: 79329.55135157485, Loss: 2.2511353492736816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21210 , TFLOPS: 97.75498730417694, Tokens per sec: 79878.11656348471, Loss: 2.2646219730377197 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21211 , TFLOPS: 95.9159078254854, Tokens per sec: 78375.35737932842, Loss: 2.269407033920288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21212 , TFLOPS: 97.3561786290088, Tokens per sec: 79552.23972875622, Loss: 2.2533602714538574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21213 , TFLOPS: 97.60965018408173, Tokens per sec: 79759.35786134462, Loss: 2.2571451663970947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21214 , TFLOPS: 96.27337580427324, Tokens per sec: 78667.45366684036, Loss: 2.2439825534820557 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21215 , TFLOPS: 98.39758433413839, Tokens per sec: 80403.19913858538, Loss: 2.257338047027588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21216 , TFLOPS: 95.91925677447188, Tokens per sec: 78378.09389175486, Loss: 2.2425336837768555 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21217 , TFLOPS: 97.23744759269195, Tokens per sec: 79455.22154257295, Loss: 2.2709405422210693 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21218 , TFLOPS: 97.96754025862174, Tokens per sec: 80051.79905416137, Loss: 2.2771730422973633 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21219 , TFLOPS: 97.91027461857128, Tokens per sec: 80005.00582552743, Loss: 2.2602720260620117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21220 , TFLOPS: 97.21697127294523, Tokens per sec: 79438.48981460053, Loss: 2.2498531341552734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21221 , TFLOPS: 96.21450738178575, Tokens per sec: 78619.35076341782, Loss: 2.2527694702148438 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21222 , TFLOPS: 97.16118024144367, Tokens per sec: 79392.90152657169, Loss: 2.25730562210083 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21223 , TFLOPS: 97.75325009588651, Tokens per sec: 79876.6970458709, Loss: 2.2477152347564697 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21224 , TFLOPS: 95.69076499004359, Tokens per sec: 78191.38737279671, Loss: 2.2536652088165283 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21225 , TFLOPS: 98.39793756617269, Tokens per sec: 80403.48777357361, Loss: 2.25006103515625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21226 , TFLOPS: 97.34050948955468, Tokens per sec: 79539.43607154873, Loss: 2.253527879714966 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21227 , TFLOPS: 96.05417914175038, Tokens per sec: 78488.34243126886, Loss: 2.266193389892578 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21228 , TFLOPS: 97.7982135445117, Tokens per sec: 79913.43783720468, Loss: 2.2432494163513184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21229 , TFLOPS: 97.07497108377265, Tokens per sec: 79322.45780461756, Loss: 2.2472639083862305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21230 , TFLOPS: 94.90897695677441, Tokens per sec: 77552.56824579812, Loss: 2.2488160133361816 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21231 , TFLOPS: 97.95145444548805, Tokens per sec: 80038.65492216418, Loss: 2.205761194229126 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21232 , TFLOPS: 96.84508834565018, Tokens per sec: 79134.61470158955, Loss: 2.2457382678985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21233 , TFLOPS: 96.59676060140973, Tokens per sec: 78931.69970924585, Loss: 2.2545955181121826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21234 , TFLOPS: 96.1284248102344, Tokens per sec: 78549.0104782407, Loss: 2.2650129795074463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21235 , TFLOPS: 96.68861386923861, Tokens per sec: 79006.7553789023, Loss: 2.2490434646606445 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21236 , TFLOPS: 96.66912568714275, Tokens per sec: 78990.83108364134, Loss: 2.251966953277588 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21237 , TFLOPS: 96.35990645875599, Tokens per sec: 78738.16009211542, Loss: 2.2732009887695312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21238 , TFLOPS: 97.24062458768893, Tokens per sec: 79457.8175469681, Loss: 2.2361483573913574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21239 , TFLOPS: 95.77666781114425, Tokens per sec: 78261.58077925342, Loss: 2.250516414642334 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21240 , TFLOPS: 96.31535580033389, Tokens per sec: 78701.75660228275, Loss: 2.251330614089966 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21241 , TFLOPS: 95.99312741384745, Tokens per sec: 78438.45549278808, Loss: 2.239741802215576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21242 , TFLOPS: 97.8262124242535, Tokens per sec: 79936.31644259699, Loss: 2.269315481185913 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21243 , TFLOPS: 97.27574382948215, Tokens per sec: 79486.51438348705, Loss: 2.269294261932373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21244 , TFLOPS: 97.18412075745292, Tokens per sec: 79411.64681274426, Loss: 2.2455644607543945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21245 , TFLOPS: 97.9097931168316, Tokens per sec: 80004.61237806098, Loss: 2.25081205368042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21246 , TFLOPS: 96.71465397182523, Tokens per sec: 79028.0334170577, Loss: 2.2405760288238525 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21247 , TFLOPS: 97.71458886868338, Tokens per sec: 79845.10596189467, Loss: 2.258607864379883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21248 , TFLOPS: 97.09423469523486, Tokens per sec: 79338.1985974328, Loss: 2.2487399578094482 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21249 , TFLOPS: 96.5854658253465, Tokens per sec: 78922.47045697119, Loss: 2.248911142349243 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21250 , TFLOPS: 97.11799535068278, Tokens per sec: 79357.61404065297, Loss: 2.272097110748291 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21251 , TFLOPS: 97.18788848001998, Tokens per sec: 79414.72551584362, Loss: 2.252187490463257 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21252 , TFLOPS: 97.22052218136557, Tokens per sec: 79441.3913532793, Loss: 2.243236780166626 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21253 , TFLOPS: 98.51119421500256, Tokens per sec: 80496.03269681802, Loss: 2.254225015640259 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21254 , TFLOPS: 95.29332295875142, Tokens per sec: 77866.62725795982, Loss: 2.25673770904541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21255 , TFLOPS: 96.69278128068136, Tokens per sec: 79010.16067807088, Loss: 2.2444379329681396 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21256 , TFLOPS: 97.29616271748849, Tokens per sec: 79503.19918250597, Loss: 2.2468223571777344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21257 , TFLOPS: 97.90484258350699, Tokens per sec: 80000.56716984337, Loss: 2.244699001312256 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21258 , TFLOPS: 97.74287690499312, Tokens per sec: 79868.22084456221, Loss: 2.2361481189727783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21259 , TFLOPS: 95.78147067921888, Tokens per sec: 78265.505326394, Loss: 2.227539539337158 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21260 , TFLOPS: 97.07482303667113, Tokens per sec: 79322.33683151985, Loss: 2.2575595378875732 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21261 , TFLOPS: 98.45885974640649, Tokens per sec: 80453.26885531908, Loss: 2.254880905151367 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21262 , TFLOPS: 95.61130543014566, Tokens per sec: 78126.45892093309, Loss: 2.251068353652954 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21263 , TFLOPS: 98.4076783925407, Tokens per sec: 80411.44725355008, Loss: 2.2573368549346924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21264 , TFLOPS: 96.62782758179242, Tokens per sec: 78957.08533865177, Loss: 2.251136302947998 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21265 , TFLOPS: 95.94253277440968, Tokens per sec: 78397.1133104816, Loss: 2.2601399421691895 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21266 , TFLOPS: 97.72639411453582, Tokens per sec: 79854.75233217477, Loss: 2.2731740474700928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21267 , TFLOPS: 97.20073217213883, Tokens per sec: 79425.22042729998, Loss: 2.2484049797058105 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21268 , TFLOPS: 95.5931455526956, Tokens per sec: 78111.6200175918, Loss: 2.2309796810150146 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21269 , TFLOPS: 96.63876000257677, Tokens per sec: 78966.0185011002, Loss: 2.23695707321167 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21270 , TFLOPS: 96.81867900165386, Tokens per sec: 79113.03494677316, Loss: 2.254227638244629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21271 , TFLOPS: 96.67651556471719, Tokens per sec: 78996.86954283978, Loss: 2.2762603759765625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21272 , TFLOPS: 95.56290589456016, Tokens per sec: 78086.91041449129, Loss: 2.2748947143554688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21273 , TFLOPS: 97.23628408066986, Tokens per sec: 79454.27080694823, Loss: 2.2750167846679688 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21274 , TFLOPS: 96.10194800449551, Tokens per sec: 78527.37559869782, Loss: 2.252631187438965 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21275 , TFLOPS: 96.41333400499985, Tokens per sec: 78781.81711549868, Loss: 2.2425007820129395 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21276 , TFLOPS: 97.18558459648509, Tokens per sec: 79412.84295330002, Loss: 2.2399332523345947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21277 , TFLOPS: 96.37406735083545, Tokens per sec: 78749.73132156735, Loss: 2.2630839347839355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21278 , TFLOPS: 95.73349455049697, Tokens per sec: 78226.30279659992, Loss: 2.232480525970459 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21279 , TFLOPS: 97.27626456526384, Tokens per sec: 79486.93989009921, Loss: 2.2685279846191406 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21280 , TFLOPS: 98.31050152228543, Tokens per sec: 80332.04153130933, Loss: 2.2619564533233643 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21281 , TFLOPS: 96.70803539980339, Tokens per sec: 79022.62521148146, Loss: 2.2825138568878174 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21282 , TFLOPS: 98.48614233948636, Tokens per sec: 80475.56216444107, Loss: 2.269826650619507 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21283 , TFLOPS: 96.64557474135647, Tokens per sec: 78971.58699958408, Loss: 2.224229097366333 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21284 , TFLOPS: 97.15821308220956, Tokens per sec: 79390.47698438003, Loss: 2.2256886959075928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21285 , TFLOPS: 97.05613893030741, Tokens per sec: 79307.06956723834, Loss: 2.243644952774048 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21286 , TFLOPS: 97.82470911373643, Tokens per sec: 79935.08804887482, Loss: 2.236018180847168 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21287 , TFLOPS: 95.48856583978456, Tokens per sec: 78026.16524205088, Loss: 2.2470080852508545 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21288 , TFLOPS: 97.80444031795493, Tokens per sec: 79918.52589407653, Loss: 2.2656197547912598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21289 , TFLOPS: 97.25111604766454, Tokens per sec: 79466.39038898838, Loss: 2.2443809509277344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21290 , TFLOPS: 97.2078808754253, Tokens per sec: 79431.06181677936, Loss: 2.258035898208618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21291 , TFLOPS: 97.76291286166872, Tokens per sec: 79884.59274053325, Loss: 2.257580280303955 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21292 , TFLOPS: 95.92442444713875, Tokens per sec: 78382.31653011871, Loss: 2.2234156131744385 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21293 , TFLOPS: 97.20010516357203, Tokens per sec: 79424.70808245942, Loss: 2.261453151702881 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21294 , TFLOPS: 97.35934725893235, Tokens per sec: 79554.82889783454, Loss: 2.2659151554107666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21295 , TFLOPS: 97.86579912805459, Tokens per sec: 79968.66375732527, Loss: 2.2378671169281006 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21296 , TFLOPS: 97.18194495656317, Tokens per sec: 79409.86890982714, Loss: 2.254087448120117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21297 , TFLOPS: 96.21852969369749, Tokens per sec: 78622.63749802443, Loss: 2.2514824867248535 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21298 , TFLOPS: 97.1650220431817, Tokens per sec: 79396.04076166861, Loss: 2.2368128299713135 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21299 , TFLOPS: 97.12822296365862, Tokens per sec: 79365.9712864977, Loss: 2.2520875930786133 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21300 , TFLOPS: 96.35765129678768, Tokens per sec: 78736.31734121809, Loss: 2.2633612155914307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21301 , TFLOPS: 97.83075940545152, Tokens per sec: 79940.0318979836, Loss: 2.26796817779541 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21302 , TFLOPS: 96.7118680936998, Tokens per sec: 79025.75700432653, Loss: 2.2645151615142822 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21303 , TFLOPS: 96.563881246378, Tokens per sec: 78904.83314185952, Loss: 2.236891508102417 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21304 , TFLOPS: 97.1811011652651, Tokens per sec: 79409.17942623638, Loss: 2.2455434799194336 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21305 , TFLOPS: 97.13832631624726, Tokens per sec: 79374.22699598133, Loss: 2.2579567432403564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21306 , TFLOPS: 95.01858066999954, Tokens per sec: 77642.12826132495, Loss: 2.2618815898895264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21307 , TFLOPS: 97.33724590215847, Tokens per sec: 79536.76931027508, Loss: 2.2472450733184814 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21308 , TFLOPS: 96.896351506859, Tokens per sec: 79176.50315024445, Loss: 2.2777931690216064 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21309 , TFLOPS: 97.31023000456236, Tokens per sec: 79514.69392489821, Loss: 2.2598884105682373 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21310 , TFLOPS: 95.98245080638519, Tokens per sec: 78429.73136199034, Loss: 2.2515056133270264 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21311 , TFLOPS: 97.11655980408908, Tokens per sec: 79356.44101857701, Loss: 2.2450881004333496 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21312 , TFLOPS: 96.79428593297885, Tokens per sec: 79093.10274242537, Loss: 2.2444849014282227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21313 , TFLOPS: 96.78815084269355, Tokens per sec: 79088.08960222198, Loss: 2.2584378719329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21314 , TFLOPS: 97.19886781728835, Tokens per sec: 79423.69701495889, Loss: 2.2443687915802 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21315 , TFLOPS: 95.8724902037129, Tokens per sec: 78339.87972290909, Loss: 2.264448642730713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21316 , TFLOPS: 96.26890456822431, Tokens per sec: 78663.80010476493, Loss: 2.2484335899353027 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21317 , TFLOPS: 96.5743744849171, Tokens per sec: 78913.40743718982, Loss: 2.2454328536987305 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21318 , TFLOPS: 98.35985146506229, Tokens per sec: 80372.36663993284, Loss: 2.2686235904693604 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21319 , TFLOPS: 96.7528577893204, Tokens per sec: 79059.25073978676, Loss: 2.274824619293213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21320 , TFLOPS: 97.68483454252906, Tokens per sec: 79820.79293604949, Loss: 2.239485502243042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21321 , TFLOPS: 97.98037118658041, Tokens per sec: 80062.28353569389, Loss: 2.2549538612365723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21322 , TFLOPS: 96.42595696869942, Tokens per sec: 78792.1316640816, Loss: 2.265468120574951 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21323 , TFLOPS: 97.25851524402684, Tokens per sec: 79472.43646280827, Loss: 2.2450637817382812 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21324 , TFLOPS: 97.8195386648565, Tokens per sec: 79930.86314199583, Loss: 2.258695602416992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21325 , TFLOPS: 95.93273118176238, Tokens per sec: 78389.10417681404, Loss: 2.2742509841918945 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21326 , TFLOPS: 97.2283825918948, Tokens per sec: 79447.81429706771, Loss: 2.243427276611328 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21327 , TFLOPS: 97.15518506466688, Tokens per sec: 79388.0027132978, Loss: 2.26350736618042 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21328 , TFLOPS: 96.47300775212632, Tokens per sec: 78830.57807041462, Loss: 2.25106143951416 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21329 , TFLOPS: 98.42214552834564, Tokens per sec: 80423.2687226335, Loss: 2.2666175365448 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21330 , TFLOPS: 94.89413851410355, Tokens per sec: 77540.44337231627, Loss: 2.243082046508789 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21331 , TFLOPS: 97.23820613866478, Tokens per sec: 79455.84137001405, Loss: 2.2634987831115723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21332 , TFLOPS: 96.7335142730419, Tokens per sec: 79043.44465468918, Loss: 2.253246307373047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21333 , TFLOPS: 97.89775296298934, Tokens per sec: 79994.77405841534, Loss: 2.2313485145568848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21334 , TFLOPS: 97.06382573574996, Tokens per sec: 79313.35065384144, Loss: 2.248353958129883 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21335 , TFLOPS: 96.25279737447988, Tokens per sec: 78650.6385021203, Loss: 2.2589268684387207 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21336 , TFLOPS: 96.65319221533714, Tokens per sec: 78977.81143366492, Loss: 2.2682509422302246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21337 , TFLOPS: 97.73306826371814, Tokens per sec: 79860.20595127954, Loss: 2.2527916431427 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21338 , TFLOPS: 96.10658872957207, Tokens per sec: 78531.16765462077, Loss: 2.266021966934204 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21339 , TFLOPS: 97.98613506846301, Tokens per sec: 80066.99335195568, Loss: 2.2765111923217773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21340 , TFLOPS: 97.21712496340201, Tokens per sec: 79438.61539902912, Loss: 2.2527167797088623 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21341 , TFLOPS: 95.82725482019224, Tokens per sec: 78302.91672657199, Loss: 2.266735792160034 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21342 , TFLOPS: 97.874641579316, Tokens per sec: 79975.8891518759, Loss: 2.264798402786255 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21343 , TFLOPS: 97.81220523375494, Tokens per sec: 79924.87080666344, Loss: 2.249554395675659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21344 , TFLOPS: 95.49663143346784, Tokens per sec: 78032.75584627634, Loss: 2.249937057495117 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21345 , TFLOPS: 97.99826842520652, Tokens per sec: 80076.90783009118, Loss: 2.2644786834716797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21346 , TFLOPS: 96.82606269652359, Tokens per sec: 79119.06835392443, Loss: 2.2520647048950195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21347 , TFLOPS: 95.89532995075062, Tokens per sec: 78358.542668161, Loss: 2.2401082515716553 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21348 , TFLOPS: 96.81607274032706, Tokens per sec: 79110.90530355189, Loss: 2.2606520652770996 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21349 , TFLOPS: 97.1690170811146, Tokens per sec: 79399.30521000501, Loss: 2.244554042816162 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21350 , TFLOPS: 96.53313206879834, Tokens per sec: 78879.70719730493, Loss: 2.25144100189209 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21351 , TFLOPS: 96.88713945455176, Tokens per sec: 79168.97574516488, Loss: 2.2494957447052 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21352 , TFLOPS: 97.23003640481001, Tokens per sec: 79449.16567017366, Loss: 2.253153085708618 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21353 , TFLOPS: 95.87450744248532, Tokens per sec: 78341.52806063837, Loss: 2.2480530738830566 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21354 , TFLOPS: 96.20192683214896, Tokens per sec: 78609.07087245744, Loss: 2.264129638671875 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21355 , TFLOPS: 96.56046108341303, Tokens per sec: 78902.03843865801, Loss: 2.25234055519104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21356 , TFLOPS: 98.35816168782036, Tokens per sec: 80370.98587944971, Loss: 2.2478911876678467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21357 , TFLOPS: 96.77893166073694, Tokens per sec: 79080.55637132237, Loss: 2.26239275932312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21358 , TFLOPS: 98.36264163720239, Tokens per sec: 80374.64656141392, Loss: 2.2704758644104004 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21359 , TFLOPS: 97.19465314413098, Tokens per sec: 79420.25309702667, Loss: 2.2476229667663574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21360 , TFLOPS: 97.24947661397057, Tokens per sec: 79465.05076551426, Loss: 2.2598767280578613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21361 , TFLOPS: 96.46070439412179, Tokens per sec: 78820.52467987251, Loss: 2.2536137104034424 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21362 , TFLOPS: 97.75376055426902, Tokens per sec: 79877.1141545556, Loss: 2.248015880584717 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21363 , TFLOPS: 96.60375564131056, Tokens per sec: 78937.41553641716, Loss: 2.267782211303711 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21364 , TFLOPS: 96.59009116440319, Tokens per sec: 78926.24994058123, Loss: 2.25766658782959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21365 , TFLOPS: 97.78625273473709, Tokens per sec: 79903.66435163919, Loss: 2.249241828918457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21366 , TFLOPS: 96.64320302414777, Tokens per sec: 78969.64900839941, Loss: 2.256038188934326 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21367 , TFLOPS: 98.39018140347962, Tokens per sec: 80397.1500133757, Loss: 2.2425923347473145 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21368 , TFLOPS: 94.709920926887, Tokens per sec: 77389.91443961905, Loss: 2.225803852081299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21369 , TFLOPS: 97.24432867455052, Tokens per sec: 79460.84425169438, Loss: 2.257282257080078 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21370 , TFLOPS: 94.44709272225536, Tokens per sec: 77175.15074781465, Loss: 2.2713308334350586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21371 , TFLOPS: 97.87031480764152, Tokens per sec: 79972.35363535967, Loss: 2.2659761905670166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21372 , TFLOPS: 96.56802995550163, Tokens per sec: 78908.22315888172, Loss: 2.2903859615325928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21373 , TFLOPS: 95.75717420295416, Tokens per sec: 78245.65205019117, Loss: 2.246796131134033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21374 , TFLOPS: 96.62845732383293, Tokens per sec: 78957.59991708407, Loss: 2.2374989986419678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21375 , TFLOPS: 97.8745165046675, Tokens per sec: 79975.78695016091, Loss: 2.261272430419922 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21376 , TFLOPS: 96.04800321878866, Tokens per sec: 78483.29592563443, Loss: 2.260404586791992 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21377 , TFLOPS: 97.84877149871375, Tokens per sec: 79954.75004306069, Loss: 2.2617475986480713 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21378 , TFLOPS: 97.35362862174311, Tokens per sec: 79550.1560521764, Loss: 2.2471182346343994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21379 , TFLOPS: 96.45557863539968, Tokens per sec: 78816.33629047191, Loss: 2.255082130432129 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21380 , TFLOPS: 97.11003109413424, Tokens per sec: 79351.10624160898, Loss: 2.2491211891174316 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21381 , TFLOPS: 97.78403721679041, Tokens per sec: 79901.85399489246, Loss: 2.2556614875793457 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21382 , TFLOPS: 94.46872810278202, Tokens per sec: 77192.82957418723, Loss: 2.2525851726531982 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21383 , TFLOPS: 97.92202680305749, Tokens per sec: 80014.60883799924, Loss: 2.2441763877868652 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21384 , TFLOPS: 96.75939433950255, Tokens per sec: 79064.59192320636, Loss: 2.2483325004577637 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21385 , TFLOPS: 96.60110516686714, Tokens per sec: 78935.24976552017, Loss: 2.239929437637329 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21386 , TFLOPS: 96.85020129238406, Tokens per sec: 79138.79262198477, Loss: 2.2403759956359863 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21387 , TFLOPS: 97.20304343521092, Tokens per sec: 79427.1090198534, Loss: 2.2462425231933594 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21388 , TFLOPS: 95.98856545631867, Tokens per sec: 78434.72779985607, Loss: 2.272066354751587 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21389 , TFLOPS: 96.40713615367302, Tokens per sec: 78776.75269163265, Loss: 2.274425506591797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21390 , TFLOPS: 97.92378970538954, Tokens per sec: 80016.04935087591, Loss: 2.256657123565674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21391 , TFLOPS: 96.38082718020362, Tokens per sec: 78755.25495215757, Loss: 2.256845474243164 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21392 , TFLOPS: 96.27339335859708, Tokens per sec: 78667.46801093026, Loss: 2.261455535888672 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21393 , TFLOPS: 95.9903603657162, Tokens per sec: 78436.19446652988, Loss: 2.2591354846954346 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21394 , TFLOPS: 98.42920189242648, Tokens per sec: 80429.03465937094, Loss: 2.2572262287139893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21395 , TFLOPS: 97.31230643666778, Tokens per sec: 79516.39063102368, Loss: 2.2789289951324463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21396 , TFLOPS: 97.83271947534503, Tokens per sec: 79941.63352155028, Loss: 2.2242684364318848 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21397 , TFLOPS: 97.2612284116658, Tokens per sec: 79474.6534619292, Loss: 2.25394344329834 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21398 , TFLOPS: 96.61985589863552, Tokens per sec: 78950.57147113488, Loss: 2.275468111038208 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21399 , TFLOPS: 97.81287264452747, Tokens per sec: 79925.41616518637, Loss: 2.2494540214538574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21400 , TFLOPS: 96.58354260321465, Tokens per sec: 78920.89894265911, Loss: 2.2526021003723145 +------------------------------------------------------------------ +Saving checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400 +Saving model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/model.pt +Saved model state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/model.pt +Saving optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/optimizer.pt +[rank0]:[2024-08-31 07:08:12,045] torch.distributed.fsdp._debug_utils: [WARNING] FSDP _optim_state_dict() profiling: defaultdict(, {'preprocessing': 0.007742210000287741, 'preprocessing_with_comm': 0.0015775659994687885, 'state_converting': 2.6756865630159155, : 2.6866186279803514}) +Saved optimizer state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/optimizer.pt +Saving scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/scheduler.pt +Saved scheduler state dict to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/scheduler.pt +Saving RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/rng.pt +Saved RNG states to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400/rng.pt +Saved checkpoint to /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0021400, took 15.02s +Deleting checkpoint /work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5/iter_0019400 + eval ppl=7.595958232879639, eval loss=2.027616262435913 +------------------------------------------------------------------ +iteration: 21401 , TFLOPS: 96.89492355195719, Tokens per sec: 79175.33633153535, Loss: 2.2659411430358887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21402 , TFLOPS: 96.28945016410731, Tokens per sec: 78680.58844005162, Loss: 2.241241455078125 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21403 , TFLOPS: 97.89752824322373, Tokens per sec: 79994.59043411005, Loss: 2.2610256671905518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21404 , TFLOPS: 96.0792530104197, Tokens per sec: 78508.83093481736, Loss: 2.2676901817321777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21405 , TFLOPS: 95.91123049618275, Tokens per sec: 78371.53541315005, Loss: 2.2509334087371826 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21406 , TFLOPS: 98.40595241336085, Tokens per sec: 80410.03691153157, Loss: 2.27065372467041 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21407 , TFLOPS: 97.82130919018425, Tokens per sec: 79932.30988381853, Loss: 2.2397546768188477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21408 , TFLOPS: 97.24017619793199, Tokens per sec: 79457.45115615513, Loss: 2.2657065391540527 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21409 , TFLOPS: 98.40140935799553, Tokens per sec: 80406.32466404393, Loss: 2.2446281909942627 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21410 , TFLOPS: 97.78822148547594, Tokens per sec: 79905.27306854806, Loss: 2.2653162479400635 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21411 , TFLOPS: 97.75100725196684, Tokens per sec: 79874.86436036818, Loss: 2.2313027381896973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21412 , TFLOPS: 98.40829702400362, Tokens per sec: 80411.95275324341, Loss: 2.2617526054382324 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21413 , TFLOPS: 96.42229997763937, Tokens per sec: 78789.1434425471, Loss: 2.261070489883423 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21414 , TFLOPS: 96.75737832124813, Tokens per sec: 79062.94458279375, Loss: 2.2453160285949707 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21415 , TFLOPS: 98.31960155341972, Tokens per sec: 80339.47740100478, Loss: 2.2860350608825684 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21416 , TFLOPS: 97.29672971874844, Tokens per sec: 79503.66249383139, Loss: 2.2469823360443115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21417 , TFLOPS: 97.83073894803422, Tokens per sec: 79940.0151817013, Loss: 2.261326789855957 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21418 , TFLOPS: 97.77102660977792, Tokens per sec: 79891.22269297975, Loss: 2.258791208267212 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21419 , TFLOPS: 95.80084525105737, Tokens per sec: 78281.3367877891, Loss: 2.238032341003418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21420 , TFLOPS: 98.32961435375869, Tokens per sec: 80347.65911791417, Loss: 2.2644906044006348 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21421 , TFLOPS: 96.57481123134399, Tokens per sec: 78913.76431393808, Loss: 2.2771716117858887 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21422 , TFLOPS: 97.76918312442137, Tokens per sec: 79889.71633363937, Loss: 2.271467447280884 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21423 , TFLOPS: 96.81935309976997, Tokens per sec: 79113.58576969667, Loss: 2.2248363494873047 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21424 , TFLOPS: 97.25964155391077, Tokens per sec: 79473.35679960823, Loss: 2.257624864578247 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21425 , TFLOPS: 97.1432643901966, Tokens per sec: 79378.26202332268, Loss: 2.2631258964538574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21426 , TFLOPS: 97.24436553128346, Tokens per sec: 79460.87436827965, Loss: 2.2411482334136963 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21427 , TFLOPS: 97.1029020953484, Tokens per sec: 79345.2809531843, Loss: 2.259799003601074 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21428 , TFLOPS: 97.98786867553251, Tokens per sec: 80068.40992691918, Loss: 2.2399566173553467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21429 , TFLOPS: 96.60974836806774, Tokens per sec: 78942.31234771697, Loss: 2.256971597671509 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21430 , TFLOPS: 97.19550964419639, Tokens per sec: 79420.95296527838, Loss: 2.244828939437866 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21431 , TFLOPS: 98.34098185571612, Tokens per sec: 80356.94779638913, Loss: 2.2602691650390625 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21432 , TFLOPS: 97.06793609399867, Tokens per sec: 79316.7093333765, Loss: 2.2486250400543213 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21433 , TFLOPS: 96.42481660594403, Tokens per sec: 78791.19984431862, Loss: 2.2661404609680176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21434 , TFLOPS: 97.79324955764726, Tokens per sec: 79909.38163575334, Loss: 2.2531025409698486 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21435 , TFLOPS: 97.04546194749759, Tokens per sec: 79298.34512973446, Loss: 2.2747693061828613 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21436 , TFLOPS: 97.14308033602002, Tokens per sec: 79378.11162791701, Loss: 2.264702558517456 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21437 , TFLOPS: 97.20007221806509, Tokens per sec: 79424.68116183761, Loss: 2.2576804161071777 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21438 , TFLOPS: 96.06297884040157, Tokens per sec: 78495.53289156115, Loss: 2.2551825046539307 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21439 , TFLOPS: 97.19001782995846, Tokens per sec: 79416.46546249269, Loss: 2.2408103942871094 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21440 , TFLOPS: 96.36901732860244, Tokens per sec: 78745.60481840161, Loss: 2.2384021282196045 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21441 , TFLOPS: 97.92974984839653, Tokens per sec: 80020.91953715465, Loss: 2.2531139850616455 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21442 , TFLOPS: 96.57145574834345, Tokens per sec: 78911.02246240052, Loss: 2.269077777862549 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21443 , TFLOPS: 95.52892936039142, Tokens per sec: 78059.1473137882, Loss: 2.247511386871338 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21444 , TFLOPS: 98.36692075090521, Tokens per sec: 80378.14313537469, Loss: 2.259159803390503 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21445 , TFLOPS: 97.16310727511181, Tokens per sec: 79394.47615538818, Loss: 2.2531983852386475 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21446 , TFLOPS: 97.22018439257229, Tokens per sec: 79441.11533736087, Loss: 2.255211353302002 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21447 , TFLOPS: 97.8028437099287, Tokens per sec: 79917.22126455796, Loss: 2.2472939491271973 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21448 , TFLOPS: 97.87085814915665, Tokens per sec: 79972.79761369854, Loss: 2.255535125732422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21449 , TFLOPS: 97.76776061198738, Tokens per sec: 79888.55396211082, Loss: 2.21968150138855 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21450 , TFLOPS: 98.46147519129408, Tokens per sec: 80455.40600266428, Loss: 2.253953456878662 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21451 , TFLOPS: 96.68321437870576, Tokens per sec: 79002.34331619265, Loss: 2.27426815032959 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21452 , TFLOPS: 97.72053869630835, Tokens per sec: 79849.96771920923, Loss: 2.267565965652466 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21453 , TFLOPS: 98.31863103442623, Tokens per sec: 80338.68436495183, Loss: 2.2431881427764893 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21454 , TFLOPS: 96.66809338653798, Tokens per sec: 78989.98756424339, Loss: 2.2488276958465576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21455 , TFLOPS: 97.62918771327625, Tokens per sec: 79775.3224793902, Loss: 2.263223648071289 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21456 , TFLOPS: 97.85009129839692, Tokens per sec: 79955.82848535647, Loss: 2.26161789894104 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21457 , TFLOPS: 95.95728535886532, Tokens per sec: 78409.16802700561, Loss: 2.2705135345458984 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21458 , TFLOPS: 98.41861975251739, Tokens per sec: 80420.3877204427, Loss: 2.2654850482940674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21459 , TFLOPS: 96.66784648033253, Tokens per sec: 78989.78581082699, Loss: 2.247437000274658 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21460 , TFLOPS: 97.69145074580166, Tokens per sec: 79826.19920605975, Loss: 2.262471914291382 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21461 , TFLOPS: 96.74712337995521, Tokens per sec: 79054.56500627763, Loss: 2.2355942726135254 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21462 , TFLOPS: 96.73449388183717, Tokens per sec: 79044.24511825317, Loss: 2.232785940170288 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21463 , TFLOPS: 97.91590198255994, Tokens per sec: 80009.6040895038, Loss: 2.252548933029175 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21464 , TFLOPS: 96.63773932007462, Tokens per sec: 78965.18447515299, Loss: 2.267796039581299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21465 , TFLOPS: 97.17943059504637, Tokens per sec: 79407.81436030635, Loss: 2.2114758491516113 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21466 , TFLOPS: 97.98561262509504, Tokens per sec: 80066.56645003082, Loss: 2.263296604156494 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21467 , TFLOPS: 96.5409357798074, Tokens per sec: 78886.08381045584, Loss: 2.2649857997894287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21468 , TFLOPS: 97.08335906783171, Tokens per sec: 79329.31184231813, Loss: 2.2640457153320312 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21469 , TFLOPS: 98.31112810605921, Tokens per sec: 80332.55352904058, Loss: 2.218315601348877 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21470 , TFLOPS: 97.0355867533739, Tokens per sec: 79290.27585440589, Loss: 2.263118028640747 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21471 , TFLOPS: 97.13252997177462, Tokens per sec: 79369.49065370156, Loss: 2.243798017501831 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21472 , TFLOPS: 96.53796459643274, Tokens per sec: 78883.65598003533, Loss: 2.2468409538269043 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21473 , TFLOPS: 97.75884685333982, Tokens per sec: 79881.27030045933, Loss: 2.2421023845672607 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21474 , TFLOPS: 96.62890990597705, Tokens per sec: 78957.96973360454, Loss: 2.2495100498199463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21475 , TFLOPS: 97.12042804993015, Tokens per sec: 79359.60186183127, Loss: 2.249310255050659 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21476 , TFLOPS: 95.09596967914825, Tokens per sec: 77705.3648128707, Loss: 2.250483989715576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21477 , TFLOPS: 96.80645885069994, Tokens per sec: 79103.04954685405, Loss: 2.275467872619629 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21478 , TFLOPS: 96.58123638188862, Tokens per sec: 78919.01446984618, Loss: 2.2323191165924072 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21479 , TFLOPS: 98.01085421186686, Tokens per sec: 80087.19200035832, Loss: 2.269991397857666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21480 , TFLOPS: 96.6074786600466, Tokens per sec: 78940.4577108654, Loss: 2.273125171661377 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21481 , TFLOPS: 95.88937354429488, Tokens per sec: 78353.67553511447, Loss: 2.257598400115967 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21482 , TFLOPS: 98.33317568187614, Tokens per sec: 80350.56917079569, Loss: 2.264270544052124 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21483 , TFLOPS: 98.49212995633069, Tokens per sec: 80480.45480029953, Loss: 2.27075457572937 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21484 , TFLOPS: 97.73796582647338, Tokens per sec: 79864.20787588133, Loss: 2.262040853500366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21485 , TFLOPS: 98.40309200974083, Tokens per sec: 80407.69960210031, Loss: 2.279754400253296 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21486 , TFLOPS: 97.22666975482208, Tokens per sec: 79446.41469380325, Loss: 2.276459217071533 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21487 , TFLOPS: 97.7965781476493, Tokens per sec: 79912.10151232954, Loss: 2.2577321529388428 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21488 , TFLOPS: 98.35754455437066, Tokens per sec: 80370.48160382155, Loss: 2.2592129707336426 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21489 , TFLOPS: 96.66672408707706, Tokens per sec: 78988.86867440496, Loss: 2.2671570777893066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21490 , TFLOPS: 97.71780298927115, Tokens per sec: 79847.73230256577, Loss: 2.257542133331299 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21491 , TFLOPS: 96.99801707069709, Tokens per sec: 79259.57669956099, Loss: 2.2495510578155518 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21492 , TFLOPS: 97.24615421051375, Tokens per sec: 79462.3359441234, Loss: 2.259248971939087 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21493 , TFLOPS: 98.39433150857622, Tokens per sec: 80400.54117108334, Loss: 2.2623274326324463 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21494 , TFLOPS: 97.15477789633651, Tokens per sec: 79387.67000557361, Loss: 2.211467981338501 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21495 , TFLOPS: 95.95895018934013, Tokens per sec: 78410.52840284312, Loss: 2.2496118545532227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21496 , TFLOPS: 98.41784735967346, Tokens per sec: 80419.75657836677, Loss: 2.231121063232422 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21497 , TFLOPS: 95.95064809940196, Tokens per sec: 78403.74455143984, Loss: 2.250826358795166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21498 , TFLOPS: 98.44219178235022, Tokens per sec: 80439.64903281713, Loss: 2.265886068344116 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21499 , TFLOPS: 96.46424949689516, Tokens per sec: 78823.4214746077, Loss: 2.255512237548828 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21500 , TFLOPS: 96.55958186729023, Tokens per sec: 78901.32000853105, Loss: 2.26222825050354 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21501 , TFLOPS: 97.06699046235929, Tokens per sec: 79315.93663342122, Loss: 2.243229389190674 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21502 , TFLOPS: 96.3459196554092, Tokens per sec: 78726.73111504842, Loss: 2.2389566898345947 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21503 , TFLOPS: 97.80043993334431, Tokens per sec: 79915.25707683215, Loss: 2.256441831588745 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21504 , TFLOPS: 97.5250123766748, Tokens per sec: 79690.19813014142, Loss: 2.248905658721924 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21505 , TFLOPS: 96.57423784135949, Tokens per sec: 78913.29578222138, Loss: 2.261678695678711 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21506 , TFLOPS: 97.10592634948618, Tokens per sec: 79347.75214909151, Loss: 2.2626404762268066 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21507 , TFLOPS: 97.79617754045354, Tokens per sec: 79911.77416587733, Loss: 2.2313976287841797 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21508 , TFLOPS: 97.01845525913764, Tokens per sec: 79276.27726946154, Loss: 2.2609071731567383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21509 , TFLOPS: 96.67095158674238, Tokens per sec: 78992.32307320701, Loss: 2.252833127975464 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21510 , TFLOPS: 97.2652374868569, Tokens per sec: 79477.92938047065, Loss: 2.2557411193847656 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21511 , TFLOPS: 97.02466783424512, Tokens per sec: 79281.35372450778, Loss: 2.2531001567840576 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21512 , TFLOPS: 97.71764251405246, Tokens per sec: 79847.60117413358, Loss: 2.256805896759033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21513 , TFLOPS: 97.74034065146535, Tokens per sec: 79866.1484065157, Loss: 2.2656145095825195 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21514 , TFLOPS: 95.94367586176133, Tokens per sec: 78398.04735658232, Loss: 2.2730820178985596 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21515 , TFLOPS: 97.18064289554071, Tokens per sec: 79408.80496224767, Loss: 2.2460711002349854 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21516 , TFLOPS: 97.06846528146212, Tokens per sec: 79317.14174607537, Loss: 2.254267454147339 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21517 , TFLOPS: 97.96798819634012, Tokens per sec: 80052.16507560201, Loss: 2.250993251800537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21518 , TFLOPS: 96.08357162929232, Tokens per sec: 78512.35978946932, Loss: 2.25065016746521 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21519 , TFLOPS: 96.68803457406203, Tokens per sec: 79006.28202190112, Loss: 2.255986213684082 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21520 , TFLOPS: 97.67106344925038, Tokens per sec: 79809.54022123202, Loss: 2.2812421321868896 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21521 , TFLOPS: 97.7547778859233, Tokens per sec: 79877.94544243877, Loss: 2.2530570030212402 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21522 , TFLOPS: 97.80965444167165, Tokens per sec: 79922.78648878896, Loss: 2.2542214393615723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21523 , TFLOPS: 97.13174549788194, Tokens per sec: 79368.8496398897, Loss: 2.260059356689453 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21524 , TFLOPS: 97.87046335811708, Tokens per sec: 79972.4750197773, Loss: 2.2629024982452393 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21525 , TFLOPS: 97.76562818256807, Tokens per sec: 79886.81149913868, Loss: 2.2725484371185303 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21526 , TFLOPS: 97.85523445516108, Tokens per sec: 79960.03109114511, Loss: 2.2607641220092773 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21527 , TFLOPS: 96.73997511282114, Tokens per sec: 79048.72397318961, Loss: 2.2559561729431152 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21528 , TFLOPS: 97.62765062667663, Tokens per sec: 79774.06648636151, Loss: 2.2492761611938477 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21529 , TFLOPS: 97.70636957254753, Tokens per sec: 79838.38976343858, Loss: 2.2413601875305176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21530 , TFLOPS: 97.12409587770173, Tokens per sec: 79362.59893832171, Loss: 2.2405953407287598 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21531 , TFLOPS: 98.40231876292631, Tokens per sec: 80407.06776222303, Loss: 2.256131887435913 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21532 , TFLOPS: 96.32079226865359, Tokens per sec: 78706.19888049393, Loss: 2.2396512031555176 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21533 , TFLOPS: 96.03344242938157, Tokens per sec: 78471.39792978183, Loss: 2.275418996810913 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21534 , TFLOPS: 97.76069150855034, Tokens per sec: 79882.77761571751, Loss: 2.2696633338928223 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21535 , TFLOPS: 97.31106019299804, Tokens per sec: 79515.37229323995, Loss: 2.245631456375122 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21536 , TFLOPS: 97.75266173446457, Tokens per sec: 79876.21628060724, Loss: 2.267932891845703 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21537 , TFLOPS: 96.71475288455244, Tokens per sec: 79028.11424119333, Loss: 2.2500624656677246 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21538 , TFLOPS: 95.94369592184742, Tokens per sec: 78398.06374819505, Loss: 2.253936529159546 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21539 , TFLOPS: 97.29532661631632, Tokens per sec: 79502.51598271503, Loss: 2.252652168273926 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21540 , TFLOPS: 97.34118183279125, Tokens per sec: 79539.98546051497, Loss: 2.2492096424102783 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21541 , TFLOPS: 97.2322443654341, Tokens per sec: 79450.96985163756, Loss: 2.240448236465454 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21542 , TFLOPS: 97.50296114325582, Tokens per sec: 79672.17949966554, Loss: 2.2515053749084473 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21543 , TFLOPS: 96.57634406904388, Tokens per sec: 78915.01683508142, Loss: 2.231192111968994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21544 , TFLOPS: 97.64931616175906, Tokens per sec: 79791.76995279787, Loss: 2.247148275375366 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21545 , TFLOPS: 98.4116947883841, Tokens per sec: 80414.72915398495, Loss: 2.2592735290527344 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21546 , TFLOPS: 97.75902312375989, Tokens per sec: 79881.41433555717, Loss: 2.2629616260528564 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21547 , TFLOPS: 96.37984949138969, Tokens per sec: 78754.45605745974, Loss: 2.2759816646575928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21548 , TFLOPS: 97.77586777564409, Tokens per sec: 79895.17853423169, Loss: 2.253215789794922 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21549 , TFLOPS: 97.7741176196918, Tokens per sec: 79893.74843675004, Loss: 2.2362914085388184 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21550 , TFLOPS: 95.9198093656367, Tokens per sec: 78378.5454282206, Loss: 2.259782314300537 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21551 , TFLOPS: 97.10386392430135, Tokens per sec: 79346.06688838144, Loss: 2.2453248500823975 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21552 , TFLOPS: 97.2510918557329, Tokens per sec: 79466.37062113828, Loss: 2.2542309761047363 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21553 , TFLOPS: 96.36954578873373, Tokens per sec: 78746.03663677866, Loss: 2.258485794067383 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21554 , TFLOPS: 97.6809213140962, Tokens per sec: 79817.59533636154, Loss: 2.240035057067871 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21555 , TFLOPS: 96.18461375400935, Tokens per sec: 78594.92391063129, Loss: 2.2686269283294678 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21556 , TFLOPS: 97.33224893970403, Tokens per sec: 79532.6861636203, Loss: 2.269089460372925 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21557 , TFLOPS: 96.50418978480278, Tokens per sec: 78856.05771201149, Loss: 2.2547318935394287 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21558 , TFLOPS: 97.81270538561442, Tokens per sec: 79925.27949362287, Loss: 2.259429454803467 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21559 , TFLOPS: 97.0322704336281, Tokens per sec: 79287.56600417191, Loss: 2.2564449310302734 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21560 , TFLOPS: 97.76506901353024, Tokens per sec: 79886.35458772359, Loss: 2.253573417663574 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21561 , TFLOPS: 97.20648055787275, Tokens per sec: 79429.91758125978, Loss: 2.2620553970336914 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21562 , TFLOPS: 97.55584700472677, Tokens per sec: 79715.39389847663, Loss: 2.24658465385437 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21563 , TFLOPS: 97.06746878603415, Tokens per sec: 79316.3274840091, Loss: 2.241938591003418 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21564 , TFLOPS: 96.63029656326887, Tokens per sec: 78959.10280697342, Loss: 2.232093572616577 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21565 , TFLOPS: 97.13229262742578, Tokens per sec: 79369.29671352434, Loss: 2.2821269035339355 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21566 , TFLOPS: 96.61508615312039, Tokens per sec: 78946.67398929028, Loss: 2.271958589553833 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21567 , TFLOPS: 98.38881502230039, Tokens per sec: 80396.03350814038, Loss: 2.250244379043579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21568 , TFLOPS: 96.62913851622505, Tokens per sec: 78958.15653692312, Loss: 2.2655508518218994 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21569 , TFLOPS: 98.36152976135669, Tokens per sec: 80373.73801903818, Loss: 2.270930528640747 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21570 , TFLOPS: 96.69879310065075, Tokens per sec: 79015.07309092587, Loss: 2.2505977153778076 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21571 , TFLOPS: 95.84729912168973, Tokens per sec: 78319.29544131181, Loss: 2.2423486709594727 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21572 , TFLOPS: 97.7993190596102, Tokens per sec: 79914.34118205014, Loss: 2.2459375858306885 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21573 , TFLOPS: 96.893017900101, Tokens per sec: 79173.77917434782, Loss: 2.2494394779205322 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21574 , TFLOPS: 97.03612199210403, Tokens per sec: 79290.71321175054, Loss: 2.263885974884033 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21575 , TFLOPS: 96.76699565029386, Tokens per sec: 79070.8031499291, Loss: 2.2555959224700928 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21576 , TFLOPS: 97.22422574331637, Tokens per sec: 79444.41762908747, Loss: 2.270508050918579 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21577 , TFLOPS: 98.407602524536, Tokens per sec: 80411.38525985043, Loss: 2.252322196960449 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21578 , TFLOPS: 97.85329894167808, Tokens per sec: 79958.44953325359, Loss: 2.23738431930542 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21579 , TFLOPS: 97.71647177554348, Tokens per sec: 79846.64453356041, Loss: 2.25179386138916 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21580 , TFLOPS: 97.41785408579698, Tokens per sec: 79602.63633216533, Loss: 2.2655489444732666 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21581 , TFLOPS: 97.82168929938098, Tokens per sec: 79932.62048082806, Loss: 2.264932870864868 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21582 , TFLOPS: 96.9840497416272, Tokens per sec: 79248.16363542696, Loss: 2.2544655799865723 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21583 , TFLOPS: 97.75896868609497, Tokens per sec: 79881.36985313983, Loss: 2.243990421295166 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21584 , TFLOPS: 96.99189096575974, Tokens per sec: 79254.57090151671, Loss: 2.2388036251068115 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21585 , TFLOPS: 97.07431386357601, Tokens per sec: 79321.92077307652, Loss: 2.252859115600586 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21586 , TFLOPS: 97.75983185158017, Tokens per sec: 79882.07516787764, Loss: 2.2636499404907227 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 21587 , TFLOPS: 96.51602238363873, Tokens per sec: 78865.72643311853, Loss: 2.2407925128936768 diff --git a/wandb/run-20240829_195743-wevlcym0/files/requirements.txt b/wandb/run-20240829_195743-wevlcym0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..34a2774e444bdc395654ccf8ce6da6833c7bc1ee --- /dev/null +++ b/wandb/run-20240829_195743-wevlcym0/files/requirements.txt @@ -0,0 +1,375 @@ +absl-py==2.1.0 +accelerate==0.23.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +antlr4-python3-runtime==4.9.3 +anyio==4.4.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +astroid==3.2.4 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bert-score==0.3.13 +bleach==6.1.0 +blis==0.7.11 +build==1.2.1 +cachecontrol==0.14.0 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +cleo==2.1.0 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cramjam==2.8.3 +crashtest==0.4.1 +cryptography==43.0.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +dataclasses-json==0.6.7 +dataproperty==1.0.1 +datasets==2.20.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +dill==0.3.8 +distlib==0.3.8 +distributed==2023.11.0 +distro==1.9.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +dulwich==0.21.7 +einops==0.7.0 +emoji==2.12.1 +entmax==1.3 +evaluate==0.4.2 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastparquet==2023.10.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +fugashi==1.3.2 +fuzzywuzzy==0.18.0 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +greenlet==3.0.3 +grpcio==1.60.1 +h11==0.14.0 +httpcore==1.0.5 +httpx==0.27.0 +huggingface-hub==0.24.5 +hydra-core==1.3.2 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +installer==0.7.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +isort==5.13.2 +jaraco.classes==3.4.0 +jedi==0.19.1 +jeepney==0.8.0 +jinja2==3.1.3 +jiter==0.5.0 +joblib==1.3.2 +json5==0.9.14 +jsonargparse==3.13.1 +jsonlines==4.0.0 +jsonnet==0.19.1 +jsonpatch==1.33 +jsonpointer==3.0.0 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +keyring==24.3.1 +kiwisolver==1.4.5 +langchain-community==0.2.12 +langchain-core==0.2.31 +langchain-huggingface==0.0.2 +langchain-openai==0.1.21 +langchain-text-splitters==0.2.2 +langchain==0.2.13 +langcodes==3.3.0 +langsmith==0.1.99 +lazy-loader==0.3 +levenshtein==0.25.1 +librosa==0.10.1 +lightning-utilities==0.11.6 +llm-jp-eval==1.4.0 +llvmlite==0.40.1 +lm-eval==0.3.0 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +marshmallow==3.21.3 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mbstrdecoder==1.1.3 +mccabe==0.7.0 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +mojimoji==0.0.13 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +multiprocess==0.70.16 +murmurhash==1.0.10 +mypy-extensions==1.0.0 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +neologdn==0.5.3 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numexpr==2.10.1 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +omegaconf==2.3.0 +onnx==1.15.0rc2 +openai==1.40.6 +opencv==4.7.0 +optree==0.10.0 +orjson==3.10.7 +packaging==23.2 +pandas==2.2.2 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +pathvalidate==3.2.0 +peft==0.5.0 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +pkginfo==1.11.1 +plac==1.4.3 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +poetry-core==1.9.0 +poetry-plugin-export==1.8.0 +poetry==1.8.3 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow-hotfix==0.6 +pyarrow==15.0.2 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 +pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycountry==24.6.1 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pylint==3.2.6 +pynvml==11.4.1 +pyparsing==3.1.1 +pyproject-hooks==1.1.0 +pytablewriter==1.2.0 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +python-levenshtein==0.25.1 +pytorch-lightning==2.4.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapidfuzz==3.9.6 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests-toolbelt==1.0.0 +requests==2.32.3 +rhoknp==1.7.0 +rich==13.7.0 +rmm==23.12.0 +rouge-score==0.1.2 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.2 +safetensors==0.4.3 +scikit-learn==1.5.1 +scipy==1.12.0 +secretstorage==3.3.3 +send2trash==1.8.2 +sentence-transformers==3.0.1 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +shellingham==1.5.4 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +sqlalchemy==2.0.32 +sqlitedict==2.1.0 +srsly==2.4.8 +stack-data==0.6.3 +sumeval==0.2.2 +sympy==1.12 +tabledata==1.3.3 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tcolorpy==0.1.6 +tenacity==8.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +text-generation==0.7.0 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tiktoken==0.7.0 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +tomlkit==0.13.2 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchmetrics==0.10.3 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm-multiprocess==0.0.11 +tqdm==4.66.5 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +trove-classifiers==2024.7.2 +typepy==1.3.2 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.12.2 +typing-inspect==0.9.0 +tzdata==2024.1 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +unbabel-comet==2.2.2 +unidic-lite==1.0.8 +urllib3==1.26.18 +virtualenv==20.26.3 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +word2number==1.1 +xdoctest==1.0.2 +xgboost==1.7.6 +xmltodict==0.13.0 +xxhash==3.4.1 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 +zstandard==0.23.0 \ No newline at end of file diff --git a/wandb/run-20240829_195743-wevlcym0/files/wandb-metadata.json b/wandb/run-20240829_195743-wevlcym0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..083a2868115335f2568d877a22796cdf8222897b --- /dev/null +++ b/wandb/run-20240829_195743-wevlcym0/files/wandb-metadata.json @@ -0,0 +1,880 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-29T10:57:44.460225", + "startedAt": "2024-08-29T10:57:43.769221", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "1024", + "--sliding-window-size", + "131072", + "--micro-batch-size", + "16", + "--valid_micro_batch_size", + "1", + "--global-batch-size", + "1280", + "--train-iters", + "23178", + "--tokenizer-type", + "HFPreTrainedTokenizer", + "--tokenizer-model", + "/share/pretrained_lm/Qwen/Qwen2-1.5B", + "--train-data-path", + "1754785366", + "/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document", + "28623823675", + "/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document", + "--valid-data-path", + "1205770", + "/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document", + "--test-data-path", + "1205770", + "/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document", + "--lr", + "3.5e-5", + "--min-lr", + "3.5e-6", + "--lr-decay-style", + "cosine", + "--lr-warmup-iters", + "500", + "--lr-decay-iters", + "23178", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "anyprecision", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-8", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/Qwen/Qwen2-1.5B", + "--save", + "/work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5", + "--load", + "/work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5", + "--fsdp-activation-checkpointing", + "--num-workers", + "4", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--upload-all-checkpoints-to-hf", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/yans-baseline-qwen2-1.5B-3.5e-5", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "yans_experiment", + "--wandb-name", + "yans-baseline-qwen2-1.5B-3.5e-5_train_2024-08-29-19:57:17" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "887a2cc5d104c10264701f95cbbb0a6a116768d6" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 144, + "cpu_count_logical": 144, + "cpu_freq": { + "current": 2400.0389999999943, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.039, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 8, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + }, + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 453.4449653625488 + } +} diff --git a/wandb/run-20240829_195743-wevlcym0/files/wandb-summary.json b/wandb/run-20240829_195743-wevlcym0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..8c247523d808507d1a49aad252923e08497863ef --- /dev/null +++ b/wandb/run-20240829_195743-wevlcym0/files/wandb-summary.json @@ -0,0 +1 @@ +{"training/loss": 2.2636499404907227, "training/perplexity": 9.618130782945018, "utils/batch_size": 16, "utils/global_batch_size": 1280, "utils/seq_len": 1025, "utils/gradient_accumulation_steps": 10, "utils/iteration": 21586, "optimizer/lr": 3.881482675671578e-06, "optimizer/variance_l2": 0.0003015717535799468, "optimizer/variance_sqrt_l2": 0.15141626608347175, "optimizer/momentum_l2": 0.034602634223417625, "optimizer/weight_l2": 1232.9854013734307, "optimizer/variance_l1": 0.022857666015625, "optimizer/variance_sqrt_l1": 935.375, "optimizer/momentum_l1": 174.203125, "optimizer/weight_l1": 3161088.0, "optimizer/variance_abs_max": 0.00017452239990234375, "optimizer/variance_sqrt_abs_max": 0.01318359375, "optimizer/momentum_abs_max": 0.002777099609375, "optimizer/weight_abs_max": 432.0, "stats/1_iteration_time": 16.42421027799719, "stats/tokens_per_sec": 79882.07516787764, "stats/tokens_per_sec_per_gpu": 9985.259395984705, "stats/tflops": 97.75983185158017, "_timestamp": 1725058775.1049252, "_runtime": 129711.30785012245, "_step": 21586, "evaluation/val_loss": 2.027616262435913, "evaluation/val_ppl": 7.595958232879639} \ No newline at end of file diff --git a/wandb/run-20240829_195743-wevlcym0/logs/debug.log b/wandb/run-20240829_195743-wevlcym0/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..f1d872a4500f6c48a5857140a2a9d8120f739f58 --- /dev/null +++ b/wandb/run-20240829_195743-wevlcym0/logs/debug.log @@ -0,0 +1,29 @@ +2024-08-29 19:57:43,788 INFO MainThread:16572 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_setup.py:_flush():76] Configure stats pid to 16572 +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train sample'} +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240829_195743-wevlcym0/logs/debug.log +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240829_195743-wevlcym0/logs/debug-internal.log +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_init.py:init():566] calling init triggers +2024-08-29 19:57:43,789 INFO MainThread:16572 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['1754785366', '/project/datas/llm-jp-corpus-v2/ja-wiki/data/data_text_document', '28623823675', '/project/datas/llm-jp-corpus-v2/ja-cc/level0/data_text_document'], 'valid_data_path': ['1205770', '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document'], 'test_data_path': ['1205770', '/work/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 1024, 'num_workers': 4, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/Qwen/Qwen2-1.5B', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'yans-baseline-qwen2-1.5B-3.5e-5_train_2024-08-29-19:57:17', 'wandb_project': 'yans_experiment', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5', 'save': '/work/llm_recipes/models/yans-baseline-qwen2-1.5B-3.5e-5', 'base_model': '/share/pretrained_lm/Qwen/Qwen2-1.5B', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 'eval_iters': 10, 'optimizer': 'anyprecision', 'lr': 3.5e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 23178, 'lr_warmup_iters': 500, 'min_lr': 3.5e-06, 'train_iters': 23178, 'train_samples': None, 'global_batch_size': 1280, 'micro_batch_size': 16, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 131072, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-08, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/yans-baseline-qwen2-1.5B-3.5e-5', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': True, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'valid_micro_batch_size': 1, 'rank': 0, 'world_size': 8, 'padded_vocab_size': 151680, 'gradient_accumulation_steps': 10} +2024-08-29 19:57:43,790 INFO MainThread:16572 [wandb_init.py:init():616] starting backend +2024-08-29 19:57:43,790 INFO MainThread:16572 [wandb_init.py:init():620] setting up manager +2024-08-29 19:57:43,795 INFO MainThread:16572 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-29 19:57:43,796 INFO MainThread:16572 [wandb_init.py:init():628] backend started and connected +2024-08-29 19:57:43,804 INFO MainThread:16572 [wandb_init.py:init():720] updated telemetry +2024-08-29 19:57:43,901 INFO MainThread:16572 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-29 19:57:44,315 INFO MainThread:16572 [wandb_run.py:_on_init():2262] communicating current version +2024-08-29 19:57:44,338 INFO MainThread:16572 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.8 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-29 19:57:44,339 INFO MainThread:16572 [wandb_init.py:init():804] starting run threads in backend +2024-08-29 19:57:44,495 INFO MainThread:16572 [wandb_run.py:_console_start():2241] atexit reg +2024-08-29 19:57:44,495 INFO MainThread:16572 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-29 19:57:44,496 INFO MainThread:16572 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-29 19:57:44,496 INFO MainThread:16572 [wandb_run.py:_redirect():2186] Redirects installed. +2024-08-29 19:57:44,497 INFO MainThread:16572 [wandb_init.py:init():847] run started, returning control to user process +2024-08-29 19:58:45,570 INFO MainThread:16572 [wandb_run.py:_config_callback():1343] config_cb None None {'model_architecture': 'Qwen2ForCausalLM', 'activation_function': 'silu', 'hidden_size': 1536, 'model_type': 'qwen2', 'max_position_embeddings': 1024, 'num_attention_heads': 12, 'num_hidden_layers': 28} +2024-08-29 19:58:45,571 INFO MainThread:16572 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 8} diff --git a/wandb/run-20241101_034132-tssjv11t/logs/debug-internal.log b/wandb/run-20241101_034132-tssjv11t/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..e588bc4190444e1e3048d8b196bdf924d06588fe --- /dev/null +++ b/wandb/run-20241101_034132-tssjv11t/logs/debug-internal.log @@ -0,0 +1,245 @@ +2024-11-01 03:41:32,775 INFO StreamThr :13148 [internal.py:wandb_internal():86] W&B internal server running at pid: 13148, started at: 2024-11-01 03:41:32.774010 +2024-11-01 03:41:32,776 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status +2024-11-01 03:41:32,778 INFO WriterThread:13148 [datastore.py:open_for_write():87] open: /project/wandb/run-20241101_034132-tssjv11t/run-tssjv11t.wandb +2024-11-01 03:41:32,779 DEBUG SenderThread:13148 [sender.py:send():382] send: header +2024-11-01 03:41:32,794 DEBUG SenderThread:13148 [sender.py:send():382] send: run +2024-11-01 03:41:33,234 INFO SenderThread:13148 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20241101_034132-tssjv11t/files +2024-11-01 03:41:33,234 INFO SenderThread:13148 [sender.py:_start_run_threads():1136] run started: tssjv11t with start time 1730400092.772681 +2024-11-01 03:41:33,239 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: check_version +2024-11-01 03:41:33,239 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: check_version +2024-11-01 03:41:33,311 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: run_start +2024-11-01 03:41:33,318 DEBUG HandlerThread:13148 [system_info.py:__init__():27] System info init +2024-11-01 03:41:33,318 DEBUG HandlerThread:13148 [system_info.py:__init__():42] System info init done +2024-11-01 03:41:33,318 INFO HandlerThread:13148 [system_monitor.py:start():194] Starting system monitor +2024-11-01 03:41:33,318 INFO SystemMonitor:13148 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-11-01 03:41:33,318 INFO HandlerThread:13148 [system_monitor.py:probe():214] Collecting system info +2024-11-01 03:41:33,318 INFO SystemMonitor:13148 [interfaces.py:start():190] Started cpu monitoring +2024-11-01 03:41:33,319 INFO SystemMonitor:13148 [interfaces.py:start():190] Started disk monitoring +2024-11-01 03:41:33,319 INFO SystemMonitor:13148 [interfaces.py:start():190] Started gpu monitoring +2024-11-01 03:41:33,321 INFO SystemMonitor:13148 [interfaces.py:start():190] Started memory monitoring +2024-11-01 03:41:33,322 INFO SystemMonitor:13148 [interfaces.py:start():190] Started network monitoring +2024-11-01 03:41:33,337 DEBUG HandlerThread:13148 [system_info.py:probe():151] Probing system +2024-11-01 03:41:33,340 DEBUG HandlerThread:13148 [system_info.py:_probe_git():136] Probing git +2024-11-01 03:41:33,353 DEBUG HandlerThread:13148 [system_info.py:_probe_git():144] Probing git done +2024-11-01 03:41:33,353 DEBUG HandlerThread:13148 [system_info.py:probe():199] Probing system done +2024-11-01 03:41:33,353 DEBUG HandlerThread:13148 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-10-31T18:41:33.337959', 'startedAt': '2024-10-31T18:41:32.759487', 'docker': None, 'cuda': None, 'args': ('--seq-length', '1024', '--sliding-window-size', '131072', '--micro-batch-size', '6', '--valid_micro_batch_size', '1', '--global-batch-size', '256', '--train-iters', '698', '--tokenizer-type', 'HFPreTrainedTokenizer', '--tokenizer-model', '/share/pretrained_lm/llm-jp/llm-jp-v3-3.7b', '--train-data-path', '183224836', '/project/trans/datasets/processed_ja-en/train/documents_text_document', '--valid-data-path', '61084836', '/project/trans/datasets/processed_ja-en/valid/documents_text_document', '--test-data-path', '61084836', '/project/trans/datasets/processed_ja-en/valid/documents_text_document', '--lr', '1.5e-4', '--min-lr', '1.5e-5', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '70', '--lr-decay-iters', '698', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'anyprecision', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-8', '--save-interval', '50', '--eval-interval', '20', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/llm-jp/llm-jp-v3-3.7b', '--save', '/work/llm_recipes/models/llm-jp-v3-3.7b_ja-en_actual_3M-pairs', '--load', '/work/llm_recipes/models/llm-jp-v3-3.7b_ja-en_actual_3M-pairs', '--num-workers', '4', '--fsdp-activation-checkpointing', '--sharding-strategy', 'SHARD_GRAD_OP', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '3', '--upload-all-checkpoints-to-hf', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/llm-jp-v3-3.7b_ja-en_actual_3M-pairs', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'trans_experiment', '--wandb-name', 'llm-jp-v3-3.7b_ja-en_actual_3M-pairs_train_2024-11-01-03:41:19'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '3b2976faebe2228c39adb20194a29b785a37defe'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 36, 'cpu_count_logical': 36, 'cpu_freq': {'current': 2400.032000000001, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}, {'current': 2400.032, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 2, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}, {'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 113.18244934082031}} +2024-11-01 03:41:33,353 INFO HandlerThread:13148 [system_monitor.py:probe():224] Finished collecting system info +2024-11-01 03:41:33,353 INFO HandlerThread:13148 [system_monitor.py:probe():227] Publishing system info +2024-11-01 03:41:33,354 INFO HandlerThread:13148 [system_monitor.py:probe():229] Finished publishing system info +2024-11-01 03:41:33,360 DEBUG SenderThread:13148 [sender.py:send():382] send: files +2024-11-01 03:41:33,360 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-11-01 03:41:33,372 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: python_packages +2024-11-01 03:41:33,373 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:41:33,373 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:41:33,373 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: python_packages +2024-11-01 03:41:33,375 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:41:33,572 DEBUG SenderThread:13148 [sender.py:send():382] send: telemetry +2024-11-01 03:41:34,011 INFO wandb-upload_0:13148 [upload_job.py:push():131] Uploaded file /tmp/tmp02yl14_rwandb/k21mgcjl-wandb-metadata.json +2024-11-01 03:41:34,236 INFO Thread-12 :13148 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20241101_034132-tssjv11t/files/requirements.txt +2024-11-01 03:41:34,236 INFO Thread-12 :13148 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:41:34,236 INFO Thread-12 :13148 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-metadata.json +2024-11-01 03:41:36,237 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:41:38,101 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:41:40,240 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:41:43,943 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:41:44,242 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:41:47,639 DEBUG SenderThread:13148 [sender.py:send():382] send: config +2024-11-01 03:41:47,639 DEBUG SenderThread:13148 [sender.py:send():382] send: config +2024-11-01 03:41:48,245 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:41:48,374 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:41:48,375 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:41:48,375 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:41:49,543 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:41:50,246 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:41:54,543 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:41:59,544 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:03,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:42:03,371 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:42:03,414 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:42:04,638 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:05,256 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/config.yaml +2024-11-01 03:42:09,839 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:14,839 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:17,191 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: partial_history +2024-11-01 03:42:18,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:42:18,371 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:42:18,373 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:42:20,266 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:42:20,646 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:25,647 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:30,648 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:33,322 DEBUG SystemMonitor:13148 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-11-01 03:42:33,325 DEBUG SenderThread:13148 [sender.py:send():382] send: stats +2024-11-01 03:42:33,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:42:33,371 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:42:33,414 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:42:36,634 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:41,635 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:44,535 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: partial_history +2024-11-01 03:42:44,538 DEBUG SenderThread:13148 [sender.py:send():382] send: history +2024-11-01 03:42:44,539 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: summary_record +2024-11-01 03:42:44,541 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:42:45,282 INFO Thread-12 :13148 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-summary.json +2024-11-01 03:42:46,282 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:42:47,580 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:48,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:42:48,371 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:42:48,373 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:42:53,545 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:42:58,545 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:03,330 DEBUG SenderThread:13148 [sender.py:send():382] send: stats +2024-11-01 03:43:03,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:43:03,371 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:43:03,414 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:43:03,610 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:08,611 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:11,917 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: partial_history +2024-11-01 03:43:11,919 DEBUG SenderThread:13148 [sender.py:send():382] send: history +2024-11-01 03:43:11,919 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: summary_record +2024-11-01 03:43:11,921 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:43:12,299 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-summary.json +2024-11-01 03:43:13,960 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:14,300 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:43:18,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:43:18,372 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:43:18,373 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:43:19,597 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:24,597 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:29,598 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:33,331 DEBUG SenderThread:13148 [sender.py:send():382] send: stats +2024-11-01 03:43:33,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:43:33,372 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:43:33,414 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:43:34,640 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:39,259 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: partial_history +2024-11-01 03:43:39,262 DEBUG SenderThread:13148 [sender.py:send():382] send: history +2024-11-01 03:43:39,263 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: summary_record +2024-11-01 03:43:39,265 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:43:39,317 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-summary.json +2024-11-01 03:43:40,304 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:40,317 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:43:45,304 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:48,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:43:48,372 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:43:48,374 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:43:50,636 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:43:55,637 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:00,637 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:03,334 DEBUG SenderThread:13148 [sender.py:send():382] send: stats +2024-11-01 03:44:03,371 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: stop_status +2024-11-01 03:44:03,372 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: stop_status +2024-11-01 03:44:03,414 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: internal_messages +2024-11-01 03:44:06,585 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:06,648 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: partial_history +2024-11-01 03:44:06,649 DEBUG SenderThread:13148 [sender.py:send():382] send: history +2024-11-01 03:44:06,650 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: summary_record +2024-11-01 03:44:06,651 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:44:07,338 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-summary.json +2024-11-01 03:44:07,807 DEBUG SenderThread:13148 [sender.py:send():382] send: exit +2024-11-01 03:44:07,807 INFO SenderThread:13148 [sender.py:send_exit():589] handling exit code: 255 +2024-11-01 03:44:07,807 INFO SenderThread:13148 [sender.py:send_exit():591] handling runtime: 154 +2024-11-01 03:44:07,808 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:44:07,809 INFO SenderThread:13148 [sender.py:send_exit():597] send defer +2024-11-01 03:44:07,809 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,809 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 0 +2024-11-01 03:44:07,809 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,809 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-11-01 03:44:07,809 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 1 +2024-11-01 03:44:07,809 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,809 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 1 +2024-11-01 03:44:07,809 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,810 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-11-01 03:44:07,810 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 2 +2024-11-01 03:44:07,810 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,810 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 2 +2024-11-01 03:44:07,810 INFO HandlerThread:13148 [system_monitor.py:finish():203] Stopping system monitor +2024-11-01 03:44:07,810 DEBUG SystemMonitor:13148 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-11-01 03:44:07,810 INFO HandlerThread:13148 [interfaces.py:finish():202] Joined cpu monitor +2024-11-01 03:44:07,810 DEBUG SystemMonitor:13148 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-11-01 03:44:07,811 INFO HandlerThread:13148 [interfaces.py:finish():202] Joined disk monitor +2024-11-01 03:44:07,913 INFO HandlerThread:13148 [interfaces.py:finish():202] Joined gpu monitor +2024-11-01 03:44:07,914 INFO HandlerThread:13148 [interfaces.py:finish():202] Joined memory monitor +2024-11-01 03:44:07,914 INFO HandlerThread:13148 [interfaces.py:finish():202] Joined network monitor +2024-11-01 03:44:07,915 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,915 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-11-01 03:44:07,915 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 3 +2024-11-01 03:44:07,915 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,915 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 3 +2024-11-01 03:44:07,919 DEBUG SenderThread:13148 [sender.py:send():382] send: stats +2024-11-01 03:44:07,919 DEBUG SenderThread:13148 [sender.py:send():382] send: history +2024-11-01 03:44:07,919 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: summary_record +2024-11-01 03:44:07,920 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:44:07,921 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,921 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-11-01 03:44:07,921 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 4 +2024-11-01 03:44:07,921 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,921 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 4 +2024-11-01 03:44:07,921 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,921 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-11-01 03:44:07,921 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 5 +2024-11-01 03:44:07,921 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,921 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 5 +2024-11-01 03:44:07,922 DEBUG SenderThread:13148 [sender.py:send():382] send: summary +2024-11-01 03:44:07,923 INFO SenderThread:13148 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-11-01 03:44:07,923 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,923 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-11-01 03:44:07,923 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 6 +2024-11-01 03:44:07,924 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,924 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 6 +2024-11-01 03:44:07,924 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,924 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-11-01 03:44:07,924 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 7 +2024-11-01 03:44:07,924 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:07,924 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:07,924 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 7 +2024-11-01 03:44:07,924 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:07,924 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-11-01 03:44:08,340 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-summary.json +2024-11-01 03:44:08,340 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:44:08,807 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: poll_exit +2024-11-01 03:44:10,244 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 8 +2024-11-01 03:44:10,244 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: poll_exit +2024-11-01 03:44:10,244 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:10,244 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 8 +2024-11-01 03:44:10,244 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:10,244 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-11-01 03:44:10,244 INFO SenderThread:13148 [job_builder.py:build():296] Attempting to build job artifact +2024-11-01 03:44:10,246 INFO SenderThread:13148 [job_builder.py:_get_source_type():426] is repo sourced job +2024-11-01 03:44:10,261 INFO SenderThread:13148 [job_builder.py:build():402] adding wandb-job metadata file +2024-11-01 03:44:10,270 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 9 +2024-11-01 03:44:10,271 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:10,271 DEBUG SenderThread:13148 [sender.py:send():382] send: artifact +2024-11-01 03:44:10,271 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 9 +2024-11-01 03:44:10,341 INFO Thread-12 :13148 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20241101_034132-tssjv11t/files/output.log +2024-11-01 03:44:10,808 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: poll_exit +2024-11-01 03:44:11,250 INFO SenderThread:13148 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTMwMjQ5MDY3NQ==', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjQ4MzQ0MjQxNg==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTMwMjQ5MDY3NQ==', 'versionIndex': 1}}} +2024-11-01 03:44:11,250 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:11,250 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-11-01 03:44:11,250 INFO SenderThread:13148 [dir_watcher.py:finish():358] shutting down directory watcher +2024-11-01 03:44:11,342 INFO SenderThread:13148 [dir_watcher.py:finish():388] scan: /project/wandb/run-20241101_034132-tssjv11t/files +2024-11-01 03:44:11,343 INFO SenderThread:13148 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20241101_034132-tssjv11t/files/requirements.txt requirements.txt +2024-11-01 03:44:11,343 INFO SenderThread:13148 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20241101_034132-tssjv11t/files/config.yaml config.yaml +2024-11-01 03:44:11,343 INFO SenderThread:13148 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-metadata.json wandb-metadata.json +2024-11-01 03:44:11,343 INFO SenderThread:13148 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20241101_034132-tssjv11t/files/wandb-summary.json wandb-summary.json +2024-11-01 03:44:11,343 INFO SenderThread:13148 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20241101_034132-tssjv11t/files/output.log output.log +2024-11-01 03:44:11,344 INFO SenderThread:13148 [sender.py:transition_state():617] send defer: 10 +2024-11-01 03:44:11,344 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: poll_exit +2024-11-01 03:44:11,344 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: defer +2024-11-01 03:44:11,344 INFO HandlerThread:13148 [handler.py:handle_request_defer():172] handle defer: 10 +2024-11-01 03:44:11,344 DEBUG SenderThread:13148 [sender.py:send_request():409] send_request: defer +2024-11-01 03:44:11,344 INFO SenderThread:13148 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-11-01 03:44:11,344 INFO SenderThread:13148 [file_pusher.py:finish():172] shutting down file pusher +2024-11-01 03:44:17,345 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:22,346 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:27,347 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:32,348 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:37,349 DEBUG HandlerThread:13148 [handler.py:handle_request():146] handle_request: status_report +2024-11-01 03:44:37,967 WARNING StreamThr :13148 [internal.py:is_dead():414] Internal process exiting, parent pid 12981 disappeared +2024-11-01 03:44:37,967 ERROR StreamThr :13148 [internal.py:wandb_internal():152] Internal process shutdown. +2024-11-01 03:44:38,349 INFO SenderThread:13148 [sender.py:finish():1572] shutting down sender +2024-11-01 03:44:38,349 INFO SenderThread:13148 [file_pusher.py:finish():172] shutting down file pusher +2024-11-01 03:44:38,349 INFO SenderThread:13148 [file_pusher.py:join():178] waiting for file pusher +2024-11-01 03:44:38,349 INFO SenderThread:13148 [file_stream.py:finish():595] file stream finish called +2024-11-01 03:44:38,349 INFO WriterThread:13148 [datastore.py:close():296] close: /project/wandb/run-20241101_034132-tssjv11t/run-tssjv11t.wandb +2024-11-01 03:44:38,349 INFO HandlerThread:13148 [handler.py:finish():869] shutting down handler +2024-11-01 03:44:38,523 INFO SenderThread:13148 [file_stream.py:finish():599] file stream finish is done diff --git a/wandb/run-20241101_034132-tssjv11t/logs/debug.log b/wandb/run-20241101_034132-tssjv11t/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..70ad85fe9d156e8b243da4070d3dedcf87a50e39 --- /dev/null +++ b/wandb/run-20241101_034132-tssjv11t/logs/debug.log @@ -0,0 +1,29 @@ +2024-11-01 03:41:32,765 INFO MainThread:12981 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_setup.py:_flush():76] Configure stats pid to 12981 +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'CPT en-ja'} +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20241101_034132-tssjv11t/logs/debug.log +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20241101_034132-tssjv11t/logs/debug-internal.log +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_init.py:init():566] calling init triggers +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'SHARD_GRAD_OP', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['183224836', '/project/trans/datasets/processed_ja-en/train/documents_text_document'], 'valid_data_path': ['61084836', '/project/trans/datasets/processed_ja-en/valid/documents_text_document'], 'test_data_path': ['61084836', '/project/trans/datasets/processed_ja-en/valid/documents_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 1024, 'num_workers': 4, 'tokenizer_type': 'HFPreTrainedTokenizer', 'tokenizer_model': '/share/pretrained_lm/llm-jp/llm-jp-v3-3.7b', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'llm-jp-v3-3.7b_ja-en_actual_3M-pairs_train_2024-11-01-03:41:19', 'wandb_project': 'trans_experiment', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/llm-jp-v3-3.7b_ja-en_actual_3M-pairs', 'save': '/work/llm_recipes/models/llm-jp-v3-3.7b_ja-en_actual_3M-pairs', 'base_model': '/share/pretrained_lm/llm-jp/llm-jp-v3-3.7b', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 20, 'save_interval': 50, 'eval_iters': 10, 'optimizer': 'anyprecision', 'lr': 0.00015, 'lr_decay_style': 'cosine', 'lr_decay_iters': 698, 'lr_warmup_iters': 70, 'min_lr': 1.5e-05, 'train_iters': 698, 'train_samples': None, 'global_batch_size': 256, 'micro_batch_size': 6, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 131072, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-08, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 3, 'hf_repo_id': 'koichi12/llm-jp-v3-3.7b_ja-en_actual_3M-pairs', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': True, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'valid_micro_batch_size': 1, 'rank': 0, 'world_size': 2, 'padded_vocab_size': 99584, 'gradient_accumulation_steps': 21} +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_init.py:init():616] starting backend +2024-11-01 03:41:32,766 INFO MainThread:12981 [wandb_init.py:init():620] setting up manager +2024-11-01 03:41:32,771 INFO MainThread:12981 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-11-01 03:41:32,772 INFO MainThread:12981 [wandb_init.py:init():628] backend started and connected +2024-11-01 03:41:32,778 INFO MainThread:12981 [wandb_init.py:init():720] updated telemetry +2024-11-01 03:41:32,790 INFO MainThread:12981 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-11-01 03:41:33,238 INFO MainThread:12981 [wandb_run.py:_on_init():2262] communicating current version +2024-11-01 03:41:33,263 INFO MainThread:12981 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.18.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-11-01 03:41:33,263 INFO MainThread:12981 [wandb_init.py:init():804] starting run threads in backend +2024-11-01 03:41:33,370 INFO MainThread:12981 [wandb_run.py:_console_start():2241] atexit reg +2024-11-01 03:41:33,371 INFO MainThread:12981 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-11-01 03:41:33,371 INFO MainThread:12981 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-11-01 03:41:33,371 INFO MainThread:12981 [wandb_run.py:_redirect():2186] Redirects installed. +2024-11-01 03:41:33,372 INFO MainThread:12981 [wandb_init.py:init():847] run started, returning control to user process +2024-11-01 03:41:47,638 INFO MainThread:12981 [wandb_run.py:_config_callback():1343] config_cb None None {'model_architecture': 'LlamaForCausalLM', 'activation_function': 'silu', 'hidden_size': 3072, 'model_type': 'llama', 'max_position_embeddings': 4096, 'num_attention_heads': 24, 'num_hidden_layers': 28} +2024-11-01 03:41:47,638 INFO MainThread:12981 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 2} diff --git a/wandb/run-20241101_034132-tssjv11t/run-tssjv11t.wandb b/wandb/run-20241101_034132-tssjv11t/run-tssjv11t.wandb new file mode 100644 index 0000000000000000000000000000000000000000..0e917c1b195fb2975b5b38cb2acd20668450e6dc Binary files /dev/null and b/wandb/run-20241101_034132-tssjv11t/run-tssjv11t.wandb differ