| + task2run=indic_copa_hi |
| + WORKER_DIR=/mnt/weka/peacock/evaluation |
| + echo 'WORKER_DIR: /mnt/weka/peacock/evaluation' |
| WORKER_DIR: /mnt/weka/peacock/evaluation |
| + LM_EVAL_REPO=/mnt/weka/peacock/evaluation/lm-evaluation-harness |
| + export PYTHONPATH=/root:/usr/lib/habanalabs/:/mnt/weka/peacock/evaluation/Megatron-DeepSpeed |
| + PYTHONPATH=/root:/usr/lib/habanalabs/:/mnt/weka/peacock/evaluation/Megatron-DeepSpeed |
| + DS_TO_UNIV_PY=/usr/local/lib/python3.10/dist-packages/deepspeed/checkpoint/ds_to_universal.py |
| + ckpt_dir=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024 |
| + ckpt_step=(global_step120000 global_step240000) |
| + UNIV_CKPT=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal |
| + HF_CKPT=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf |
| + UNIV_TO_HF_PY=/mnt/weka/peacock/evaluation/convert_checkpoint/mds_universal_to_huggingface.py |
| + UNIV_TO_HF_JSON=/mnt/weka/peacock/evaluation/convert_checkpoint/mds_to_hf_llama_custom_3b_peacock.json |
| + tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer |
| + logs_eval=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval |
| + mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval |
| + tasks=(${task2run}) |
| + for ckpt in ${ckpt_step[@]} |
| + MDS_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/global_step120000 |
| + UNIV_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step120000 |
| + HF_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000 |
| + mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step120000 |
| + mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000 |
| + cd /mnt/weka/peacock/evaluation/lm-evaluation-harness |
| + export HF_DATASETS_TRUST_REMOTE_CODE=True |
| + HF_DATASETS_TRUST_REMOTE_CODE=True |
| + for task in ${tasks[@]} |
| + batch_size=4 |
| + cmd='HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log' |
| + echo HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log |
| HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log |
| + eval HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log |
| ++ HF_DATASETS_TRUST_REMOTE_CODE=True |
| ++ lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG |
| ++ tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log |
| /usr/local/lib/python3.10/dist-packages/torch/distributed/distributed_c10d.py:366: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead |
| warnings.warn( |
| /usr/local/lib/python3.10/dist-packages/habana_frameworks/torch/gpu_migration/__init__.py:46: UserWarning: apex not installed, gpu_migration will not swap api for this package. |
| warnings.warn( |
| 2024-08-28:06:00:25,794 INFO [__main__.py:251] Verbosity set to DEBUG |
| 2024-08-28:06:00:30,153 ERROR [__main__.py:301] Tasks were not found: indic_copa_hi |
| Try `lm-eval --tasks list` for list of available tasks |
| Traceback (most recent call last): |
| File "/usr/local/bin/lm_eval", line 8, in <module> |
| sys.exit(cli_evaluate()) |
| File "/usr/local/lib/python3.10/dist-packages/lm_eval/__main__.py", line 305, in cli_evaluate |
| raise ValueError( |
| ValueError: Tasks not found: indic_copa_hi. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues. |
| + for ckpt in ${ckpt_step[@]} |
| + MDS_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/global_step240000 |
| + UNIV_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step240000 |
| + HF_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000 |
| + mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step240000 |
| + mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000 |
| + cd /mnt/weka/peacock/evaluation/lm-evaluation-harness |
| + export HF_DATASETS_TRUST_REMOTE_CODE=True |
| + HF_DATASETS_TRUST_REMOTE_CODE=True |
| + for task in ${tasks[@]} |
| + batch_size=4 |
| + cmd='HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log' |
| + echo HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log |
| HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log |
| + eval HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log |
| ++ HF_DATASETS_TRUST_REMOTE_CODE=True |
| ++ lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG |
| ++ tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log |
| /usr/local/lib/python3.10/dist-packages/torch/distributed/distributed_c10d.py:366: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead |
| warnings.warn( |
| /usr/local/lib/python3.10/dist-packages/habana_frameworks/torch/gpu_migration/__init__.py:46: UserWarning: apex not installed, gpu_migration will not swap api for this package. |
| warnings.warn( |
| 2024-08-28:06:00:34,476 INFO [__main__.py:251] Verbosity set to DEBUG |
| 2024-08-28:06:00:38,862 ERROR [__main__.py:301] Tasks were not found: indic_copa_hi |
| Try `lm-eval --tasks list` for list of available tasks |
| Traceback (most recent call last): |
| File "/usr/local/bin/lm_eval", line 8, in <module> |
| sys.exit(cli_evaluate()) |
| File "/usr/local/lib/python3.10/dist-packages/lm_eval/__main__.py", line 305, in cli_evaluate |
| raise ValueError( |
| ValueError: Tasks not found: indic_copa_hi. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues. |
|
|