peacock-data-public-evaluation / launch /logs /eval_indic_copa_hi.log
applied-ai-018's picture
Add files using upload-large-folder tool
5949b83 verified
+ task2run=indic_copa_hi
+ WORKER_DIR=/mnt/weka/peacock/evaluation
+ echo 'WORKER_DIR: /mnt/weka/peacock/evaluation'
WORKER_DIR: /mnt/weka/peacock/evaluation
+ LM_EVAL_REPO=/mnt/weka/peacock/evaluation/lm-evaluation-harness
+ export PYTHONPATH=/root:/usr/lib/habanalabs/:/mnt/weka/peacock/evaluation/Megatron-DeepSpeed
+ PYTHONPATH=/root:/usr/lib/habanalabs/:/mnt/weka/peacock/evaluation/Megatron-DeepSpeed
+ DS_TO_UNIV_PY=/usr/local/lib/python3.10/dist-packages/deepspeed/checkpoint/ds_to_universal.py
+ ckpt_dir=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024
+ ckpt_step=(global_step120000 global_step240000)
+ UNIV_CKPT=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal
+ HF_CKPT=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf
+ UNIV_TO_HF_PY=/mnt/weka/peacock/evaluation/convert_checkpoint/mds_universal_to_huggingface.py
+ UNIV_TO_HF_JSON=/mnt/weka/peacock/evaluation/convert_checkpoint/mds_to_hf_llama_custom_3b_peacock.json
+ tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer
+ logs_eval=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval
+ mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval
+ tasks=(${task2run})
+ for ckpt in ${ckpt_step[@]}
+ MDS_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/global_step120000
+ UNIV_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step120000
+ HF_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000
+ mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step120000
+ mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000
+ cd /mnt/weka/peacock/evaluation/lm-evaluation-harness
+ export HF_DATASETS_TRUST_REMOTE_CODE=True
+ HF_DATASETS_TRUST_REMOTE_CODE=True
+ for task in ${tasks[@]}
+ batch_size=4
+ cmd='HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log'
+ echo HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log
HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log
+ eval HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log
++ HF_DATASETS_TRUST_REMOTE_CODE=True
++ lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step120000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG
++ tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step120000_indic_copa_hi_bs4_full.log
/usr/local/lib/python3.10/dist-packages/torch/distributed/distributed_c10d.py:366: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead
warnings.warn(
/usr/local/lib/python3.10/dist-packages/habana_frameworks/torch/gpu_migration/__init__.py:46: UserWarning: apex not installed, gpu_migration will not swap api for this package.
warnings.warn(
2024-08-28:06:00:25,794 INFO [__main__.py:251] Verbosity set to DEBUG
2024-08-28:06:00:30,153 ERROR [__main__.py:301] Tasks were not found: indic_copa_hi
Try `lm-eval --tasks list` for list of available tasks
Traceback (most recent call last):
File "/usr/local/bin/lm_eval", line 8, in <module>
sys.exit(cli_evaluate())
File "/usr/local/lib/python3.10/dist-packages/lm_eval/__main__.py", line 305, in cli_evaluate
raise ValueError(
ValueError: Tasks not found: indic_copa_hi. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues.
+ for ckpt in ${ckpt_step[@]}
+ MDS_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/global_step240000
+ UNIV_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step240000
+ HF_CKPT_DIR=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000
+ mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/universal/global_step240000
+ mkdir -p /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000
+ cd /mnt/weka/peacock/evaluation/lm-evaluation-harness
+ export HF_DATASETS_TRUST_REMOTE_CODE=True
+ HF_DATASETS_TRUST_REMOTE_CODE=True
+ for task in ${tasks[@]}
+ batch_size=4
+ cmd='HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log'
+ echo HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log
HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG 2>&1 |tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log
+ eval HF_DATASETS_TRUST_REMOTE_CODE=True lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG '2>&1' '|tee' /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log
++ HF_DATASETS_TRUST_REMOTE_CODE=True
++ lm_eval --model hf --model_args pretrained=/mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/global_step240000,tokenizer=/mnt/weka/peacock/evaluation/ConvertedTokenizer --tasks indic_copa_hi --device hpu --batch_size 4 --num_fewshot 5 --trust_remote_code --verbosity DEBUG
++ tee /mnt/weka/peacock/peacock-data/experiments/llama/checkpoint/llamav2-3b/mbs8_240000/1024/hf/logs_eval/global_step240000_indic_copa_hi_bs4_full.log
/usr/local/lib/python3.10/dist-packages/torch/distributed/distributed_c10d.py:366: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead
warnings.warn(
/usr/local/lib/python3.10/dist-packages/habana_frameworks/torch/gpu_migration/__init__.py:46: UserWarning: apex not installed, gpu_migration will not swap api for this package.
warnings.warn(
2024-08-28:06:00:34,476 INFO [__main__.py:251] Verbosity set to DEBUG
2024-08-28:06:00:38,862 ERROR [__main__.py:301] Tasks were not found: indic_copa_hi
Try `lm-eval --tasks list` for list of available tasks
Traceback (most recent call last):
File "/usr/local/bin/lm_eval", line 8, in <module>
sys.exit(cli_evaluate())
File "/usr/local/lib/python3.10/dist-packages/lm_eval/__main__.py", line 305, in cli_evaluate
raise ValueError(
ValueError: Tasks not found: indic_copa_hi. Try `lm-eval --tasks list` for list of available tasks, or '--verbosity DEBUG' to troubleshoot task registration issues.