#!/usr/bin/env bash
# Fan out WMT2024 ja-zh test-set translation across GPUs: launch one
# hf_inference.py process per input split (splits 10-14), each pinned to
# its own GPU, then block until every job has finished.
set -euo pipefail
set -x

LLM_RECIPES_DIR=/code/llm-recipes
# Provides credentials/tokens consumed by the inference tool.
source "${LLM_RECIPES_DIR}/scripts/wmt2024/tokens.sh"

declare -a pids=()
for i in {10..14}; do
  # Map split index to GPU id: splits 10-14 -> GPUs 5-9.
  # NOTE(review): the original comment said "minus 9 from i" (which would
  # give GPUs 1-5) but the code subtracts 5; the code's behavior is kept
  # here — confirm the intended GPU ids.
  GPU_ID=$((i - 5))
  python "${LLM_RECIPES_DIR}/tools/hf_inference.py" \
    --model /work/models/translation_finetuned_hf/mistral-ja-zh-continuous-pretrained-clened-v4-dev-finetune-chunked-docs-all-averaged-31-35 \
    -i "/work/wmt2024_test/LLM/split/ja-zh/wmttest2024.src.sentence_splited.with_template.ja-zh.ja.jsonl.${i}" \
    -o "/work/translation/wmt24_test/ja-zh/mistral-top-p-0.95/split_${i}" \
    -g "$GPU_ID" \
    -b 400 \
    --attn_implementation sdpa \
    --dynamic_max_new_token_ratio 2.0 \
    --num_return_sequences 100 \
    --do_sample \
    --top_p 0.95 \
    --max_input_tokens 481 &
  pids+=("$!")
done

# A bare 'wait' ignores child exit codes even under set -e; wait on each
# PID individually so any failed inference job fails the whole script.
for pid in "${pids[@]}"; do
  wait "$pid"
done