#!/usr/bin/env bash
# Launch distributed beam-search translation inference for the WMT2024
# en-ja test set, using a fine-tuned / checkpoint-averaged LLaMA-2 model
# across 8 GPUs. Output (50-best lists per input) goes to the -o directory.
set -eux
set -o pipefail  # -e alone does not fail a pipeline whose non-final stage fails

LLM_RECIPES_DIR=/code/llm-recipes

# NOTE(review): tokens.sh is not visible here — presumably it exports access
# tokens (e.g. Hugging Face credentials) needed by the inference script; confirm.
source "${LLM_RECIPES_DIR}/scripts/wmt2024/tokens.sh"

MAX_INPUT_TOKENS=158
BEAM_SIZE=50

# The misspellings "hf_inference_distrubuted.py" and "additiona_trained_hf"
# match the actual on-disk names — do not "correct" them here.
python "${LLM_RECIPES_DIR}/tools/hf_inference_distrubuted.py" \
  --model /work/models/additiona_trained_hf/llama2-en-ja-continuous-pretrained-v0-dev-finetune-chunked-docs-all-averaged-841-845 \
  -i /work/wmt2024_test/LLM/wmttest2024.src.sentence_splited.with_template.en-ja.en.jsonl \
  -o /work/translation/wmt2024_test/en-ja/llama2-beam \
  -g 0 1 2 3 4 5 6 7 \
  --attn_implementation sdpa \
  --dynamic_max_new_token_ratio 3.0 \
  --num_return_sequences "${BEAM_SIZE}" \
  --num_beams "${BEAM_SIZE}" \
  --max_input_tokens "${MAX_INPUT_TOKENS}" \
  -b 158  # batch size? same value as MAX_INPUT_TOKENS — looks coincidental, confirm