# llm-scripts/scripts/decode/ja-zh/mistral/top_p_inference_2.sh
# Uploaded by koichi12 via upload-large-folder tool (commit dc55567, verified)
# Launch top-p (0.95) sampling inference for ja-zh test shards 10-14,
# one background worker per shard, each pinned to its own GPU.
set -eux -o pipefail

LLM_RECIPES_DIR=/code/llm-recipes
# Provides credentials/env vars required by hf_inference.py.
source "$LLM_RECIPES_DIR/scripts/wmt2024/tokens.sh"

pids=()
for i in {10..14}; do
  # Map shard index to GPU id: shards 10-14 -> GPUs 5-9.
  # NOTE(review): the original comment said "minus 9 from i" but the code
  # subtracts 5 — confirm the intended GPU range with the cluster layout.
  GPU_ID=$((i - 5))
  python /code/llm-recipes/tools/hf_inference.py \
    --model /work/models/translation_finetuned_hf/mistral-ja-zh-continuous-pretrained-clened-v4-dev-finetune-chunked-docs-all-averaged-31-35 \
    -i "/work/wmt2024_test/LLM/split/ja-zh/wmttest2024.src.sentence_splited.with_template.ja-zh.ja.jsonl.${i}" \
    -o "/work/translation/wmt24_test/ja-zh/mistral-top-p-0.95/split_${i}" \
    -g "$GPU_ID" \
    -b 400 \
    --attn_implementation sdpa \
    --dynamic_max_new_token_ratio 2.0 \
    --num_return_sequences 100 \
    --do_sample \
    --top_p 0.95 \
    --max_input_tokens 481 &
  pids+=("$!")
done

# Wait on each worker individually so a failed shard fails the script:
# a bare `wait` discards background-job exit codes even under set -e.
for pid in "${pids[@]}"; do
  wait "$pid"
done