# arkts-lint-fixer / run_vllm_server.sh
# ZPaC's picture
# Upload sft and rl models
# 7521481
# Set VLLM_USE_V1 to 0 and export it so every vllm process started from this
# script inherits it.
# NOTE(review): per the vLLM docs this switches the V1 engine code path off;
# confirm it is still required by the installed vLLM version.
VLLM_USE_V1=0
export VLLM_USE_V1
# export CUDA_VISIBLE_DEVICES=0,1,2,3
# vllm serve /data/k8s/zpc/models/Qwen2.5-Coder-7B \
# --host 0.0.0.0 \
# --port 8082 \
# --max-model-len 32768 \
# --tensor-parallel-size 4 >vllm_after_cpt.log 2>&1 &
# export CUDA_VISIBLE_DEVICES=4,5,6,7
# vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_model/merged \
# --host 0.0.0.0 \
# --port 8081 \
# --max-model-len 32768 \
# --generation-config vllm \
# --speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
# --tensor-parallel-size 4 >arkts_lint_infer_ablation_for_sample_mining.log 2>&1 &
# Best-performing model so far
#export CUDA_VISIBLE_DEVICES=4,5,6,7
#vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_model_member_ordering_1_epoch/merged \
#--host 0.0.0.0 \
#--port 8081 \
#--max-model-len 32768 \
#--generation-config vllm \
#--speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
#--tensor-parallel-size 4 >origin_for_some_rules.log 2>&1 &
#export CUDA_VISIBLE_DEVICES=4,5,6,7
#vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_after_rl \
#--host 0.0.0.0 \
#--port 8081 \
#--max-model-len 32768 \
#--generation-config vllm \
#--speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
#--tensor-parallel-size 4 > rl_infer_for_some_rules.log 2>&1 &
# Active launch: serve the model named arkts_linter_after_7_epoch_rl_bf16.
#
# Four device ids (4-7) are exposed to the process, and --tensor-parallel-size
# is 4 to match. The model is given as a bare name rather than an absolute
# path - presumably resolved relative to the working directory; confirm
# before running from another location.
CUDA_VISIBLE_DEVICES=4,5,6,7
export CUDA_VISIBLE_DEVICES

# The server listens on 0.0.0.0:8081. stdout and stderr are both written to
# rl_infer_for_some_rules.log, and the trailing '&' runs the server in the
# background so the script returns immediately.
vllm serve arkts_linter_after_7_epoch_rl_bf16 \
  --host 0.0.0.0 \
  --port 8081 \
  --max-model-len 32768 \
  --generation-config vllm \
  --speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
  --tensor-parallel-size 4 \
  >rl_infer_for_some_rules.log 2>&1 &
#export CUDA_VISIBLE_DEVICES=3
#vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_reward_model_merged_v1 \
#--host 0.0.0.0 \
#--port 8082 \
#--max-model-len 32768 --task reward > reward_model_pooling.log 2>&1 &
# export CUDA_VISIBLE_DEVICES=4,5,6,7
# vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_test_for_fe_ablation/merged \
# --host 0.0.0.0 \
# --port 8081 \
# --max-model-len 32768 \
# --generation-config vllm \
# --speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
# --tensor-parallel-size 4 >arkts_lint_infer_test_for_fe.log 2>&1 &