# Launches a vLLM server for the ArkTS-linter model experiments.
#
# Exactly one `vllm serve` invocation is active below (the model named
# arkts_linter_after_7_epoch_rl_bf16); the other variants are kept commented
# out so they can be swapped in by hand. Every variant starts the server in
# the background and redirects stdout and stderr to a log file in the current
# working directory.

# NOTE(review): presumably selects vLLM's legacy (V0) engine -- confirm against
# the environment-variable documentation of the installed vLLM version.
export VLLM_USE_V1=0

# --- Variant: Qwen2.5-Coder-7B on GPUs 0-3, port 8082 ---
# export CUDA_VISIBLE_DEVICES=0,1,2,3
# vllm serve /data/k8s/zpc/models/Qwen2.5-Coder-7B \
# --host 0.0.0.0 \
# --port 8082 \
# --max-model-len 32768 \
# --tensor-parallel-size 4 >vllm_after_cpt.log 2>&1 &

# --- Variant: arkts_linter_model/merged on GPUs 4-7, port 8081 ---
# export CUDA_VISIBLE_DEVICES=4,5,6,7
# vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_model/merged \
# --host 0.0.0.0 \
# --port 8081 \
# --max-model-len 32768 \
# --generation-config vllm \
# --speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
# --tensor-parallel-size 4 >arkts_lint_infer_ablation_for_sample_mining.log 2>&1 &

# --- Variant: arkts_linter_model_member_ordering_1_epoch/merged ---
# Best-performing model so far.
#export CUDA_VISIBLE_DEVICES=4,5,6,7
#vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_model_member_ordering_1_epoch/merged \
#--host 0.0.0.0 \
#--port 8081 \
#--max-model-len 32768 \
#--generation-config vllm \
#--speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
#--tensor-parallel-size 4 >origin_for_some_rules.log 2>&1 &

# --- Variant: arkts_linter_after_rl ---
# Writes to the same log file name as the active command below.
#export CUDA_VISIBLE_DEVICES=4,5,6,7
#vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_after_rl \
#--host 0.0.0.0 \
#--port 8081 \
#--max-model-len 32768 \
#--generation-config vllm \
#--speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
#--tensor-parallel-size 4 > rl_infer_for_some_rules.log 2>&1 &

# --- ACTIVE: arkts_linter_after_7_epoch_rl_bf16 on GPUs 4-7, port 8081 ---
# Four devices are exposed to match --tensor-parallel-size 4.
export CUDA_VISIBLE_DEVICES=4,5,6,7
# The model argument is a relative name, unlike the absolute paths used by the
# commented-out variants.
# NOTE(review): confirm it resolves as intended from the directory this script
# is run in.
# `>` truncates rl_infer_for_some_rules.log on every launch, and the trailing
# `&` backgrounds the server, so this script returns immediately without
# waiting for (or checking) server start-up.
vllm serve arkts_linter_after_7_epoch_rl_bf16 \
  --host 0.0.0.0 \
  --port 8081 \
  --max-model-len 32768 \
  --generation-config vllm \
  --speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
  --tensor-parallel-size 4 > rl_infer_for_some_rules.log 2>&1 &

# --- Variant: reward model on GPU 3, port 8082 (--task reward) ---
#export CUDA_VISIBLE_DEVICES=3
#vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_reward_model_merged_v1 \
#--host 0.0.0.0 \
#--port 8082 \
#--max-model-len 32768 --task reward > reward_model_pooling.log 2>&1 &

# --- Variant: arkts_linter_test_for_fe_ablation/merged on GPUs 4-7, port 8081 ---
# export CUDA_VISIBLE_DEVICES=4,5,6,7
# vllm serve /data/k8s/zpc/Custom-LLaMA-Factory/arkts_linter_test_for_fe_ablation/merged \
# --host 0.0.0.0 \
# --port 8081 \
# --max-model-len 32768 \
# --generation-config vllm \
# --speculative-config '{"method":"ngram", "num_speculative_tokens":15, "prompt_lookup_max":5}' \
# --tensor-parallel-size 4 >arkts_lint_infer_test_for_fe.log 2>&1 &