| # Start a vLLM server for meta-llama/Llama-3.1-8B-Instruct on port 8040, |
| # registered under the served model name "dspy". CUDA_DEVICE_ORDER and |
| # CUDA_VISIBLE_DEVICES restrict the process to device 2 in PCI bus order. |
| CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 \ |
| vllm serve meta-llama/Llama-3.1-8B-Instruct \ |
| --port 8040 --served-model-name dspy \ |
| --dtype bfloat16 --tensor-parallel-size 1 --max-model-len 16384 |
|
|
| # Run qwen3-inference-vllm_bn.py with the Qwen/Qwen3-4B-Instruct-2507 model. |
| # The --output-file path is relative, so it resolves against the current directory. |
| python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py \ |
| --model-dir Qwen/Qwen3-4B-Instruct-2507 --output-file results/bn/test_inference_vllm_qwen3-4B_base.json |
|
|
|
|
| # Best-of-n run with --model base, using the prompt_bn prompts and the test_bn.json data. |
| # The script and output paths are relative to the current directory. |
| # NOTE(review): this calls best_of_n_qwen3_vllm.py, while the finetuned run calls |
| # best_of_n_qwen3_vllm_bn.py — confirm which script is intended here. |
| python best_of_n_qwen3_vllm.py \ |
| --model base --output-file results/bn/test_best_of_n_qwen3-4B_base.json \ |
| --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \ |
| --test-data /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json --src-lang Bengali |
|
|
| # Best-of-n run with --model finetuned, loading the model from model/bn. |
| # Uses the same prompt directory, test data and source language as the base run; |
| # the script and output paths are relative to the current directory. |
| python best_of_n_qwen3_vllm_bn.py \ |
| --model finetuned --output-file results/bn/test_best_of_n_qwen3-4B_sft.json \ |
| --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \ |
| --test-data /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json --src-lang Bengali \ |
| --finetuned-model-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn |
|
|
|
|
| # Run qwen3-inference-vllm_bn.py with the model stored in model/bn. |
| # The --output-file path is relative, so it resolves against the current directory. |
| python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py \ |
| --model-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn --output-file results/bn/test_inference_vllm_qwen3-4B_sft.json |
|
|
| # Self-refine run: 5 iterations, 512-token limits for generation, revision and critique, |
| # temperature 0.1 for generation and 0.3 for critique. |
| # No --model-id is given, so the script's own default model is used — presumably the |
| # base model, judging by the output file name; verify against the script. |
| python self_refine_qwen3_vllm.py \ |
| --num-iterations 5 \ |
| --max-new-tokens 512 --revise-max-new-tokens 512 --critique-max-new-tokens 512 \ |
| --temperature 0.1 --critique-temperature 0.3 \ |
| --output-file /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_self_refine_vllm_qwen3_4B_base.json \ |
| --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \ |
| --test-json /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json --src-lang Bengali |
|
|
| # Self-refine run with --model-id pointing at the model in model/bn. |
| # 5 iterations, 512-token limits for generation, revision and critique, |
| # temperature 0.1 for generation and 0.3 for critique. |
| python self_refine_qwen3_vllm.py \ |
| --model-id /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn \ |
| --num-iterations 5 \ |
| --max-new-tokens 512 --revise-max-new-tokens 512 --critique-max-new-tokens 512 \ |
| --temperature 0.1 --critique-temperature 0.3 \ |
| --output-file /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_self_refine_vllm_qwen3_4B_sft.json \ |
| --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \ |
| --test-json /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json --src-lang Bengali |
|
|
| # Move into the project directory so the relative script names and the relative |
| # results/ and dataset/ paths used by the evaluation commands resolve from there. |
| cd /home/mshahidul/readctrl/code/fine_tune_sft_dpo |
|
| # Run evaluate_scores.py on the SFT self-refine results with the test_bn subclaims file. |
| # NOTE(review): the other evaluation commands call evaluate_scores_bn.py — confirm that |
| # the non-_bn script is intended for this input. |
| python evaluate_scores.py \ |
| --input results/bn/test_self_refine_vllm_qwen3_4B_sft.json --subclaims dataset/bn/test_bn_subclaims.json \ |
| --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn |
|
|
| # Run evaluate_scores_bn.py on the best-of-n result file with an explicit --model-key. |
| # The script name and --subclaims path are relative to the current directory. |
| # NOTE(review): --input is the *_base.json result but --model-key is qwen3_finetuned — |
| # confirm this pairing is intended. |
| python evaluate_scores_bn.py \ |
| --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \ |
| --subclaims dataset/bn/test_bn_subclaims.json --model-key qwen3_finetuned \ |
| --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn |
|
|
| # Run evaluate_scores_bn.py on the base best-of-n results without --model-key, |
| # so the script's default key handling applies. |
| # The script name and --subclaims path are relative to the current directory. |
| python evaluate_scores_bn.py \ |
| --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \ |
| --subclaims dataset/bn/test_bn_subclaims.json --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn |
|
|
|
|
| # Run evaluate_scores_bn_vllm_rl.py on the base best-of-n results. |
| # The script is addressed by absolute path, but --subclaims is relative and |
| # therefore still depends on the current directory. |
| python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluate_scores_bn_vllm_rl.py \ |
| --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \ |
| --subclaims dataset/bn/test_bn_subclaims.json --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn |
|
|
| # Run evaluate_scores_bn_vllm.py on the bn_200.jsonl results from readctrl_rl_inference. |
| # The script name, --output-dir and --subclaims paths are relative to the current directory. |
| # Every option line except the last must end with a continuation backslash; without it |
| # the command ends early and the following option is executed as a separate command. |
| python evaluate_scores_bn_vllm.py \ |
| --input /home/mshahidul/readctrl/code/readctrl_rl_inference/vllm_model_result/bn_temp/bn_200.jsonl \ |
| --output-dir evaluation/bn/ \ |
| --subclaims dataset/bn/test_bn_subclaims.json |
|
|