# Command list for the Bengali (bn) inference and evaluation runs.
#
# Each command is written as one statement with backslash-newline
# continuations. A backslash followed by a space is NOT a continuation in
# bash (it is an escaped literal space), so every "\" below must be the very
# last character on its line.
#
# NOTE(review): the inference commands use relative script names and relative
# output paths (results/bn/...) before the `cd` further down, so they depend
# on the directory this is launched from -- confirm the intended working
# directory.

# --- Model server ----------------------------------------------------------
# Serve Llama-3.1-8B-Instruct on port 8040 under the served model name "dspy",
# with CUDA_VISIBLE_DEVICES=2 (device order by PCI bus id).
# NOTE(review): `vllm serve` normally stays in the foreground; if this file is
# executed as a script the commands below will not start until it exits.
# Presumably it is meant to be run in a separate terminal -- confirm.
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 vllm serve meta-llama/Llama-3.1-8B-Instruct \
  --port 8040 \
  --served-model-name dspy \
  --dtype bfloat16 \
  --tensor-parallel-size 1 \
  --max-model-len 16384

# --- Inference: base model (Qwen/Qwen3-4B-Instruct-2507) -------------------
python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py \
  --model-dir Qwen/Qwen3-4B-Instruct-2507 \
  --output-file results/bn/test_inference_vllm_qwen3-4B_base.json

# --- Best-of-N: base model -------------------------------------------------
# NOTE(review): this call uses best_of_n_qwen3_vllm.py while the finetuned
# run below uses best_of_n_qwen3_vllm_bn.py -- confirm the difference is
# intentional.
python best_of_n_qwen3_vllm.py --model base \
  --output-file results/bn/test_best_of_n_qwen3-4B_base.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-data /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali

# --- Best-of-N: finetuned model (model/bn) ---------------------------------
python best_of_n_qwen3_vllm_bn.py --model finetuned \
  --output-file results/bn/test_best_of_n_qwen3-4B_sft.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-data /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali \
  --finetuned-model-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn

# --- Inference: finetuned model (model/bn) ---------------------------------
python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py \
  --model-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn \
  --output-file results/bn/test_inference_vllm_qwen3-4B_sft.json

# --- Self-refine: no --model-id given (output file is named *_base.json) ---
python self_refine_qwen3_vllm.py \
  --num-iterations 5 \
  --max-new-tokens 512 \
  --revise-max-new-tokens 512 \
  --critique-max-new-tokens 512 \
  --temperature 0.1 \
  --critique-temperature 0.3 \
  --output-file /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_self_refine_vllm_qwen3_4B_base.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-json /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali

# --- Self-refine: --model-id points at model/bn (output is *_sft.json) -----
python self_refine_qwen3_vllm.py \
  --model-id /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn \
  --num-iterations 5 \
  --max-new-tokens 512 \
  --revise-max-new-tokens 512 \
  --critique-max-new-tokens 512 \
  --temperature 0.1 \
  --critique-temperature 0.3 \
  --output-file /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_self_refine_vllm_qwen3_4B_sft.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-json /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali

# --- Evaluation ------------------------------------------------------------
# The evaluation commands below pass relative --input / --subclaims /
# --output-dir paths, so they are run from the project directory.
cd /home/mshahidul/readctrl/code/fine_tune_sft_dpo

python evaluate_scores.py \
  --input results/bn/test_self_refine_vllm_qwen3_4B_sft.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn

# NOTE(review): --model-key is qwen3_finetuned but --input is the *_base.json
# best-of-N result -- confirm this pairing is intended.
python evaluate_scores_bn.py \
  --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --model-key qwen3_finetuned \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn

# Same input as above, without --model-key.
python evaluate_scores_bn.py \
  --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn

python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluate_scores_bn_vllm_rl.py \
  --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn

python evaluate_scores_bn_vllm.py \
  --input /home/mshahidul/readctrl/code/readctrl_rl_inference/vllm_model_result/bn_temp/bn_200.jsonl \
  --output-dir evaluation/bn/ \
  --subclaims dataset/bn/test_bn_subclaims.json