# shahidul034
# "Update readCtrl repo"
# 93694bb
# Launch a vLLM server for meta-llama/Llama-3.1-8B-Instruct on port 8040,
# published to clients under the model name "dspy".
# The two CUDA_* variables are set for this one command only: devices are
# ordered by PCI bus id and only device 2 is made visible to the process.
# NOTE(review): there is no trailing '&', so the shell waits on this command;
# presumably it is meant to run in its own terminal — confirm.
CUDA_DEVICE_ORDER=PCI_BUS_ID \
CUDA_VISIBLE_DEVICES=2 \
  vllm serve meta-llama/Llama-3.1-8B-Instruct \
    --port 8040 --served-model-name dspy \
    --dtype bfloat16 --tensor-parallel-size 1 \
    --max-model-len 16384
# Run qwen3-inference-vllm_bn.py with --model-dir set to the identifier
# Qwen/Qwen3-4B-Instruct-2507 (not a local path; presumably resolved as a
# model-hub id — confirm). The output path is relative, so the JSON lands
# under results/bn/ of whatever directory this is run from.
python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py \
  --model-dir Qwen/Qwen3-4B-Instruct-2507 \
  --output-file results/bn/test_inference_vllm_qwen3-4B_base.json
# Best-of-n run with --model base, using the Bengali prompt directory and the
# Bengali test set. The script name and --output-file are relative paths, so
# this must be started from the directory that contains the script.
# NOTE(review): the finetuned counterpart in this file calls
# best_of_n_qwen3_vllm_bn.py; confirm the non-"_bn" script is intended here.
python best_of_n_qwen3_vllm.py \
  --model base \
  --output-file results/bn/test_best_of_n_qwen3-4B_base.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-data /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali
# Best-of-n run with --model finetuned; the fine-tuned checkpoint is read from
# model/bn via --finetuned-model-dir. Prompts and test data are the Bengali
# ones. The script name and --output-file are relative to the current
# directory.
python best_of_n_qwen3_vllm_bn.py \
  --model finetuned \
  --output-file results/bn/test_best_of_n_qwen3-4B_sft.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-data /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali \
  --finetuned-model-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn
# Run qwen3-inference-vllm_bn.py against the local checkpoint directory
# model/bn. The output file name carries the "_sft" suffix and is written
# relative to the current directory.
python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py \
  --model-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn \
  --output-file results/bn/test_inference_vllm_qwen3-4B_sft.json
# Self-refine run: 5 iterations, 512 new tokens for each of the generate /
# revise / critique steps, temperature 0.1 for generation and 0.3 for critique.
# No --model-id is passed, so whatever default the script defines is used
# (presumably the base model, given the "_base" output name — confirm).
# The script name is relative to the current directory.
python self_refine_qwen3_vllm.py \
  --num-iterations 5 \
  --max-new-tokens 512 --revise-max-new-tokens 512 --critique-max-new-tokens 512 \
  --temperature 0.1 --critique-temperature 0.3 \
  --output-file /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_self_refine_vllm_qwen3_4B_base.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-json /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali
# Self-refine run with --model-id pointing at the local checkpoint directory
# model/bn. Sampling settings: 5 iterations, 512 new tokens per generate /
# revise / critique step, temperature 0.1 (generation) and 0.3 (critique).
# Results go to the "_sft" JSON under results/bn/. The script name is relative
# to the current directory.
python self_refine_qwen3_vllm.py \
  --model-id /home/mshahidul/readctrl/code/fine_tune_sft_dpo/model/bn \
  --num-iterations 5 \
  --max-new-tokens 512 --revise-max-new-tokens 512 --critique-max-new-tokens 512 \
  --temperature 0.1 --critique-temperature 0.3 \
  --output-file /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_self_refine_vllm_qwen3_4B_sft.json \
  --prompt-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/prompt_bn \
  --test-json /home/mshahidul/readctrl/code/fine_tune_sft_dpo/dataset/bn/test_bn.json \
  --src-lang Bengali
# Switch to the fine_tune_sft_dpo directory. The relative paths used from here
# on (results/bn/..., dataset/bn/..., and the evaluate_* script names) are
# resolved against it.
cd /home/mshahidul/readctrl/code/fine_tune_sft_dpo

# Score the self-refine "_sft" results with evaluate_scores.py, using the
# Bengali subclaims file; reports are written to evaluation/bn.
# NOTE(review): the other evaluations in this file call evaluate_scores_bn*.py;
# confirm the non-"_bn" evaluator is intended here.
python evaluate_scores.py \
  --input results/bn/test_self_refine_vllm_qwen3_4B_sft.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn
# Score the best-of-n results with evaluate_scores_bn.py, selecting entries via
# --model-key qwen3_finetuned. Paths for the script and --subclaims are
# relative to the current directory.
# NOTE(review): --input is the "_base" results file while the model key says
# "finetuned" — confirm this pairing is intended.
python evaluate_scores_bn.py \
  --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --model-key qwen3_finetuned \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn
# Score the best-of-n "_base" results with evaluate_scores_bn.py. No
# --model-key is given, so the script's own default applies. The script name
# and --subclaims path are relative to the current directory.
python evaluate_scores_bn.py \
  --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn
# Score the best-of-n "_base" results with the evaluate_scores_bn_vllm_rl.py
# evaluator (invoked by absolute path). --subclaims is still a relative path,
# so the current directory must contain dataset/bn/.
python /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluate_scores_bn_vllm_rl.py \
  --input /home/mshahidul/readctrl/code/fine_tune_sft_dpo/results/bn/test_best_of_n_qwen3-4B_base.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir /home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn
# Score bn_200.jsonl (from readctrl_rl_inference/vllm_model_result/bn_temp)
# with evaluate_scores_bn_vllm.py, writing reports to evaluation/bn/ and using
# the Bengali subclaims file. The script name, --output-dir and --subclaims are
# relative to the current directory.
# Every continued line must end in a backslash: without one after
# --output-dir, the shell ends the command there and then tries to execute
# "--subclaims" as a separate command, so the script never receives it.
python evaluate_scores_bn_vllm.py \
  --input /home/mshahidul/readctrl/code/readctrl_rl_inference/vllm_model_result/bn_temp/bn_200.jsonl \
  --output-dir evaluation/bn/ \
  --subclaims dataset/bn/test_bn_subclaims.json