File size: 3,748 Bytes
030876e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93694bb
 
 
 
 
 
 
 
 
 
 
030876e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93694bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Serve meta-llama/Llama-3.1-8B-Instruct with vLLM on port 8040 under the
# served model name "dspy" (bfloat16, tensor-parallel size 1, max model
# length 16384). The two CUDA_* assignments apply to this command only:
# devices are ordered by PCI bus id and only device index 2 is visible.
# NOTE(review): `vllm serve` presumably stays in the foreground, so if this
# file is executed top to bottom the commands below would not start until
# the server exits -- confirm these are meant to be run one at a time.
llm_model=meta-llama/Llama-3.1-8B-Instruct
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 \
  vllm serve "$llm_model" \
    --port 8040 \
    --served-model-name dspy \
    --dtype bfloat16 \
    --tensor-parallel-size 1 \
    --max-model-len 16384

# Run qwen3-inference-vllm_bn.py with --model-dir Qwen/Qwen3-4B-Instruct-2507
# (presumably a Hugging Face Hub id rather than a local directory -- verify).
# The output path is relative, so it resolves against the current directory.
inference_script=/home/mshahidul/readctrl/code/fine_tune_sft_dpo/qwen3-inference-vllm_bn.py
python "$inference_script" \
  --model-dir Qwen/Qwen3-4B-Instruct-2507 \
  --output-file results/bn/test_inference_vllm_qwen3-4B_base.json


# Best-of-n run with --model base on the Bengali test set.
# The script name and the output path are relative to the current directory.
# NOTE(review): the fine-tuned best-of-n run in this file invokes
# best_of_n_qwen3_vllm_bn.py (with a _bn suffix); confirm the un-suffixed
# script here is intentional.
bon_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python best_of_n_qwen3_vllm.py \
  --model base \
  --output-file results/bn/test_best_of_n_qwen3-4B_base.json \
  --prompt-dir "$bon_root/prompt_bn" \
  --test-data "$bon_root/dataset/bn/test_bn.json" \
  --src-lang Bengali

# Best-of-n run with --model finetuned, pointing --finetuned-model-dir at
# model/bn under the project root. Prompts and test data are the same
# prompt_bn / test_bn.json files; the script name and output path are
# relative to the current directory.
bon_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python best_of_n_qwen3_vllm_bn.py \
  --model finetuned \
  --output-file results/bn/test_best_of_n_qwen3-4B_sft.json \
  --prompt-dir "$bon_root/prompt_bn" \
  --test-data "$bon_root/dataset/bn/test_bn.json" \
  --src-lang Bengali \
  --finetuned-model-dir "$bon_root/model/bn"


# Run qwen3-inference-vllm_bn.py against the model stored in model/bn under
# the project root; the output path is relative to the current directory.
sft_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python "$sft_root/qwen3-inference-vllm_bn.py" \
  --model-dir "$sft_root/model/bn" \
  --output-file results/bn/test_inference_vllm_qwen3-4B_sft.json

# Self-refine run: 5 iterations, 512-token limits for generation, revision
# and critique, temperature 0.1 (0.3 for critiques).
# No --model-id is passed, so whatever model the script defaults to is used
# -- presumably the base model, going by the output file name; verify.
# The script name itself is relative to the current directory.
refine_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python self_refine_qwen3_vllm.py \
  --num-iterations 5 \
  --max-new-tokens 512 \
  --revise-max-new-tokens 512 \
  --critique-max-new-tokens 512 \
  --temperature 0.1 \
  --critique-temperature 0.3 \
  --output-file "$refine_root/results/bn/test_self_refine_vllm_qwen3_4B_base.json" \
  --prompt-dir "$refine_root/prompt_bn" \
  --test-json "$refine_root/dataset/bn/test_bn.json" \
  --src-lang Bengali

# Self-refine run with --model-id set to model/bn under the project root:
# 5 iterations, 512-token limits for generation, revision and critique,
# temperature 0.1 (0.3 for critiques).
# The script name itself is relative to the current directory.
refine_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python self_refine_qwen3_vllm.py \
  --model-id "$refine_root/model/bn" \
  --num-iterations 5 \
  --max-new-tokens 512 \
  --revise-max-new-tokens 512 \
  --critique-max-new-tokens 512 \
  --temperature 0.1 \
  --critique-temperature 0.3 \
  --output-file "$refine_root/results/bn/test_self_refine_vllm_qwen3_4B_sft.json" \
  --prompt-dir "$refine_root/prompt_bn" \
  --test-json "$refine_root/dataset/bn/test_bn.json" \
  --src-lang Bengali

# Enter the project directory. The evaluation commands that follow use
# relative script names and relative --input/--subclaims paths, so they only
# work from here. Stop instead of continuing in the wrong directory if the
# cd fails (when run as a script).
cd /home/mshahidul/readctrl/code/fine_tune_sft_dpo || exit 1

# Run evaluate_scores.py on the self-refine SFT results with the Bengali
# subclaims file; --input and --subclaims are relative to the current
# directory, while the output directory is absolute.
eval_out_dir=/home/mshahidul/readctrl/code/fine_tune_sft_dpo/evaluation/bn
python evaluate_scores.py \
  --input results/bn/test_self_refine_vllm_qwen3_4B_sft.json \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir "$eval_out_dir"

# Run evaluate_scores_bn.py on the best-of-n base results, passing
# --model-key qwen3_finetuned. --subclaims is relative to the current
# directory.
# NOTE(review): the model key says "finetuned" while the input file is the
# *_base.json result -- confirm this pairing is intended.
eval_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python evaluate_scores_bn.py \
  --input "$eval_root/results/bn/test_best_of_n_qwen3-4B_base.json" \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --model-key qwen3_finetuned \
  --output-dir "$eval_root/evaluation/bn"

# Run evaluate_scores_bn.py on the best-of-n base results without a
# --model-key argument. --subclaims is relative to the current directory.
eval_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python evaluate_scores_bn.py \
  --input "$eval_root/results/bn/test_best_of_n_qwen3-4B_base.json" \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir "$eval_root/evaluation/bn"


# Run evaluate_scores_bn_vllm_rl.py (absolute path) on the best-of-n base
# results. --subclaims is relative to the current directory.
eval_root=/home/mshahidul/readctrl/code/fine_tune_sft_dpo
python "$eval_root/evaluate_scores_bn_vllm_rl.py" \
  --input "$eval_root/results/bn/test_best_of_n_qwen3-4B_base.json" \
  --subclaims dataset/bn/test_bn_subclaims.json \
  --output-dir "$eval_root/evaluation/bn"

# Run evaluate_scores_bn_vllm.py on bn_200.jsonl from the RL inference
# results. The script name, --output-dir and --subclaims are relative to the
# current directory.
# Every option line except the last must end in a backslash: without one
# after --output-dir, the command ends there and --subclaims is executed as
# a separate (nonexistent) command instead of being passed to the script.
python evaluate_scores_bn_vllm.py \
  --input /home/mshahidul/readctrl/code/readctrl_rl_inference/vllm_model_result/bn_temp/bn_200.jsonl \
  --output-dir evaluation/bn/ \
  --subclaims dataset/bn/test_bn_subclaims.json