| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v2.py |
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ans_combiner_v2.py |
| |
|
| |
|
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ans_combiner_v2.py |
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ques_ans_eval_v3.py |
| | python /home/mshahidul/LLM_guard/code/readability_control.py |
| |
|
| |
|
| |
|
| |
|
| | save_folder="/home/mshahidul/LLM_guard/results/new_v2/best_ans_selection(more_strict)_v3" |
| | save_folder="/home/mshahidul/LLM_guard/results/new_v2/answers_combiner(more_strict)_v4" |
| | save_folder="/home/mshahidul/LLM_guard/results/new_v2/sub_ques_ans_combined_different_models_ans_evaluation(more_strict)_v3" |
| |
|
| |
|
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_p4m__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_q25-3B__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team1_criteria__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_l32-3B__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team4_criteria__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team2_criteria__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team3_criteria__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_q3-4B__CB_q25-3B__JG_q3G.json |
| | python /home/mshahidul/readctrl/code/readability_control.py |
| |
|
| |
|
| |
|
| |
|
| | python /home/mshahidul/LLM_guard/RL/grpo.py |
| |
|
| |
|
| | python /home/mshahidul/LLM_guard/RL/grpo_reward_qwen_guard.py |
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v4_grpo.py --input_file /home/mshahidul/LLM_guard/data/sub_questions_answers_combined_diff_models.json --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/ |
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ans_combiner_v2.py --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/ |
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ques_ans_eval_v3.py --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/ |
| | python /home/mshahidul/LLM_guard/code/readability_control.py |
| |
|
| |
|
| | python /home/mshahidul/LLM_guard/RL/grpo_reward_qwen_guard.py |
| |
|
| | python /home/mshahidul/LLM_guard/code/qwen3-32B.py |
| | python /home/mshahidul/LLM_guard/code/readability_control.py |
| |
|
| |
|
| |
|
| | question --> subquestions --> multiple-model answering --> best-answer selection --> combiner --> evaluation |
| |
|
| | question --> subquestions --> single-model answering --> combiner --> evaluation |
| |
|
| |
|
| |
|
| | python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v5_grpo.py --input_file /home/mshahidul/LLM_guard/data/sub_questions_answers_combined_diff_models.json --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/ |
| |
|
| | python /home/mshahidul/LLM_guard/code/data_pre/unsafe_data_generation_inf.py |
| |
|
| | python /home/mshahidul/LLM_guard/code/Lllama31_8B.py |
| |
|
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py --input_file /home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/llama-3.1-8B_direct_temp0.3.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py --input_file /home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/phi-4_direct_temp0.3.json |
| | python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py --input_file /home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/qwen3-14B_direct_temp0.3.json |
| |
|
| | python /home/mshahidul/LLM_guard/code/inference/phi-4_infV2.py |
| | python /home/mshahidul/LLM_guard/code/inference/qwen3-14_inf_v2.py |
| | python /home/mshahidul/LLM_guard/code/readability_control.py |
| |
|
| |
|
| |
|
| |
|
| | ssh -R 80:localhost:8000 ssh.localhost.run |
| | https://chatgpt.com/c/6904577d-3758-832f-9ffd-b1bc52df4058 |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=1,2,3,4 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-32B-Instruct-AWQ --tensor-parallel-size 4 --port 8000 --dtype auto --max-model-len 8192 --gpu-memory-utilization 0.9 |
| |
|
| | CUDA_VISIBLE_DEVICES=7 ~/llama.cpp/build/bin/llama-server -m /home/mshahidul/model/gpt20b/gpt-oss-20b-F16.gguf --host 0.0.0.0 --port 8000 --jinja -ngl 80 --threads -1 --ctx-size 16384 --temp 0.4 --top-p 0.9 --top-k 50 |
| |
|
| | CUDA_VISIBLE_DEVICES=7 ~/llama.cpp/build/bin/llama-server -m /home/mshahidul/model/gpt20b/gpt-oss-20b-F16.gguf --host 0.0.0.0 --port 8000 --jinja -ngl 80 --threads -1 --ctx-size 16384 --temp 0.4 --top-p 0.9 --top-k 50 |
| |
|
| | ngrok http --domain=adequate-gorilla-tough.ngrok-free.app 8000 |
| |
|
| | CUDA_VISIBLE_DEVICES=0,1,2,3,4 \ |
| | python -m vllm.entrypoints.openai.api_server \ |
| | --model Qwen/Qwen2.5-32B-Instruct-AWQ \ |
| | --tensor-parallel-size 5 \ |
| | --port 8000 \ |
| | --dtype auto \ |
| | --max-model-len 8192 \ |
| | --max-num-batched-tokens 8192 \ |
| | --gpu-memory-utilization 0.9 |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-0.5B-Instruct --tensor-parallel-size 1 --port 8001 --dtype auto --max-model-len 4192 --gpu-memory-utilization 0.85 |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-3B-Instruct --tensor-parallel-size 1 --port 8002 --dtype auto --max-model-len 4192 --gpu-memory-utilization 0.85 |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=3 python -m vllm.entrypoints.openai.api_server \ |
| | --model meta-llama/Meta-Llama-3.1-8B-Instruct \ |
| | --max-model-len 2048 \ |
| | --gpu-memory-utilization 0.95 \ |
| | --port 8003 --dtype auto |
| |
|
| | CUDA_VISIBLE_DEVICES=4 python -m vllm.entrypoints.openai.api_server \ |
| | --model Qwen/Qwen3-4B-Instruct-2507 \ |
| | --max-model-len 2048 \ |
| | --tensor-parallel-size 1 |
| |
|
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server \ |
| | --model /home/mshahidul/llama_guard_model/inference_model/Qwen2.5-7B_instruct_finetuned_fp16 \ |
| | --port 8004 --dtype auto --tensor-parallel-size 1 --max-model-len 2048 \ |
| | --chat-template /home/mshahidul/LLM_guard/RL/custom_grpo_trainer/inference/qwen2.5.jinja |
| |
|
| |
|
| |
|
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \ |
| | --model /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16 \ |
| | --port 8002 \ |
| | --dtype auto \ |
| | --tensor-parallel-size 1 \ |
| | --max-model-len 2048 \ |
| | --gpu-memory-utilization 0.85 \ |
| | --chat-template /home/mshahidul/LLM_guard/RL/custom_grpo_trainer/inference/qwen2.5.jinja |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \ |
| | --model Qwen/Qwen3-8B \ |
| | --served-model-name chatbot \ |
| | --max-model-len 8192 \ |
| | --tensor-parallel-size 1 \ |
| | --port 8004 \ |
| | --dtype auto \ |
| | --trust-remote-code \ |
| | --enable-auto-tool-choice --tool-call-parser hermes --reasoning-parser deepseek_r1 |
| |
|
| |
|
| |
|
| | curl https://adequate-gorilla-tough.ngrok-free.app/v1/chat/completions \ |
| | -H "Content-Type: application/json" \ |
| | -d '{ |
| | "model": "chatbot", |
| | "messages": [ |
| | {"role": "user", "content": "Hello! Say hi in one sentence."} |
| | ], |
| | "temperature": 0.7 |
| | }' |
| |
|
| | python /home/mshahidul/readctrl/code/finetune-inference/convert_fp16.py \ |
| | --model_path /home/mshahidul/readctrl_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1 \ |
| | --save_path /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1_BF16_merged \ |
| | --cuda_device 2 |
| |
|
| |
|
| | python /home/mshahidul/readctrl/code/finetune-inference/mistral_3.1_24B.py |
| | python /home/mshahidul/readctrl/code/readability_control.py |
| |
|
| | Qwen/Qwen3-30B-A3B |
| | python /home/mshahidul/readctrl/code/finetune-inference/subclaim_support/subclaim_support_cal_tesing_v4.py --start_index 0 --end_index 100 |
| |
|
| |
|
| | python /home/mshahidul/readctrl/code/finetune-inference/convert_fp16.py --model_path /home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx --save_path /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-extraction-8b_ctx_fp16 |
| |
|
| | python /home/mshahidul/readctrl/code/finetune-inference/inference_extract_subclaims_vllm.py --input_file /home/mshahidul/readctrl/data/processed_test_raw_data/multiclinsum_test_en.json --start 3000 |
| |
|
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="2" PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True VLLM_USE_MODELSCOPE=True vllm serve swift/Qwen3-30B-A3B-AWQ --gpu-memory-utilization 0.9 --max-model-len 32768 --max-num-seqs 64 --served-model-name swift/Qwen3-30B-A3B-AWQ --host 127.0.0.1 --port 8004 |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES="2" vllm serve Qwen/Qwen3-14B-AWQ --gpu-memory-utilization 0.9 --max-model-len 32768 --max-num-seqs 64 --served-model-name Qwen/Qwen3-14B-AWQ --host 127.0.0.1 --port 8004 |
| |
|
| |
|
| |
|
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="2" \ |
| | PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \ |
| | VLLM_USE_MODELSCOPE=True \ |
| | vllm serve swift/Qwen3-30B-A3B-AWQ \ |
| | --gpu-memory-utilization 0.5 \ |
| | --max-model-len 8192 \ |
| | --max-num-seqs 64 \ |
| | --served-model-name Qwen3-30B-A3B-AWQ \ |
| | --port 8004 |
| |
|
| | |
| |
|
| | |
| | export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True |
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="2" \ |
| | vllm serve /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16 \ |
| | --port 8004 \ |
| | --quantization bitsandbytes \ |
| | --load-format bitsandbytes \ |
| | --max-model-len 8192 \ |
| | --gpu-memory-utilization 0.90 \ |
| | --trust-remote-code \ |
| | --served-model-name "qwen3-32b-readctrl" |
| |
|
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="1" vllm serve google/translategemma-27b-it \ |
| | --port 8006 \ |
| | --served-model-name translate_gemma \ |
| | --dtype bfloat16 \ |
| | --max-model-len 8192 \ |
| | --gpu-memory-utilization 0.90 |
| |
|
| | echo $OPENAI_API_KEY |
| |
|
| |
|
| | python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang bn |
| | python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang zh |
| | python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang vi |
| | python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang hi |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=0 \ |
| | vllm serve \ |
| | /home/mshahidul/readctrl_model/gguf/qwen3-8B_subclaims-verifier_lora_nonreasoning_gguf_8bit \ |
| | --port 8085 \ |
| | --host 0.0.0.0 \ |
| | --load-format gguf \ |
| | --quantization gguf \ |
| | --tensor-parallel-size 1 \ |
| | --gpu-memory-utilization 0.55 |
| |
|
| | |
| | CUDA_VISIBLE_DEVICES=3 \ |
| | python3 -m vllm.entrypoints.openai.api_server \ |
| | --model /home/mshahidul/readctrl_model/support_checking_vllm \ |
| | --served-model-name support_check \ |
| | --dtype half \ |
| | --enable-prefix-caching \ |
| | --gpu-memory-utilization 0.90 \ |
| | --max-model-len 4096 \ |
| | --port 8089 |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | export TORCH_COMPILE_CACHE_SIZE_LIMIT=512 |
| | CUDA_VISIBLE_DEVICES=6 \ |
| | python -m vllm.entrypoints.openai.api_server \ |
| | --model unsloth/Meta-Llama-3.1-8B-Instruct \ |
| | --served-model-name dspy \ |
| | --dtype half \ |
| | --max-model-len 16384 \ |
| | --gpu-memory-utilization 0.90 \ |
| | --enable-prefix-caching \ |
| | --max-num-seqs 256 \ |
| | --port 8036 |
| |
|
| |
|
| |
|
| | export VLLM_USE_V1=0 |
| | CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=4 \ |
| | python -m vllm.entrypoints.openai.api_server \ |
| | --model unsloth/gemma-3-12b-it-bnb-4bit \ |
| | --served-model-name dspy \ |
| | --dtype auto \ |
| | --load-format bitsandbytes \ |
| | --gpu-memory-utilization 0.85 \ |
| | --max-model-len 8192 \ |
| | --port 8034 \ |
| | --trust-remote-code |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \ |
| | --model /home/mshahidul/readctrl_model/support_checking_vllm/sc2_fp16 \ |
| | --served-model-name sc \ |
| | --port 3090 \ |
| | --dtype bfloat16 \ |
| | --gpu-memory-utilization 0.9 \ |
| | --max-num-seqs 256 \ |
| | --max-num-batched-tokens 8192 \ |
| | --enable-prefix-caching \ |
| | --disable-log-requests \ |
| | --trust-remote-code |
| |
|
| | |
| |
|
| |
|
| | CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \ |
| | --model /home/mshahidul/readctrl_model/support_checking_vllm/qwen3-4b \ |
| | --served-model-name sc \ |
| | --port 3090 \ |
| | --dtype bfloat16 \ |
| | --gpu-memory-utilization 0.9 \ |
| | --max-num-seqs 256 \ |
| | --max-num-batched-tokens 8192 \ |
| | --enable-prefix-caching \ |
| | --disable-log-requests \ |
| | --trust-remote-code |
| |
|
| | CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \ |
| | --model hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4 \ |
| | --served-model-name dspy \ |
| | --quantization awq \ |
| | --port 8040 \ |
| | --max-model-len 16384 \ |
| | --gpu-memory-utilization 0.90 |