python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v2.py
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ans_combiner_v2.py
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ques_ans_eval_v3.py
python /home/mshahidul/LLM_guard/code/readability_control.py
save_folder="/home/mshahidul/LLM_guard/results/new_v2/best_ans_selection(more_strict)_v3"
save_folder="/home/mshahidul/LLM_guard/results/new_v2/answers_combiner(more_strict)_v4"
save_folder="/home/mshahidul/LLM_guard/results/new_v2/sub_ques_ans_combined_different_models_ans_evaluation(more_strict)_v3"
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_p4m__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_q25-3B__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team1_criteria__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_l32-3B__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team4_criteria__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team2_criteria__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__Team3_criteria__CB_q25-3B__JG_q3G.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__SA_q3-4B__CB_q25-3B__JG_q3G.json
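The eight evaluation runs above differ only in the input file, so one loop covers them; a minimal sketch, assuming the glob below matches exactly the HB100 variants listed and nothing else in that directory:
for f in /home/mshahidul/LLM_guard/results/madhu_bhai/madhu_bhai_dataset/HB100_DA__SQ_Q25-3B__*__CB_q25-3B__JG_q3G.json; do
    # one evaluation pass per dataset variant
    python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_madhu_Bhai_v2.py --input_file "$f"
done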
python /home/mshahidul/readctrl/code/readability_control.py
python /home/mshahidul/LLM_guard/RL/grpo.py
python /home/mshahidul/LLM_guard/RL/grpo_reward_qwen_guard.py
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v4_grpo.py --input_file /home/mshahidul/LLM_guard/data/sub_questions_answers_combined_diff_models.json --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ans_combiner_v2.py --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ques_ans_eval_v3.py --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/
python /home/mshahidul/LLM_guard/code/readability_control.py
python /home/mshahidul/LLM_guard/RL/grpo_reward_qwen_guard.py
python /home/mshahidul/LLM_guard/code/qwen3-32B.py
python /home/mshahidul/LLM_guard/code/readability_control.py
question --> subquestions --> multiple model answering --> best_ans_selection --> combiner --> evaluation (driver sketch below)
question --> subquestions --> single model answering --> combiner --> evaluation
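A minimal driver for the multi-model variant, chaining the v4 selection, combiner, and evaluation scripts exactly as invoked above, assuming they accept the same --input_file/--save_folder flags (the single-model variant would skip the selection step):
SAVE=/home/mshahidul/LLM_guard/results/new_grpo_v6/
# pick the best answer per sub-question across the per-model answers
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v4_grpo.py \
    --input_file /home/mshahidul/LLM_guard/data/sub_questions_answers_combined_diff_models.json \
    --save_folder "$SAVE"
# combine the selected sub-answers into one final answer
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ans_combiner_v2.py --save_folder "$SAVE"
# judge the combined answer
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_ques_ans_eval_v3.py --save_folder "$SAVE"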
python /home/mshahidul/LLM_guard/code/auto/inference_qwen3-32B_best_ans_selection_v5_grpo.py --input_file /home/mshahidul/LLM_guard/data/sub_questions_answers_combined_diff_models.json --save_folder /home/mshahidul/LLM_guard/results/new_grpo_v6/
python /home/mshahidul/LLM_guard/code/data_pre/unsafe_data_generation_inf.py
python /home/mshahidul/LLM_guard/code/Lllama31_8B.py
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py --input_file /home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/llama-3.1-8B_direct_temp0.3.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py --input_file /home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/phi-4_direct_temp0.3.json
python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py --input_file /home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/qwen3-14B_direct_temp0.3.json
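Same pattern for the three general-domain files; a sketch, assuming only these three models were run at temp 0.3:
for m in llama-3.1-8B phi-4 qwen3-14B; do
    python /home/mshahidul/LLM_guard/code/evaluation/inference_qwen3-32B_ques_ans_eval_v4.py \
        --input_file "/home/mshahidul/LLM_guard/results_general_domain/direct_answer_filtered/${m}_direct_temp0.3.json"
done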
python /home/mshahidul/LLM_guard/code/inference/phi-4_infV2.py
python /home/mshahidul/LLM_guard/code/inference/qwen3-14_inf_v2.py
python /home/mshahidul/LLM_guard/code/readability_control.py
ssh -R 80:localhost:8000 ssh.localhost.run
https://chatgpt.com/c/6904577d-3758-832f-9ffd-b1bc52df4058
CUDA_VISIBLE_DEVICES=1,2,3,4 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-32B-Instruct-AWQ --tensor-parallel-size 4 --port 8000 --dtype auto --max-model-len 8192 --gpu-memory-utilization 0.9
CUDA_VISIBLE_DEVICES=7 ~/llama.cpp/build/bin/llama-server -m /home/mshahidul/model/gpt20b/gpt-oss-20b-F16.gguf --host 0.0.0.0 --port 8000 --jinja -ngl 80 --threads -1 --ctx-size 16384 --temp 0.4 --top-p 0.9 --top-k 50
ngrok http --domain=adequate-gorilla-tough.ngrok-free.app 8000
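Quick check that the tunnel actually reaches the server behind port 8000 (vLLM exposes the standard OpenAI /v1/models route):
curl -s https://adequate-gorilla-tough.ngrok-free.app/v1/models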
CUDA_VISIBLE_DEVICES=0,1,2,3,4 \
python -m vllm.entrypoints.openai.api_server \
--model Qwen/Qwen2.5-32B-Instruct-AWQ \
--tensor-parallel-size 5 \
--port 8000 \
--dtype auto \
--max-model-len 8192 \
--max-num-batched-tokens 8192 \
--gpu-memory-utilization 0.9
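The 32B AWQ server takes a while to load; a small wait loop before sending it work, assuming nothing else is bound to port 8000:
until curl -sf http://localhost:8000/v1/models > /dev/null; do
    echo "waiting for vLLM on :8000 ..."
    sleep 10
done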
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-0.5B-Instruct --tensor-parallel-size 1 --port 8001 --dtype auto --max-model-len 4192 --gpu-memory-utilization 0.85
CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen2.5-3B-Instruct --tensor-parallel-size 1 --port 8002 --dtype auto --max-model-len 4192 --gpu-memory-utilization 0.85
CUDA_VISIBLE_DEVICES=3 python -m vllm.entrypoints.openai.api_server \
--model meta-llama/Meta-Llama-3.1-8B-Instruct \
--max-model-len 2048 \
--gpu-memory-utilization 0.95 \
--port 8003 --dtype auto
CUDA_VISIBLE_DEVICES=4 python -m vllm.entrypoints.openai.api_server \
--model Qwen/Qwen3-4B-Instruct-2507 \
--max-model-len 2048 \
--tensor-parallel-size 1
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server \
--model /home/mshahidul/llama_guard_model/inference_model/Qwen2.5-7B_instruct_finetuned_fp16 \
--port 8004 --dtype auto --tensor-parallel-size 1 --max-model-len 2048 \
--chat-template /home/mshahidul/LLM_guard/RL/custom_grpo_trainer/inference/qwen2.5.jinja
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \
--model /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16 \
--port 8002 \
--dtype auto \
--tensor-parallel-size 1 \
--max-model-len 2048 \
--gpu-memory-utilization 0.85 \
--chat-template /home/mshahidul/LLM_guard/RL/custom_grpo_trainer/inference/qwen2.5.jinja
# For subclaim support check model: /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16
# For reasoning model: Qwen/Qwen3-30B-A3B-Instruct-2507
# nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \
--model Qwen/Qwen3-8B \
--served-model-name chatbot \
--max-model-len 8192 \
--tensor-parallel-size 1 \
--port 8004 \
--dtype auto \
--trust-remote-code \
--enable-auto-tool-choice --tool-call-parser hermes --reasoning-parser deepseek_r1
curl https://adequate-gorilla-tough.ngrok-free.app/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "chatbot",
"messages": [
{"role": "user", "content": "Hello! Say hi in one sentence."}
],
"temperature": 0.7
}'
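To pull just the reply text out of the JSON response, pipe the same request through jq (assumes jq is installed):
curl -s https://adequate-gorilla-tough.ngrok-free.app/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "chatbot", "messages": [{"role": "user", "content": "Hello!"}]}' \
  | jq -r '.choices[0].message.content'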
python /home/mshahidul/readctrl/code/finetune-inference/convert_fp16.py \
--model_path /home/mshahidul/readctrl_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1 \
--save_path /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1_BF16_merged \
--cuda_device 2
python /home/mshahidul/readctrl/code/finetune-inference/mistral_3.1_24B.py
python /home/mshahidul/readctrl/code/readability_control.py
Qwen/Qwen3-30B-A3B
python /home/mshahidul/readctrl/code/finetune-inference/subclaim_support/subclaim_support_cal_tesing_v4.py --start_index 0 --end_index 100
python /home/mshahidul/readctrl/code/finetune-inference/convert_fp16.py --model_path /home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx --save_path /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-extraction-8b_ctx_fp16
python /home/mshahidul/readctrl/code/finetune-inference/inference_extract_subclaims_vllm.py --input_file /home/mshahidul/readctrl/data/processed_test_raw_data/multiclinsum_test_en.json --start 3000
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="2" PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True VLLM_USE_MODELSCOPE=True vllm serve swift/Qwen3-30B-A3B-AWQ --gpu-memory-utilization 0.9 --max-model-len 32768 --max-num-seqs 64 --served-model-name swift/Qwen3-30B-A3B-AWQ --host 127.0.0.1 --port 8004
CUDA_VISIBLE_DEVICES="2" vllm serve Qwen/Qwen3-14B-AWQ --gpu-memory-utilization 0.9 --max-model-len 32768 --max-num-seqs 64 --served-model-name Qwen/Qwen3-14B-AWQ --host 127.0.0.1 --port 8004
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="2" \
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \
VLLM_USE_MODELSCOPE=True \
vllm serve swift/Qwen3-30B-A3B-AWQ \
--gpu-memory-utilization 0.5 \
--max-model-len 8192 \
--max-num-seqs 64 \
--served-model-name Qwen3-30B-A3B-AWQ \
--port 8004
#!/bin/bash
# Optimization: Prevent memory fragmentation
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="2" \
vllm serve /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16 \
--port 8004 \
--quantization bitsandbytes \
--load-format bitsandbytes \
--max-model-len 8192 \
--gpu-memory-utilization 0.90 \
--trust-remote-code \
--served-model-name "qwen3-32b-readctrl"
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="1" vllm serve google/translategemma-27b-it \
--port 8006 \
--served-model-name translate_gemma \
--dtype bfloat16 \
--max-model-len 8192 \
--gpu-memory-utilization 0.90
echo $OPENAI_API_KEY
python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang bn
python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang zh
python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang vi
python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py --source-lang en --target-lang hi
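The four translation runs only vary the target language; one loop covers them (bn, zh, vi, hi as above):
for lang in bn zh vi hi; do
    python /home/mshahidul/readctrl/code/translation/translate_multiclinsum_all_lang_judge_strict_v2.py \
        --source-lang en --target-lang "$lang"
done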
CUDA_VISIBLE_DEVICES=0 \
vllm serve \
/home/mshahidul/readctrl_model/gguf/qwen3-8B_subclaims-verifier_lora_nonreasoning_gguf_8bit \
--port 8085 \
--host 0.0.0.0 \
--load-format gguf \
--quantization gguf \
--tensor-parallel-size 1 \
--gpu-memory-utilization 0.55
# Additional replica ports: 8086, 8087, 8088, 8089 (sketched below)
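A sketch for bringing up all five verifier replicas on ports 8085-8089, assuming one replica per GPU and that GPUs 0-4 are free (the GPU/port mapping is a guess, not taken from the runs above; only GPU 0 / port 8085 is confirmed):
for i in 0 1 2 3 4; do
    # replica i on GPU i, port 8085+i
    CUDA_VISIBLE_DEVICES=$i \
    vllm serve /home/mshahidul/readctrl_model/gguf/qwen3-8B_subclaims-verifier_lora_nonreasoning_gguf_8bit \
        --port $((8085 + i)) \
        --host 0.0.0.0 \
        --load-format gguf \
        --quantization gguf \
        --tensor-parallel-size 1 \
        --gpu-memory-utilization 0.55 &
done
wait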
CUDA_VISIBLE_DEVICES=3 \
python3 -m vllm.entrypoints.openai.api_server \
--model /home/mshahidul/readctrl_model/support_checking_vllm \
--served-model-name support_check \
--dtype half \
--enable-prefix-caching \
--gpu-memory-utilization 0.90 \
--max-model-len 4096 \
--port 8089
# unsloth/Meta-Llama-3.1-8B-Instruct
# unsloth/gpt-oss-20b
# unsloth/gemma-3-12b-it
# unsloth/Qwen2.5-7B-Instruct
# unsloth/phi-4
# CUDA_DEVICE_ORDER=PCI_BUS_ID
export TORCH_COMPILE_CACHE_SIZE_LIMIT=512
CUDA_VISIBLE_DEVICES=6 \
python -m vllm.entrypoints.openai.api_server \
--model unsloth/Meta-Llama-3.1-8B-Instruct \
--served-model-name dspy \
--dtype half \
--max-model-len 16384 \
--gpu-memory-utilization 0.90 \
--enable-prefix-caching \
--max-num-seqs 256 \
--port 8036
export VLLM_USE_V1=0
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=4 \
python -m vllm.entrypoints.openai.api_server \
--model unsloth/gemma-3-12b-it-bnb-4bit \
--served-model-name dspy \
--dtype auto \
--load-format bitsandbytes \
--gpu-memory-utilization 0.85 \
--max-model-len 8192 \
--port 8034 \
--trust-remote-code
CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \
--model /home/mshahidul/readctrl_model/support_checking_vllm/sc2_fp16 \
--served-model-name sc \
--port 3090 \
--dtype bfloat16 \
--gpu-memory-utilization 0.9 \
--max-num-seqs 256 \
--max-num-batched-tokens 8192 \
--enable-prefix-caching \
--disable-log-requests \
--trust-remote-code
# Increase the limit to 128 or higher
CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server \
--model /home/mshahidul/readctrl_model/support_checking_vllm/qwen3-4b \
--served-model-name sc \
--port 3090 \
--dtype bfloat16 \
--gpu-memory-utilization 0.9 \
--max-num-seqs 256 \
--max-num-batched-tokens 8192 \
--enable-prefix-caching \
--disable-log-requests \
--trust-remote-code
CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
--model hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4 \
--served-model-name dspy \
--quantization awq \
--port 8040 \
--max-model-len 16384 \
--gpu-memory-utilization 0.90