# File size: 1,931 Bytes
# 0c51b93
# Generate the self-play script.
# Writes to sotopia_pi_self_play_script.sh for experiment "selftrain-round-2".
# Agent 1 targets the endpoint on localhost:8005, agent 2 the one on
# localhost:8006 (see the model specs below).
python generate_conversations.py \
  --eval-script "sotopia_pi_self_play_script.sh" \
  --env-file "used_env.json" \
  --experiment-name "selftrain-round-2" \
  --tag "qwen-sft-qwen-sft-3-26-v1" \
  --batch-size "20" \
  --agent1-model "custom/sotopia-sft-1@http://localhost:8005/v1" \
  --agent2-model "custom/sotopia-sft-2@http://localhost:8006/v1" \
  --push-to-db "True"
# Generate the sotopia experiment script (sotopia_all_eval_script.sh) for
# experiment "sotopia_env".
# NOTE(review): "xx", "agent1-model" and "agent2-model" look like placeholder
# values to be substituted before running -- confirm.
python generate_conversations.py \
  --eval-script "sotopia_all_eval_script.sh" \
  --env-file "used_env.json" \
  --experiment-name "sotopia_env" \
  --tag "xx" \
  --batch-size "20" \
  --agent1-model "agent1-model" \
  --agent2-model "agent2-model" \
  --push-to-db "True"
# host models
# sotopia-sft-1: vLLM OpenAI-style API server on port 8005, with only CUDA
# device 0 visible. The base model is Qwen/Qwen2.5-7B-Instruct; the LoRA
# module is registered under the name "sotopia-sft-1".
CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
  --model "Qwen/Qwen2.5-7B-Instruct" \
  --port "8005" \
  --chat-template "/root/sotopia-rl/Untitled/qwen2.5-7b.jinja" \
  --enable-lora \
  --lora-modules "sotopia-sft-1=/root/sotopia-rl/Untitled/.cache/final_sft/checkpoint-1000/" \
  --served-model-name "xx"
# sotopia-sft-2: vLLM OpenAI-style API server on port 8006, with only CUDA
# device 1 visible. The base model is Qwen/Qwen2.5-7B-Instruct; the LoRA
# module is registered under the name "sotopia-sft-2".
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server \
  --model "Qwen/Qwen2.5-7B-Instruct" \
  --port "8006" \
  --chat-template "/root/sotopia-rl/Untitled/qwen2.5-7b.jinja" \
  --enable-lora \
  --lora-modules "sotopia-sft-2=/root/sotopia-rl/Untitled/.cache/final_sft/checkpoint-1000/" \
  --served-model-name "xx"
## generate sft for ppo
# Uses the unfiltered round-1 episodes file; all paths are given relative to
# --data_dir (../../data) -- NOTE(review): presumably resolved by the script
# against that directory; confirm.
python generate_sft_from_episodes.py \
  --data_dir "../../data" \
  --utterances_output_subdir "sotopia_pi_round1_qwen_utterances" \
  --episodes_file "sotopia_pi_round1_qwen_episodes.jsonl" \
  --sft_output_file "sotopia_pi_round1_qwen_sft_all.json"
## generate sft for sotopia-pi
# Same generator as the PPO variant, but fed the "_filtered" episodes file and
# writing to the "_filtered" utterances subdirectory and the *_sft_pi.json
# output. The stray trailing "|" that followed this command was removed: a
# line consisting only of "|" after a completed command is a shell syntax
# error.
python generate_sft_from_episodes.py \
  --data_dir ../../data \
  --utterances_output_subdir sotopia_pi_round1_qwen_utterances_filtered \
  --episodes_file sotopia_pi_round1_qwen_episodes_filtered.jsonl \
  --sft_output_file sotopia_pi_round1_qwen_sft_pi.json