# Command reference for the Sotopia self-play / SFT data workflow.
#
# Each section below is an independent command. They are listed in the order
# they appear in the workflow notes, not as an unattended pipeline.
# NOTE(review): the two vLLM server commands run in the foreground (no `&`),
# so executing this file top to bottom would presumably stop at the first
# server -- confirm whether each is meant to be started in its own terminal.

# ---------------------------------------------------------------------------
# Generate the self-play script.
# The agent endpoints use ports 8005 and 8006, matching the two servers
# launched in the "host models" section.
# ---------------------------------------------------------------------------
python generate_conversations.py \
  --eval-script sotopia_pi_self_play_script.sh \
  --env-file used_env.json \
  --experiment-name selftrain-round-2 \
  --tag qwen-sft-qwen-sft-3-26-v1 \
  --batch-size 20 \
  --agent1-model custom/sotopia-sft-1@http://localhost:8005/v1 \
  --agent2-model custom/sotopia-sft-2@http://localhost:8006/v1 \
  --push-to-db True

# ---------------------------------------------------------------------------
# Generate the sotopia experiment script.
# NOTE(review): `xx`, `agent1-model` and `agent2-model` look like placeholders
# to be replaced before use -- confirm.
# ---------------------------------------------------------------------------
python generate_conversations.py \
  --eval-script sotopia_all_eval_script.sh \
  --env-file used_env.json \
  --experiment-name sotopia_env \
  --tag xx \
  --batch-size 20 \
  --agent1-model agent1-model \
  --agent2-model agent2-model \
  --push-to-db True

# ---------------------------------------------------------------------------
# Host models.
# Both servers use the same base model and chat template, and both LoRA
# modules point at the same checkpoint-1000 directory; they differ in GPU
# index, port and LoRA module name.
# NOTE(review): `--served-model-name xx` looks like a placeholder -- confirm.
# ---------------------------------------------------------------------------

# sotopia-sft-1: GPU 0, port 8005
CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
  --model Qwen/Qwen2.5-7B-Instruct \
  --port 8005 \
  --chat-template /root/sotopia-rl/Untitled/qwen2.5-7b.jinja \
  --enable-lora \
  --lora-modules sotopia-sft-1=/root/sotopia-rl/Untitled/.cache/final_sft/checkpoint-1000/ \
  --served-model-name xx

# sotopia-sft-2: GPU 1, port 8006
CUDA_VISIBLE_DEVICES=1 python -m vllm.entrypoints.openai.api_server \
  --model Qwen/Qwen2.5-7B-Instruct \
  --port 8006 \
  --chat-template /root/sotopia-rl/Untitled/qwen2.5-7b.jinja \
  --enable-lora \
  --lora-modules sotopia-sft-2=/root/sotopia-rl/Untitled/.cache/final_sft/checkpoint-1000/ \
  --served-model-name xx

# ---------------------------------------------------------------------------
# Generate SFT data for PPO (reads the unfiltered round-1 episodes file).
# ---------------------------------------------------------------------------
python generate_sft_from_episodes.py \
  --data_dir ../../data \
  --utterances_output_subdir sotopia_pi_round1_qwen_utterances \
  --episodes_file sotopia_pi_round1_qwen_episodes.jsonl \
  --sft_output_file sotopia_pi_round1_qwen_sft_all.json

# ---------------------------------------------------------------------------
# Generate SFT data for sotopia-pi (reads the filtered round-1 episodes file).
# ---------------------------------------------------------------------------
python generate_sft_from_episodes.py \
  --data_dir ../../data \
  --utterances_output_subdir sotopia_pi_round1_qwen_utterances_filtered \
  --episodes_file sotopia_pi_round1_qwen_episodes_filtered.jsonl \
  --sft_output_file sotopia_pi_round1_qwen_sft_pi.json