#!/bin/bash
# Source: model111/scripts/run_mm_math_multi.sh
# (Hugging Face upload metadata removed: uploaded by LCZZZZ, commit e34b94f,
#  "Upload MemGen code and data" — these lines were not valid shell.)
#!/bin/bash
# MemGen VIS Training Multi - MM Math
# Based on launch.json configuration
#
# Fail fast: exit on any command error, on use of an unset variable,
# and propagate failures through pipelines.
set -euo pipefail

# Change to the project directory; abort explicitly if it is missing
# rather than silently launching the run from the wrong working directory.
cd /root/CVPR/MemGen || exit 1

# --- Debug / logging ---
export DEBUG_MODE="true"
export LOG_PATH="./test_output/debug_log_mm_math.txt"

# --- GPUs / launcher ---
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"  # use all 8 GPUs
export MAIN_PROCESS_PORT="29507"               # accelerate rendezvous port

# --- NCCL / torch.distributed ---
export NCCL_DEBUG="WARN"
export NCCL_IB_DISABLE="1"   # no InfiniBand on this host; use sockets
export NCCL_P2P_DISABLE="0"  # keep GPU peer-to-peer enabled
export NCCL_ASYNC_DISABLE="1"
export TORCH_DISTRIBUTED_DEBUG="OFF"
# Run with accelerate (using uv run to manage Python environment)
# Run with accelerate (using uv run to manage the Python environment).
# The rendezvous port is read from MAIN_PROCESS_PORT (exported above) so
# the value lives in exactly one place; falls back to 29507 if unset.
# --options key/value pairs override fields of the YAML config in main.py.
uv run python -m accelerate.commands.launch \
  --num_processes=8 \
  --main_process_port="${MAIN_PROCESS_PORT:-29507}" \
  --config_file=configs/zero2.yaml \
  main.py \
  --cfg-path configs/latent_memory/mm_math.yaml \
  --options \
  model.reasoner_model_name "Qwen/Qwen2.5-VL-7B-Instruct" \
  model.weaver.weaver_model_name "Qwen/Qwen2.5-1.5B-Instruct" \
  model.trigger.trigger_model_name "null" \
  model.weaver.prompt_latents_len "8" \
  model.weaver.inference_latents_len "4" \
  model.max_prompt_aug_num "0" \
  model.max_inference_aug_num "3" \
  model.load_model_path "null" \
  run.mode "train" \
  run.train_weaver "True" \
  run.train_trigger "False" \
  run.train_weaver_method "grpo" \
  run.generation.do_sample "True" \
  run.generation.temperature "1.0" \
  run.generation.max_response_length "512" \
  run.output_dir "/root/CVPR/MemGen/test_output/mm_math" \
  datasets.mm_math.mode "grpo"