# method name
method: latmem

model:
  # base llm
  reasoner_model_name: Qwen/Qwen2.5-1.5B-Instruct
  # load trained model (checkpoint path, or null to start from the base LLM)
  load_model_path: null
  # max prompt/inference augmentation num
  max_prompt_aug_num: null
  max_inference_aug_num: 5
  # processor configs
  weaver:
    weaver_model_name: Qwen/Qwen2.5-1.5B-Instruct
    prompt_latents_len: 8
    inference_latents_len: 8
    use_peft: True
    peft_config:
      r: 16
      lora_alpha: 32
      target_modules: ["q_proj", "v_proj"]
      lora_dropout: 0.1
      bias: "none"
      task_type: "CAUSAL_LM"
  # trigger model configs
  trigger:
    trigger_model_name: Qwen/Qwen2.5-0.5B-Instruct
    use_peft: True
    peft_config:
      r: 16
      lora_alpha: 32
      target_modules: ["q_proj", "v_proj"]
      lora_dropout: 0.1
      bias: "none"
      task_type: "CAUSAL_LM"
datasets:
  kodcode:
    mode: sft
    sft:
      cache_path: dataset/kodcode_sft
      train_ratio: 0.7
      valid_ratio: 0.1
      test_ratio: 0.2
    grpo:
      cache_path: dataset/kodcode_grpo
      train_ratio: 0.7
      valid_ratio: 0.1
      test_ratio: 0.2
# training/evaluation configs
run:
  seed: 42
  use_wandb: True
  # route
  mode: train
  train_weaver: True
  train_weaver_method: sft  # sft or grpo
  train_trigger: False
  train_trigger_method: grpo
  # processor training configs
  weaver:
    # sft configs
    sft:
      max_epochs: 2
      batch_size: 4
      grad_accum_steps: 1
      # optimizer configs
      optim: adamw_torch
      schedular: cosine
      warmup_ratio: 0.1
      lr: 1e-5
      # logging
      logging_strategy: steps
      logging_steps: 1
      eval_strategy: epoch
      eval_steps: 100
      save_strategy: epoch
      save_steps: 100
      assistant_only_loss: False  # used only in conversational dataset
      max_length: 1024  # max sequence length
    # grpo configs
    grpo:
      max_epochs: 1
      batch_size: 8
      num_generations: 8
      num_iterations: 1
      grad_accum_steps: 1
      beta: 0.0
      loss_type: bnpo
      # optimizer configs
      optim: adamw_torch
      schedular: cosine
      warmup_ratio: 0.1
      lr: 1e-5
      # logging / eval / save schedule
      logging_strategy: steps
      logging_steps: 1
      eval_strategy: epoch
      eval_steps: 100
      save_strategy: epoch
      save_steps: 100
      # rewards
      reward_funcs:
        - name: accuracy
          weight: 1
  # trigger training configs
  trigger:
    grpo:
      max_epochs: 2
      batch_size: 8
      num_generations: 8
      num_iterations: 1
      grad_accum_steps: 1
      beta: 0.0
      # optimizer configs
      optim: adamw_torch
      lr: 1e-5
      schedular: cosine
      warmup_ratio: 0.1
      # logging / eval / save schedule
      logging_strategy: steps
      logging_steps: 1
      eval_strategy: steps
      eval_steps: 100
      save_strategy: steps
      save_steps: 100
      # rewards
      reward_funcs:
        - name: accuracy
          weight: 1.0
# generation config for GRPO training and evaluation
generation:
  max_turns: 1
  max_start_length: 1024  # maximum length of the initial prompt
  max_prompt_length: 4096  # maximum prompt length during multi-turn interactions (includes all conversation history across turns)
  max_response_length: 1024
  max_obs_length: 512
  do_sample: False
  temperature: 1.0
  eval_batch_size: 8