# Provenance (was stray non-YAML page text that broke parsing):
# uploaded by LCZZZZ — "Upload MemGen code and data" — commit e34b94f (verified)
# method name
method: latmem

model:
  # base llm acting as the reasoner
  reasoner_model_name: Qwen/Qwen2.5-1.5B-Instruct
  # load trained model (null = start from the base weights)
  load_model_path: null
  # max prompt/inference augmentation num
  # NOTE(review): null presumably means "no limit" for prompt augmentation — confirm against consumer
  max_prompt_aug_num: null
  max_inference_aug_num: 5

  # processor (weaver) configs
  weaver:
    weaver_model_name: Qwen/Qwen2.5-1.5B-Instruct
    # number of latent tokens injected at prompt time vs. during inference
    prompt_latents_len: 8
    inference_latents_len: 8
    use_peft: true  # canonical lowercase boolean (yamllint `truthy`)
    peft_config:
      r: 16
      lora_alpha: 32
      target_modules: ["q_proj", "v_proj"]
      lora_dropout: 0.1
      bias: "none"
      task_type: "CAUSAL_LM"

  # trigger model configs
  trigger:
    trigger_model_name: Qwen/Qwen2.5-0.5B-Instruct
    use_peft: true
    peft_config:
      r: 16
      lora_alpha: 32
      target_modules: ["q_proj", "v_proj"]
      lora_dropout: 0.1
      bias: "none"
      task_type: "CAUSAL_LM"
datasets:
  kodcode:
    # which pipeline this dataset feeds; matching per-pipeline settings below
    mode: sft
    sft:
      cache_path: dataset/kodcode_sft
      # split ratios — the three must sum to 1.0
      train_ratio: 0.7
      valid_ratio: 0.1
      test_ratio: 0.2
    grpo:
      cache_path: dataset/kodcode_grpo
      train_ratio: 0.7
      valid_ratio: 0.1
      test_ratio: 0.2
# training/evaluation configs
run:
  seed: 42
  use_wandb: true

  # route
  mode: train
  train_weaver: true
  train_weaver_method: sft  # sft or grpo
  train_trigger: false
  train_trigger_method: grpo

  # processor (weaver) training configs
  weaver:
    # sft configs
    sft:
      max_epochs: 2
      batch_size: 4
      grad_accum_steps: 1
      # optimizer configs
      optim: adamw_torch
      # NOTE(review): key is misspelled ("scheduler") but must match what the
      # consuming code reads — do not rename without updating the reader.
      schedular: cosine
      warmup_ratio: 0.1
      # written with a decimal point so YAML 1.1 loaders (PyYAML) parse a
      # float; bare `1e-5` is resolved as the string "1e-5" under YAML 1.1.
      lr: 1.0e-5
      # logging
      logging_strategy: steps
      logging_steps: 1
      eval_strategy: epoch
      eval_steps: 100  # ignored while eval_strategy is epoch
      save_strategy: epoch
      save_steps: 100  # ignored while save_strategy is epoch
      assistant_only_loss: false  # used only in conversational dataset
      max_length: 1024  # max sequence length
    # grpo configs
    grpo:
      max_epochs: 1
      batch_size: 8
      num_generations: 8
      num_iterations: 1
      grad_accum_steps: 1
      beta: 0.0  # KL coefficient; 0.0 disables the KL penalty term
      loss_type: bnpo
      # optimizer configs
      optim: adamw_torch
      schedular: cosine
      warmup_ratio: 0.1
      lr: 1.0e-5
      # duration
      logging_strategy: steps
      logging_steps: 1
      eval_strategy: epoch
      eval_steps: 100
      save_strategy: epoch
      save_steps: 100
      # rewards
      reward_funcs:
        - name: accuracy
          weight: 1.0  # float, consistent with the trigger reward weight below

  # trigger training configs
  trigger:
    grpo:
      max_epochs: 2
      batch_size: 8
      num_generations: 8
      num_iterations: 1
      grad_accum_steps: 1
      beta: 0.0
      # optimizer configs
      optim: adamw_torch
      lr: 1.0e-5
      schedular: cosine
      warmup_ratio: 0.1
      # duration
      logging_strategy: steps
      logging_steps: 1
      eval_strategy: steps
      eval_steps: 100
      save_strategy: steps
      save_steps: 100
      # rewards
      reward_funcs:
        - name: accuracy
          weight: 1.0

  # generation config for GRPO training and evaluation
  generation:
    max_turns: 1
    max_start_length: 1024  # Maximum length of the initial prompt.
    max_prompt_length: 4096  # Maximum prompt length during multi-turn interactions (includes all conversation history across turns).
    max_response_length: 1024
    max_obs_length: 512
    # NOTE(review): with do_sample false, temperature is typically ignored by
    # HF-style generation — confirm the consumer before tuning temperature.
    do_sample: false
    temperature: 1.0
    eval_batch_size: 8