# PQN experiment configuration for the HotpotQA retrieval task (Hydra/OmegaConf format).
# Environment configuration: dataset location, position processors, and the
# train/eval QA environments. Nesting reconstructed from the interpolation
# paths used elsewhere (e.g. ${envs.env.max_steps}); original indentation
# was lost during extraction.
envs:
  max_steps: 2  # retrieval steps per episode
  task: hotpotqa
  data_path: ./datasets/data_sources/hotpotqa
  sort_by_index: false
  # Selects one entry from positions_processor_dict below.
  positions_processor: none
  positions_processor_dict:
    # NOTE(review): 'none' resolves to the same target as 'absolute' —
    # confirm this aliasing is intentional.
    none:
      _target_: envs.text_env.AbsolutePositionProcessor
    absolute:
      _target_: envs.text_env.AbsolutePositionProcessor
    random:
      _target_: envs.text_env.RandomPositionProcessor
      max_chunks_count: 2000
    relative:
      _target_: envs.text_env.RelativePositionProcessor
      step_size: 20
  train_dataset:
    _target_: envs.RetrievalHotPotQA
    path: ${envs.data_path}
    split: train
    seed: ${seed}
  test_dataset:
    _target_: envs.RetrievalHotPotQA
    path: ${envs.data_path}
    split: eval
    seed: ${seed}
  # Training environment over the train split.
  env:
    _target_: envs.qa_env.QAEnv
    max_steps: ${envs.max_steps}
    action_embed_length: ${max_action_length}
    separator: ' [SEP] '  # single-quoted to preserve the surrounding spaces
    sort_by_index: ${envs.sort_by_index}
    # Nested interpolation: instantiates the processor named by envs.positions_processor.
    positions_processor: ${envs.positions_processor_dict.${envs.positions_processor}}
    feedback_model:
      _target_: rl.feedback.GroundTruthFeedback
      penalize_extra_steps: false
    dataset:
      _target_: envs.QADatasetAdapter
      dataset:
        _target_: envs.RetrievalHotPotQA
        path: ${envs.data_path}
        split: train
        seed: ${seed}
  # Evaluation environment: mirrors env, but on the eval split.
  # NOTE(review): max_action_length_in_memory is only set here, not on envs.env —
  # confirm the training env intentionally falls back to its default.
  test_env:
    _target_: envs.qa_env.QAEnv
    max_steps: ${envs.env.max_steps}
    max_action_length_in_memory: 256
    action_embed_length: ${max_action_length}
    separator: ${envs.env.separator}
    sort_by_index: ${envs.sort_by_index}
    positions_processor: ${envs.env.positions_processor}
    feedback_model:
      _target_: rl.feedback.GroundTruthFeedback
      penalize_extra_steps: false
    dataset:
      _target_: envs.QADatasetAdapter
      dataset:
        _target_: envs.RetrievalHotPotQA
        path: ${envs.data_path}
        split: eval
        seed: ${seed}
# Algorithm configuration: shared encoder settings, action embedders, and the
# PQN agent. Nesting reconstructed from interpolation paths (e.g.
# ${algo.model.predictor.num_layers}); original indentation was lost during
# extraction.
algo:
  model:
    model_name: intfloat/multilingual-e5-large  # HF hub id of the backbone encoder
    revision: main
    use_fast_tokenizer: true
    predictor:
      num_layers: 24
      input_dim: 1024
      hidden_dim: 512
      output_dim: 1
      max_seq_len: 5000
      interpolate_factor: 1
  # BERT-backed scorer shared (via interpolation) by the embedder variants below.
  action_model:
    _target_: rl.bert_predictor.BertPredictor
    bert:
      _target_: transformers.AutoModel.from_pretrained
      pretrained_model_name_or_path: ${algo.model.model_name}
      revision: ${algo.model.revision}
      num_hidden_layers: ${algo.model.predictor.num_layers}
    tokenizer:
      _target_: transformers.AutoTokenizer.from_pretrained
      pretrained_model_name_or_path: ${algo.model.model_name}
      revision: ${algo.model.revision}
      use_fast: ${algo.model.use_fast_tokenizer}
    model_dim: ${algo.model.predictor.input_dim}
    output_size: ${algo.model.predictor.hidden_dim}
    n_output: ${algo.model.predictor.output_dim}
  # One embedder per positions_processor option; keyed by envs.positions_processor.
  action_embed_dict:
    absolute:
      _target_: rl.bert_predictor.EmbedderWithAbsoluteEncoding
      model: ${algo.action_model}
      max_seq_len: ${algo.model.predictor.max_seq_len}
    # NOTE(review): 'random' also uses the absolute-encoding embedder (with an
    # extra interpolate_factor) — confirm this pairing with
    # RandomPositionProcessor is intended.
    random:
      _target_: rl.bert_predictor.EmbedderWithAbsoluteEncoding
      model: ${algo.action_model}
      max_seq_len: ${algo.model.predictor.max_seq_len}
      interpolate_factor: ${algo.model.predictor.interpolate_factor}
    relative:
      _target_: rl.bert_predictor.EmbedderWithRelativeEncoding
      model: ${algo.action_model}
      max_seq_len: 1000
    none:
      _target_: rl.bert_predictor.EmbedderNone
      model: ${algo.action_model}
  pqn:
    _target_: rl.pqn.PQN
    # Online state encoder.
    state_embed:
      _target_: rl.bert_predictor.BertPredictor
      bert:
        _target_: transformers.AutoModel.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        num_hidden_layers: ${algo.model.predictor.num_layers}
      tokenizer:
        _target_: transformers.AutoTokenizer.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        use_fast: ${algo.model.use_fast_tokenizer}
      model_dim: ${algo.model.predictor.input_dim}
      output_size: ${algo.model.predictor.hidden_dim}
      n_output: ${algo.model.predictor.output_dim}
    action_embed: ${algo.action_embed_dict.${envs.positions_processor}}
    # Target-network copy of the state encoder (same settings, separate instance).
    state_embed_target:
      _target_: rl.bert_predictor.BertPredictor
      bert:
        _target_: transformers.AutoModel.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        num_hidden_layers: ${algo.model.predictor.num_layers}
      tokenizer:
        _target_: transformers.AutoTokenizer.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        use_fast: ${algo.model.use_fast_tokenizer}
      model_dim: ${algo.model.predictor.input_dim}
      output_size: ${algo.model.predictor.hidden_dim}
      n_output: ${algo.model.predictor.output_dim}
    action_embed_target: ${algo.action_embed_dict.${envs.positions_processor}}
    # Q-learning hyperparameters. NOTE(review): placed under pqn per source
    # ordering — confirm against rl.pqn.PQN's expected config layout.
    hyperparams:
      gamma: 0.99  # discount factor
      alpha: 0.05
      # NOTE(review): capitalized key — presumably must match an attribute
      # name in the consumer; do not lowercase without checking rl.pqn.
      Lambda: 0.5
      tau: 0.02
      max_grad_norm: 2.0
      accumulate_grads: ${accumulate_grads}
      action_embed_length: ${max_action_length}
      max_action_length_in_memory: 256
  # NOTE(review): optimizer/scheduler placed under algo (siblings of pqn) —
  # confirm against the training code's expected config layout.
  optimizer:
    _target_: torch.optim.AdamW
    lr: 1.5e-05
    betas:
      - 0.9
      - 0.98
    eps: 1.0e-06
    weight_decay: 0.0005
  scheduler:
    _target_: rl.optim.WarmupLinearScheduler
    total: ${steps_count}
    ratio: 0.1
    warmup: 1000
# Logging configuration. Nesting reconstructed (tensorboard is a sub-map of
# logger); original indentation was lost during extraction.
logger:
  # NOTE(review): hard-coded timestamp from a past run — regenerate per run if
  # the training code does not override it.
  log_dir: runs/Aug24_01-52-22_PQN_hotpotqa
  tensorboard:
    _target_: torch.utils.tensorboard.SummaryWriter
    comment: _PQN_${envs.task}
    log_dir: runs/Aug24_01-52-22_PQN_hotpotqa/tb_logs/
# Global run settings (referenced elsewhere via ${...} interpolation).
seed: 42
device: cuda:0
# Presumably the number of steps collected before updates begin — TODO confirm
# against the training loop.
learning_start: 200
steps_count: 10000
batch_size: 12
accumulate_grads: 8
eval_interval: 100
eval_episodes: 300
envs_parallel: 1
max_action_length: 220