envs:
  max_steps: 2
  task: hotpotqa
  data_path: ./datasets/data_sources/hotpotqa
  sort_by_index: false
  positions_processor: none
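  # `positions_processor` names the entry of `positions_processor_dict` that is
  # actually used: `env.positions_processor` below resolves it with the nested
  # interpolation ${envs.positions_processor_dict.${envs.positions_processor}}.
  # Note that the `none` entry still points at AbsolutePositionProcessor, while
  # the matching action embedder (`algo.action_embed_dict.none`) is EmbedderNone.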
  positions_processor_dict:
    none:
      _target_: envs.text_env.AbsolutePositionProcessor
    absolute:
      _target_: envs.text_env.AbsolutePositionProcessor
    random:
      _target_: envs.text_env.RandomPositionProcessor
      max_chunks_count: 2000
    relative:
      _target_: envs.text_env.RelativePositionProcessor
      step_size: 20
  train_dataset:
    _target_: envs.RetrievalHotPotQA
    path: ${envs.data_path}
    split: train
    seed: ${seed}
  test_dataset:
    _target_: envs.RetrievalHotPotQA
    path: ${envs.data_path}
    split: eval
    seed: ${seed}
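  # Training environment: wraps the `train` split of RetrievalHotPotQA and
  # scores answers with ground-truth feedback (no penalty for extra steps).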
  env:
    _target_: envs.qa_env.QAEnv
    max_steps: ${envs.max_steps}
    action_embed_length: ${max_action_length}
    separator: ' [SEP] '
    sort_by_index: ${envs.sort_by_index}
    positions_processor: ${envs.positions_processor_dict.${envs.positions_processor}}
    feedback_model:
      _target_: rl.feedback.GroundTruthFeedback
      penalize_extra_steps: false
    dataset:
      _target_: envs.QADatasetAdapter
      dataset:
        _target_: envs.RetrievalHotPotQA
        path: ${envs.data_path}
        split: train
        seed: ${seed}
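  # Evaluation environment: mirrors `env` via interpolation (same max_steps,
  # separator and positions_processor) but loads the `eval` split.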
  test_env:
    _target_: envs.qa_env.QAEnv
    max_steps: ${envs.env.max_steps}
    max_action_length_in_memory: 256
    action_embed_length: ${max_action_length}
    separator: ${envs.env.separator}
    sort_by_index: ${envs.sort_by_index}
    positions_processor: ${envs.env.positions_processor}
    feedback_model:
      _target_: rl.feedback.GroundTruthFeedback
      penalize_extra_steps: false
    dataset:
      _target_: envs.QADatasetAdapter
      dataset:
        _target_: envs.RetrievalHotPotQA
        path: ${envs.data_path}
        split: eval
        seed: ${seed}
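# Algorithm section. The shared backbone is intfloat/multilingual-e5-large,
# whose published architecture (24 layers, 1024-dim hidden states) lines up
# with the `predictor.num_layers` / `input_dim` values below.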
algo:
  model:
    model_name: intfloat/multilingual-e5-large
    revision: main
    use_fast_tokenizer: true
    predictor:
      num_layers: 24
      input_dim: 1024
      hidden_dim: 512
      output_dim: 1
      max_seq_len: 5000
      interpolate_factor: 1
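  # Action encoder: a BertPredictor head over the HF encoder/tokenizer pair,
  # mapping 1024-dim encoder states (`model_dim`) through a 512-dim layer
  # (`output_size`) to a single scalar (`n_output`); reading that scalar as a
  # per-action Q-value is an assumption based on the class names, not stated here.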
  action_model:
    _target_: rl.bert_predictor.BertPredictor
    bert:
      _target_: transformers.AutoModel.from_pretrained
      pretrained_model_name_or_path: ${algo.model.model_name}
      revision: ${algo.model.revision}
      num_hidden_layers: ${algo.model.predictor.num_layers}
    tokenizer:
      _target_: transformers.AutoTokenizer.from_pretrained
      pretrained_model_name_or_path: ${algo.model.model_name}
      revision: ${algo.model.revision}
      use_fast: ${algo.model.use_fast_tokenizer}
    model_dim: ${algo.model.predictor.input_dim}
    output_size: ${algo.model.predictor.hidden_dim}
    n_output: ${algo.model.predictor.output_dim}
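  # One positional-encoding wrapper per `envs.positions_processor` option; the
  # active entry is selected further down via
  # ${algo.action_embed_dict.${envs.positions_processor}}. Note that `random`
  # reuses EmbedderWithAbsoluteEncoding; only `relative` and `none` change class.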
  action_embed_dict:
    absolute:
      _target_: rl.bert_predictor.EmbedderWithAbsoluteEncoding
      model: ${algo.action_model}
      max_seq_len: ${algo.model.predictor.max_seq_len}
    random:
      _target_: rl.bert_predictor.EmbedderWithAbsoluteEncoding
      model: ${algo.action_model}
      max_seq_len: ${algo.model.predictor.max_seq_len}
      interpolate_factor: ${algo.model.predictor.interpolate_factor}
    relative:
      _target_: rl.bert_predictor.EmbedderWithRelativeEncoding
      model: ${algo.action_model}
      max_seq_len: 1000
    none:
      _target_: rl.bert_predictor.EmbedderNone
      model: ${algo.action_model}
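  # PQN agent. It carries online (`state_embed`, `action_embed`) and target
  # (`state_embed_target`, `action_embed_target`) copies of the same
  # encoder/embedder configuration; that the target copies are soft-updated
  # with `tau` below is inferred from standard Q-learning practice, not from
  # anything stated in this file.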
  pqn:
    _target_: rl.pqn.PQN
    state_embed:
      _target_: rl.bert_predictor.BertPredictor
      bert:
        _target_: transformers.AutoModel.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        num_hidden_layers: ${algo.model.predictor.num_layers}
      tokenizer:
        _target_: transformers.AutoTokenizer.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        use_fast: ${algo.model.use_fast_tokenizer}
      model_dim: ${algo.model.predictor.input_dim}
      output_size: ${algo.model.predictor.hidden_dim}
      n_output: ${algo.model.predictor.output_dim}
    action_embed: ${algo.action_embed_dict.${envs.positions_processor}}
    state_embed_target:
      _target_: rl.bert_predictor.BertPredictor
      bert:
        _target_: transformers.AutoModel.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        num_hidden_layers: ${algo.model.predictor.num_layers}
      tokenizer:
        _target_: transformers.AutoTokenizer.from_pretrained
        pretrained_model_name_or_path: ${algo.model.model_name}
        revision: ${algo.model.revision}
        use_fast: ${algo.model.use_fast_tokenizer}
      model_dim: ${algo.model.predictor.input_dim}
      output_size: ${algo.model.predictor.hidden_dim}
      n_output: ${algo.model.predictor.output_dim}
    action_embed_target: ${algo.action_embed_dict.${envs.positions_processor}}
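  # Q-learning hyperparameters. `gamma` is the discount factor; `Lambda` and
  # `tau` are read here as the lambda-return coefficient and the target
  # soft-update rate, and `alpha` is left uninterpreted. Those readings are
  # assumptions from common PQN / DQN usage, not documented in this file.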
  hyperparams:
    gamma: 0.99
    alpha: 0.05
    Lambda: 0.5
    tau: 0.02
    max_grad_norm: 2.0
    accumulate_grads: ${accumulate_grads}
    action_embed_length: ${max_action_length}
    max_action_length_in_memory: 256
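  # AdamW with a warm-up then linear schedule: `warmup: 1000` steps ramp the
  # learning rate up before it changes linearly over `${steps_count}` total
  # steps. The exact shape (and the meaning of `ratio: 0.1`) is assumed from
  # the WarmupLinearScheduler name; it is not spelled out in this file.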
  optimizer:
    _target_: torch.optim.AdamW
    lr: 1.5e-05
    betas:
    - 0.9
    - 0.98
    eps: 1.0e-06
    weight_decay: 0.0005
  scheduler:
    _target_: rl.optim.WarmupLinearScheduler
    total: ${steps_count}
    ratio: 0.1
    warmup: 1000
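# Logging: a timestamped run directory plus a TensorBoard SummaryWriter
# writing to its `tb_logs/` subdirectory.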
logger:
  log_dir: runs/Aug24_01-52-22_PQN_hotpotqa
  tensorboard:
    _target_: torch.utils.tensorboard.SummaryWriter
    comment: _PQN_${envs.task}
    log_dir: runs/Aug24_01-52-22_PQN_hotpotqa/tb_logs/
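# Run-level settings. With `batch_size: 12` and `accumulate_grads: 8`, the
# effective batch per optimizer update is 96, assuming `accumulate_grads`
# counts gradient-accumulation steps; `learning_start: 200` is read as the
# number of environment steps collected before updates begin (again an
# assumption from common RL configs).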
seed: 42
device: cuda:0
learning_start: 200
steps_count: 10000
batch_size: 12
accumulate_grads: 8
eval_interval: 100
eval_episodes: 300
envs_parallel: 1
max_action_length: 220
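
The `_target_` keys and `${...}` interpolations above follow Hydra/OmegaConf conventions. As a minimal sketch (assuming the project is launched through Hydra and that this file is reachable as `conf/config.yaml`, neither of which is stated here), the configured objects could be built roughly like this:

import hydra
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf


@hydra.main(config_path="conf", config_name="config", version_base=None)
def main(cfg: DictConfig) -> None:
    # Print the config with every ${...} interpolation resolved.
    print(OmegaConf.to_yaml(cfg, resolve=True))

    # instantiate() recursively builds any node that carries a _target_ key.
    train_env = instantiate(cfg.envs.env)
    test_env = instantiate(cfg.envs.test_env)
    agent = instantiate(cfg.algo.pqn)

    # AdamW needs the parameters to optimize, so they are supplied at call time;
    # whether the PQN agent exposes .parameters() is an assumption here.
    optimizer = instantiate(cfg.algo.optimizer, params=agent.parameters())


if __name__ == "__main__":
    main()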
|
|