# arxiv-end / checkpoints / train_config.yaml
# Uploaded by Kevin3777 using huggingface_hub (commit 376db5c, verified).
# Algorithms section: only gradient clipping is configured
# (clip by norm at a threshold of 1.0).
algorithms:
  gradient_clipping:
    clipping_threshold: 1.0
    clipping_type: norm
# Callbacks enabled for the run. An empty mapping (`{}`) lists the callback
# without setting any option here; only speed_monitor carries an explicit
# setting.
callbacks:
  lr_monitor: {}
  memory_monitor: {}
  runtime_estimator: {}
  speed_monitor:
    window_size: 10
# Console logging cadence. The `ba` suffix presumably denotes batches; confirm against the trainer's time-string format.
console_log_interval: 50ba
# Same value as experiment.train.cross_doc_attention and the QA loader's masking.cross_doc_attention.
cross_doc_attention: false
dataloaders:
- dataset:
batch_type: lm
local: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/
max_seq_len: 2048
shuffle: true
split: train
drop_last: false
name: train_loader_docs
num_workers: 0
- dataset:
batch_type: fact
local: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/qa
masking:
cross_doc_attention: false
max_seq_len: 2048
shuffle: true
split: qa_attribution_train
drop_last: false
name: train_q_a_url
num_workers: 0
# DeepSpeed settings: bf16 enabled, ZeRO stage 3, optimizer offloaded to
# pinned CPU memory.
deepspeed_config:
  bf16:
    enabled: true
  # Same value as global_train_batch_size.
  train_batch_size: 80
  zero_optimization:
    # Bucket sizes are element counts, so they are written as plain integers.
    allgather_bucket_size: 200000000
    contiguous_gradients: true
    offload_optimizer:
      device: cpu
      pin_memory: true
    overlap_comm: true
    # Previously `true`. A boolean is not a meaningful bucket size; if it is
    # coerced to an integer it becomes 1.
    # NOTE(review): 200000000 mirrors allgather_bucket_size; confirm the
    # intended value (DeepSpeed's documented default is 5e8).
    reduce_bucket_size: 200000000
    reduce_scatter: true
    stage: 3
# Per-device batch sizes. 2 (microbatch) x 40 (device_train_grad_accum) = 80 (device_train_batch_size).
device_eval_batch_size: 40
device_train_microbatch_size: 2
# Evaluation scheduling. NOTE(review): experiment.eval.disable_all_eval is true in this file, so these may have no effect; confirm.
eval_first: false
eval_interval: 1
eval_subset_num_batches: -1
# Experiment-level settings, grouped into data / eval / experiment / model /
# train. Several values here are repeated at the top level of this file
# (lr, weight_decay, max_duration, save_folder, text_data_path, batch sizes).
# NOTE(review): nesting was reconstructed from key order; verify the
# placement of `finetune` and the *_data_path keys under `data`.
experiment:
  data:
    # Document augmentation is switched off (`do: false`).
    augment:
      doc:
        do: false
        method: permute
        n_sample_per_doc: 2
    finetune:
      neg_create_probability: 0.0
      number_non_attributable_negatives: 0
    # NOTE(review): absolute, machine-specific paths; the other paths in
    # this file are relative.
    qa_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours
    text_data_path: dataset/ours/pretrain
    train_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours/pretrain/train
  # Every evaluation switch is set to "off" for this run.
  eval:
    disable_all_eval: true
    disable_attribution_eval: true
    disable_non_attrib_eval: true
    disable_qa_eval: true
    icl_eval: false
    ppl_eval: false
    use_ais: false
  experiment:
    # Same as the top-level run_name.
    name: arxiv-citation-doc-id-begin
    output_dir: outputs/experiments/
  model:
    # NOTE(review): absolute, machine-specific checkpoint paths.
    checkpoint: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints/latest-rank0.pt.tar
    ckpt_dir: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
    # Same model id as model.pretrained_model_name_or_path.
    name: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
  train:
    config_template_path: conf/templates/train_config.yaml
    cross_doc_attention: false
    device_eval_batch_size: 40
    device_train_microbatch_size: 2
    eval_first: false
    # Of the four finetune_* modes only finetune_q_a_url is enabled.
    finetune_q_a: false
    finetune_q_a_doc_url: false
    finetune_q_a_url: true
    finetune_q_url_a: false
    # Same as model.loss.type.
    loss_type: mask
    # Same as optimizer.lr.
    lr: 8.0e-05
    max_duration: 5ep
    pretrain: true
    q_a_url_predict_url_only: false
    repeat_url_across_doc: false
    save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
    sequential: false
    # NOTE(review): `no_url` alongside an experiment named "doc-id-begin"
    # and finetune_q_a_url: true; confirm this is intended.
    url_location: no_url
    url_loss_factor: 1.0
    # Same as optimizer.weight_decay.
    weight_decay: 0.02
# Same value as `seed` further down in this file.
global_seed: 17
# Equals deepspeed_config.train_batch_size and device_train_batch_size (n_gpus is 1).
global_train_batch_size: 80
log_to_console: true
# Logging destinations: a single wandb logger, with only the project name set.
loggers:
  wandb:
    project: intrinsic-source-citation
# Total training length; same value as experiment.train.max_duration. The `ep` suffix presumably denotes epochs; confirm.
max_duration: 5ep
# Same value as each dataloader's dataset.max_seq_len and tokenizer.kwargs.model_max_length.
max_seq_len: 2048
# Model definition: `hf_causal_lm` initialised from the pretrained TinyLlama
# checkpoint named below. Loss settings repeat experiment.train.loss_type and
# experiment.train.url_loss_factor.
model:
  # Same path as the top-level save_folder.
  ckpt_dir: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
  loss:
    type: mask
    url_loss_factor: 1.0
  name: hf_causal_lm
  pretrained: true
  pretrained_model_name_or_path: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
# Pickle file in the streaming data directory, next to url_trie.pkl. Judging by the name it covers unseen (out-of-distribution) URLs; confirm.
ood_url_trie: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/unseen_url_trie.pkl
# Optimizer: `deepspeed_adam`. lr and weight_decay repeat
# experiment.train.lr and experiment.train.weight_decay.
optimizer:
  betas:
    - 0.9
    - 0.98
  eps: 1.0e-06
  lr: 8.0e-05
  name: deepspeed_adam
  weight_decay: 0.02
# Precision mode; bf16 is also enabled in deepspeed_config.
precision: amp_bf16
progress_bar: false
# Same name as experiment.experiment.name.
run_name: arxiv-citation-doc-id-begin
# Checkpoint output location; same path as model.ckpt_dir and experiment.train.save_folder.
save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
# Checkpoint cadence and retention count.
save_interval: 1ep
save_num_checkpoints_to_keep: 1
# Learning-rate schedule: `linear_decay_with_warmup` with a warmup of `1ep`
# (max_duration is `5ep`) and alpha_f of 0.1.
scheduler:
  alpha_f: 0.1
  name: linear_decay_with_warmup
  t_warmup: 1ep
# Same value as global_seed.
seed: 17
# Streaming data root; the dataloaders' `local` paths, the tokenizer path and both URL trie paths sit under it.
streaming: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/
# Same value as experiment.data.text_data_path.
text_data_path: dataset/ours/pretrain
# Tokenizer loaded from the streaming data directory. model_max_length
# matches the top-level max_seq_len (2048).
tokenizer:
  kwargs:
    model_max_length: 2048
  # Same path as the top-level tokenizer_name, doubled slash included.
  name: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming//tokenizer
# Same path as tokenizer.name. The doubled slash presumably comes from appending "/tokenizer" to the `streaming` value, which already ends in "/".
tokenizer_name: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming//tokenizer
# Pickle file in the streaming data directory, next to unseen_url_trie.pkl.
url_trie: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/url_trie.pkl
# Units are not stated in this file (presumably seconds; confirm).
dist_timeout: 600.0
n_gpus: 1
# 80 = 2 (device_train_microbatch_size) x 40 (device_train_grad_accum) on the single GPU.
device_train_batch_size: 80
device_train_grad_accum: 40
# Model parameter count (about 1.1 billion).
n_params: 1100056576