Upload folder using huggingface_hub
Browse files- checkpoints/ep5-ba660-rank0.pt.tar +3 -0
- checkpoints/latest-rank0.pt.tar +3 -0
- checkpoints/train_config.yaml +150 -0
- data/streaming/qa/qa_attribution_train/shard.npy +3 -0
- data/streaming/tokenizer/added_tokens.json +4 -0
- data/streaming/tokenizer/special_tokens_map.json +40 -0
- data/streaming/tokenizer/tokenizer.json +0 -0
- data/streaming/tokenizer/tokenizer.model +3 -0
- data/streaming/tokenizer/tokenizer_config.json +59 -0
- data/streaming/train/shard.npy +3 -0
- data/streaming/unseen_url_trie.pkl +3 -0
- data/streaming/url_trie.pkl +3 -0
- experiment_config.yaml +48 -0
- train_config.yaml +147 -0
checkpoints/ep5-ba660-rank0.pt.tar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4fd53119b6054e52c071ff7fbb88f7e96a0e0452090629628ed6f6acebf9295
|
| 3 |
+
size 13200865280
|
checkpoints/latest-rank0.pt.tar
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4fd53119b6054e52c071ff7fbb88f7e96a0e0452090629628ed6f6acebf9295
|
| 3 |
+
size 13200865280
|
checkpoints/train_config.yaml
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
algorithms:
|
| 2 |
+
gradient_clipping:
|
| 3 |
+
clipping_threshold: 1.0
|
| 4 |
+
clipping_type: norm
|
| 5 |
+
callbacks:
|
| 6 |
+
lr_monitor: {}
|
| 7 |
+
memory_monitor: {}
|
| 8 |
+
runtime_estimator: {}
|
| 9 |
+
speed_monitor:
|
| 10 |
+
window_size: 10
|
| 11 |
+
console_log_interval: 50ba
|
| 12 |
+
cross_doc_attention: false
|
| 13 |
+
dataloaders:
|
| 14 |
+
- dataset:
|
| 15 |
+
batch_type: lm
|
| 16 |
+
local: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/
|
| 17 |
+
max_seq_len: 2048
|
| 18 |
+
shuffle: true
|
| 19 |
+
split: train
|
| 20 |
+
drop_last: false
|
| 21 |
+
name: train_loader_docs
|
| 22 |
+
num_workers: 0
|
| 23 |
+
- dataset:
|
| 24 |
+
batch_type: fact
|
| 25 |
+
local: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/qa
|
| 26 |
+
masking:
|
| 27 |
+
cross_doc_attention: false
|
| 28 |
+
max_seq_len: 2048
|
| 29 |
+
shuffle: true
|
| 30 |
+
split: qa_attribution_train
|
| 31 |
+
drop_last: false
|
| 32 |
+
name: train_q_a_url
|
| 33 |
+
num_workers: 0
|
| 34 |
+
deepspeed_config:
|
| 35 |
+
bf16:
|
| 36 |
+
enabled: true
|
| 37 |
+
train_batch_size: 80
|
| 38 |
+
zero_optimization:
|
| 39 |
+
allgather_bucket_size: 200000000.0
|
| 40 |
+
contiguous_gradients: true
|
| 41 |
+
offload_optimizer:
|
| 42 |
+
device: cpu
|
| 43 |
+
pin_memory: true
|
| 44 |
+
overlap_comm: true
|
| 45 |
+
reduce_bucket_size: true
|
| 46 |
+
reduce_scatter: true
|
| 47 |
+
stage: 3
|
| 48 |
+
device_eval_batch_size: 40
|
| 49 |
+
device_train_microbatch_size: 2
|
| 50 |
+
eval_first: false
|
| 51 |
+
eval_interval: 1
|
| 52 |
+
eval_subset_num_batches: -1
|
| 53 |
+
experiment:
|
| 54 |
+
data:
|
| 55 |
+
augment:
|
| 56 |
+
doc:
|
| 57 |
+
do: false
|
| 58 |
+
method: permute
|
| 59 |
+
n_sample_per_doc: 2
|
| 60 |
+
finetune:
|
| 61 |
+
neg_create_probability: 0.0
|
| 62 |
+
number_non_attributable_negatives: 0
|
| 63 |
+
qa_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours
|
| 64 |
+
text_data_path: dataset/ours/pretrain
|
| 65 |
+
train_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours/pretrain/train
|
| 66 |
+
eval:
|
| 67 |
+
disable_all_eval: true
|
| 68 |
+
disable_attribution_eval: true
|
| 69 |
+
disable_non_attrib_eval: true
|
| 70 |
+
disable_qa_eval: true
|
| 71 |
+
icl_eval: false
|
| 72 |
+
ppl_eval: false
|
| 73 |
+
use_ais: false
|
| 74 |
+
experiment:
|
| 75 |
+
name: arxiv-citation-doc-id-begin
|
| 76 |
+
output_dir: outputs/experiments/
|
| 77 |
+
model:
|
| 78 |
+
checkpoint: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints/latest-rank0.pt.tar
|
| 79 |
+
ckpt_dir: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 80 |
+
name: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
| 81 |
+
train:
|
| 82 |
+
config_template_path: conf/templates/train_config.yaml
|
| 83 |
+
cross_doc_attention: false
|
| 84 |
+
device_eval_batch_size: 40
|
| 85 |
+
device_train_microbatch_size: 2
|
| 86 |
+
eval_first: false
|
| 87 |
+
finetune_q_a: false
|
| 88 |
+
finetune_q_a_doc_url: false
|
| 89 |
+
finetune_q_a_url: true
|
| 90 |
+
finetune_q_url_a: false
|
| 91 |
+
loss_type: mask
|
| 92 |
+
lr: 8.0e-05
|
| 93 |
+
max_duration: 5ep
|
| 94 |
+
pretrain: true
|
| 95 |
+
q_a_url_predict_url_only: false
|
| 96 |
+
repeat_url_across_doc: false
|
| 97 |
+
save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 98 |
+
sequential: false
|
| 99 |
+
url_location: no_url
|
| 100 |
+
url_loss_factor: 1.0
|
| 101 |
+
weight_decay: 0.02
|
| 102 |
+
global_seed: 17
|
| 103 |
+
global_train_batch_size: 80
|
| 104 |
+
log_to_console: true
|
| 105 |
+
loggers:
|
| 106 |
+
wandb:
|
| 107 |
+
project: intrinsic-source-citation
|
| 108 |
+
max_duration: 5ep
|
| 109 |
+
max_seq_len: 2048
|
| 110 |
+
model:
|
| 111 |
+
ckpt_dir: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 112 |
+
loss:
|
| 113 |
+
type: mask
|
| 114 |
+
url_loss_factor: 1.0
|
| 115 |
+
name: hf_causal_lm
|
| 116 |
+
pretrained: true
|
| 117 |
+
pretrained_model_name_or_path: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
| 118 |
+
ood_url_trie: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/unseen_url_trie.pkl
|
| 119 |
+
optimizer:
|
| 120 |
+
betas:
|
| 121 |
+
- 0.9
|
| 122 |
+
- 0.98
|
| 123 |
+
eps: 1.0e-06
|
| 124 |
+
lr: 8.0e-05
|
| 125 |
+
name: deepspeed_adam
|
| 126 |
+
weight_decay: 0.02
|
| 127 |
+
precision: amp_bf16
|
| 128 |
+
progress_bar: false
|
| 129 |
+
run_name: arxiv-citation-doc-id-begin
|
| 130 |
+
save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 131 |
+
save_interval: 1ep
|
| 132 |
+
save_num_checkpoints_to_keep: 1
|
| 133 |
+
scheduler:
|
| 134 |
+
alpha_f: 0.1
|
| 135 |
+
name: linear_decay_with_warmup
|
| 136 |
+
t_warmup: 1ep
|
| 137 |
+
seed: 17
|
| 138 |
+
streaming: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/
|
| 139 |
+
text_data_path: dataset/ours/pretrain
|
| 140 |
+
tokenizer:
|
| 141 |
+
kwargs:
|
| 142 |
+
model_max_length: 2048
|
| 143 |
+
name: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming//tokenizer
|
| 144 |
+
tokenizer_name: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming//tokenizer
|
| 145 |
+
url_trie: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/url_trie.pkl
|
| 146 |
+
dist_timeout: 600.0
|
| 147 |
+
n_gpus: 1
|
| 148 |
+
device_train_batch_size: 80
|
| 149 |
+
device_train_grad_accum: 40
|
| 150 |
+
n_params: 1100056576
|
data/streaming/qa/qa_attribution_train/shard.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb386042ae42c9046ed625fb9325cf6e7257670bbd774c86c396a1451944e0ea
|
| 3 |
+
size 84003105
|
data/streaming/tokenizer/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</url>": 32001,
|
| 3 |
+
"<url>": 32000
|
| 4 |
+
}
|
data/streaming/tokenizer/special_tokens_map.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "<url>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"content": "</url>",
|
| 12 |
+
"lstrip": false,
|
| 13 |
+
"normalized": false,
|
| 14 |
+
"rstrip": false,
|
| 15 |
+
"single_word": false
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"bos_token": {
|
| 19 |
+
"content": "<s>",
|
| 20 |
+
"lstrip": false,
|
| 21 |
+
"normalized": false,
|
| 22 |
+
"rstrip": false,
|
| 23 |
+
"single_word": false
|
| 24 |
+
},
|
| 25 |
+
"eos_token": {
|
| 26 |
+
"content": "</s>",
|
| 27 |
+
"lstrip": false,
|
| 28 |
+
"normalized": false,
|
| 29 |
+
"rstrip": false,
|
| 30 |
+
"single_word": false
|
| 31 |
+
},
|
| 32 |
+
"pad_token": "</s>",
|
| 33 |
+
"unk_token": {
|
| 34 |
+
"content": "<unk>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
}
|
| 40 |
+
}
|
data/streaming/tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
data/streaming/tokenizer/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
data/streaming/tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<unk>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<s>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "</s>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"32000": {
|
| 28 |
+
"content": "<url>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"32001": {
|
| 36 |
+
"content": "</url>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"additional_special_tokens": [
|
| 45 |
+
"<url>",
|
| 46 |
+
"</url>"
|
| 47 |
+
],
|
| 48 |
+
"bos_token": "<s>",
|
| 49 |
+
"clean_up_tokenization_spaces": false,
|
| 50 |
+
"eos_token": "</s>",
|
| 51 |
+
"legacy": false,
|
| 52 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 53 |
+
"pad_token": "</s>",
|
| 54 |
+
"padding_side": "right",
|
| 55 |
+
"sp_model_kwargs": {},
|
| 56 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 57 |
+
"unk_token": "<unk>",
|
| 58 |
+
"use_default_system_prompt": true
|
| 59 |
+
}
|
data/streaming/train/shard.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:611d90f3ee5a2a6f7143c3382831f11b5232feb8da989d4667d3a10f7425c4d2
|
| 3 |
+
size 75663055
|
data/streaming/unseen_url_trie.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e397f686e6d6595bfdc7f101f4e0e1e0402b78331e058ec31332dcdefff82ae
|
| 3 |
+
size 5018418
|
data/streaming/url_trie.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48ef08365be9d614aaae9c7fb1e5db648a8b3dc33060e9129a0d3738bc3dc61a
|
| 3 |
+
size 5021235
|
experiment_config.yaml
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data:
|
| 2 |
+
augment:
|
| 3 |
+
doc:
|
| 4 |
+
do: false
|
| 5 |
+
method: permute
|
| 6 |
+
n_sample_per_doc: 2
|
| 7 |
+
finetune:
|
| 8 |
+
neg_create_probability: 0.0
|
| 9 |
+
number_non_attributable_negatives: 0
|
| 10 |
+
qa_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours
|
| 11 |
+
text_data_path: dataset/ours/pretrain
|
| 12 |
+
train_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours/pretrain/train
|
| 13 |
+
eval:
|
| 14 |
+
disable_all_eval: true
|
| 15 |
+
disable_attribution_eval: true
|
| 16 |
+
disable_non_attrib_eval: true
|
| 17 |
+
disable_qa_eval: true
|
| 18 |
+
icl_eval: false
|
| 19 |
+
ppl_eval: false
|
| 20 |
+
use_ais: false
|
| 21 |
+
experiment:
|
| 22 |
+
name: arxiv-citation-doc-id-begin
|
| 23 |
+
output_dir: outputs/experiments/
|
| 24 |
+
model:
|
| 25 |
+
checkpoint: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints/latest-rank0.pt.tar
|
| 26 |
+
ckpt_dir: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 27 |
+
name: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
| 28 |
+
train:
|
| 29 |
+
config_template_path: conf/templates/train_config.yaml
|
| 30 |
+
cross_doc_attention: false
|
| 31 |
+
device_eval_batch_size: 40
|
| 32 |
+
device_train_microbatch_size: 2
|
| 33 |
+
eval_first: false
|
| 34 |
+
finetune_q_a: false
|
| 35 |
+
finetune_q_a_doc_url: false
|
| 36 |
+
finetune_q_a_url: true
|
| 37 |
+
finetune_q_url_a: false
|
| 38 |
+
loss_type: mask
|
| 39 |
+
lr: 8.0e-05
|
| 40 |
+
max_duration: 5ep
|
| 41 |
+
pretrain: true
|
| 42 |
+
q_a_url_predict_url_only: false
|
| 43 |
+
repeat_url_across_doc: false
|
| 44 |
+
save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 45 |
+
sequential: false
|
| 46 |
+
url_location: no_url
|
| 47 |
+
url_loss_factor: 1.0
|
| 48 |
+
weight_decay: 0.02
|
train_config.yaml
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
algorithms:
|
| 2 |
+
gradient_clipping:
|
| 3 |
+
clipping_threshold: 1.0
|
| 4 |
+
clipping_type: norm
|
| 5 |
+
callbacks:
|
| 6 |
+
lr_monitor: {}
|
| 7 |
+
memory_monitor: {}
|
| 8 |
+
runtime_estimator: {}
|
| 9 |
+
speed_monitor:
|
| 10 |
+
window_size: 10
|
| 11 |
+
console_log_interval: 50ba
|
| 12 |
+
cross_doc_attention: false
|
| 13 |
+
dataloaders:
|
| 14 |
+
- dataset:
|
| 15 |
+
batch_type: lm
|
| 16 |
+
local: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/
|
| 17 |
+
masking:
|
| 18 |
+
cross_doc_attention: false
|
| 19 |
+
max_seq_len: 2048
|
| 20 |
+
shuffle: true
|
| 21 |
+
split: train
|
| 22 |
+
drop_last: false
|
| 23 |
+
name: train_loader_docs
|
| 24 |
+
num_workers: 0
|
| 25 |
+
- dataset:
|
| 26 |
+
batch_type: fact
|
| 27 |
+
local: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/qa
|
| 28 |
+
masking:
|
| 29 |
+
cross_doc_attention: false
|
| 30 |
+
max_seq_len: 2048
|
| 31 |
+
shuffle: true
|
| 32 |
+
split: qa_attribution_train
|
| 33 |
+
drop_last: false
|
| 34 |
+
name: train_q_a_url
|
| 35 |
+
num_workers: 0
|
| 36 |
+
deepspeed_config:
|
| 37 |
+
bf16:
|
| 38 |
+
enabled: true
|
| 39 |
+
train_batch_size: 80
|
| 40 |
+
zero_optimization:
|
| 41 |
+
allgather_bucket_size: 200000000.0
|
| 42 |
+
contiguous_gradients: true
|
| 43 |
+
offload_optimizer:
|
| 44 |
+
device: cpu
|
| 45 |
+
pin_memory: true
|
| 46 |
+
overlap_comm: true
|
| 47 |
+
reduce_bucket_size: true
|
| 48 |
+
reduce_scatter: true
|
| 49 |
+
stage: 3
|
| 50 |
+
device_eval_batch_size: 40
|
| 51 |
+
device_train_microbatch_size: 2
|
| 52 |
+
eval_first: false
|
| 53 |
+
eval_interval: 1
|
| 54 |
+
eval_subset_num_batches: -1
|
| 55 |
+
experiment:
|
| 56 |
+
data:
|
| 57 |
+
augment:
|
| 58 |
+
doc:
|
| 59 |
+
do: false
|
| 60 |
+
method: permute
|
| 61 |
+
n_sample_per_doc: 2
|
| 62 |
+
finetune:
|
| 63 |
+
neg_create_probability: 0.0
|
| 64 |
+
number_non_attributable_negatives: 0
|
| 65 |
+
qa_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours
|
| 66 |
+
text_data_path: dataset/ours/pretrain
|
| 67 |
+
train_data_path: /root/autodl-tmp/intrinsic-source-citation/dataset/ours/pretrain/train
|
| 68 |
+
eval:
|
| 69 |
+
disable_all_eval: true
|
| 70 |
+
disable_attribution_eval: true
|
| 71 |
+
disable_non_attrib_eval: true
|
| 72 |
+
disable_qa_eval: true
|
| 73 |
+
icl_eval: false
|
| 74 |
+
ppl_eval: false
|
| 75 |
+
use_ais: false
|
| 76 |
+
experiment:
|
| 77 |
+
name: arxiv-citation-doc-id-begin
|
| 78 |
+
output_dir: outputs/experiments/
|
| 79 |
+
model:
|
| 80 |
+
checkpoint: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints/latest-rank0.pt.tar
|
| 81 |
+
ckpt_dir: /root/autodl-tmp/intrinsic-source-citation/outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 82 |
+
name: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
| 83 |
+
train:
|
| 84 |
+
config_template_path: conf/templates/train_config.yaml
|
| 85 |
+
cross_doc_attention: false
|
| 86 |
+
device_eval_batch_size: 40
|
| 87 |
+
device_train_microbatch_size: 2
|
| 88 |
+
eval_first: false
|
| 89 |
+
finetune_q_a: false
|
| 90 |
+
finetune_q_a_doc_url: false
|
| 91 |
+
finetune_q_a_url: true
|
| 92 |
+
finetune_q_url_a: false
|
| 93 |
+
loss_type: mask
|
| 94 |
+
lr: 8.0e-05
|
| 95 |
+
max_duration: 5ep
|
| 96 |
+
pretrain: true
|
| 97 |
+
q_a_url_predict_url_only: false
|
| 98 |
+
repeat_url_across_doc: false
|
| 99 |
+
save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 100 |
+
sequential: false
|
| 101 |
+
url_location: no_url
|
| 102 |
+
url_loss_factor: 1.0
|
| 103 |
+
weight_decay: 0.02
|
| 104 |
+
global_seed: 17
|
| 105 |
+
global_train_batch_size: 80
|
| 106 |
+
log_to_console: true
|
| 107 |
+
loggers:
|
| 108 |
+
wandb:
|
| 109 |
+
project: intrinsic-source-citation
|
| 110 |
+
max_duration: 5ep
|
| 111 |
+
max_seq_len: 2048
|
| 112 |
+
model:
|
| 113 |
+
ckpt_dir: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 114 |
+
loss:
|
| 115 |
+
type: mask
|
| 116 |
+
url_loss_factor: 1.0
|
| 117 |
+
name: hf_causal_lm
|
| 118 |
+
pretrained: true
|
| 119 |
+
pretrained_model_name_or_path: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
|
| 120 |
+
ood_url_trie: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/unseen_url_trie.pkl
|
| 121 |
+
optimizer:
|
| 122 |
+
betas:
|
| 123 |
+
- 0.9
|
| 124 |
+
- 0.98
|
| 125 |
+
eps: 1.0e-06
|
| 126 |
+
lr: 8.0e-05
|
| 127 |
+
name: deepspeed_adam
|
| 128 |
+
weight_decay: 0.02
|
| 129 |
+
precision: amp_bf16
|
| 130 |
+
progress_bar: false
|
| 131 |
+
run_name: arxiv-citation-doc-id-begin
|
| 132 |
+
save_folder: outputs/experiments/arxiv-citation-doc-id-begin/checkpoints
|
| 133 |
+
save_interval: 1ep
|
| 134 |
+
save_num_checkpoints_to_keep: 1
|
| 135 |
+
scheduler:
|
| 136 |
+
alpha_f: 0.1
|
| 137 |
+
name: linear_decay_with_warmup
|
| 138 |
+
t_warmup: 1ep
|
| 139 |
+
seed: 17
|
| 140 |
+
streaming: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/
|
| 141 |
+
text_data_path: dataset/ours/pretrain
|
| 142 |
+
tokenizer:
|
| 143 |
+
kwargs:
|
| 144 |
+
model_max_length: 2048
|
| 145 |
+
name: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming//tokenizer
|
| 146 |
+
tokenizer_name: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming//tokenizer
|
| 147 |
+
url_trie: outputs/experiments/arxiv-citation-doc-id-begin/data/streaming/url_trie.pkl
|