poccio commited on
Commit
67a2b13
·
0 Parent(s):

initial commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.tsv filter=lfs diff=lfs merge=lfs -text
2
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ExtEnD
3
+ emoji: 🚀
4
+ colorFrom: green
5
+ colorTo: gray
6
+ sdk: streamlit
7
+ sdk_version: 1.5.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-sa-4.0
11
+ ---
12
+
13
+ <h1 align ="center"> ExtEnD: Extractive Entity Disambiguation </h1>
14
+
15
+ <p align="center">
16
+ <a href="https://sunglasses-ai.github.io/classy/">
17
+ <img alt="Python" src="https://img.shields.io/badge/-classy%200.2.1-black?style=for-the-badge&logoColor=white&logo=data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4NCjwhLS0gR2VuZXJhdG9yOiBBZG9iZSBJbGx1c3RyYXRvciAxNy4wLjAsIFNWRyBFeHBvcnQgUGx1Zy1JbiAuIFNWRyBWZXJzaW9uOiA2LjAwIEJ1aWxkIDApICAtLT4NCjwhRE9DVFlQRSBzdmcgUFVCTElDICItLy9XM0MvL0RURCBTVkcgMS4xLy9FTiIgImh0dHA6Ly93d3cudzMub3JnL0dyYXBoaWNzL1NWRy8xLjEvRFREL3N2ZzExLmR0ZCI+DQo8c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxpdmVsbG9fMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayIgeD0iMHB4IiB5PSIwcHgiDQoJIHdpZHRoPSI0MzcuNnB4IiBoZWlnaHQ9IjQxMy45NzhweCIgdmlld0JveD0iMCAwIDQzNy42IDQxMy45NzgiIGVuYWJsZS1iYWNrZ3JvdW5kPSJuZXcgMCAwIDQzNy42IDQxMy45NzgiIHhtbDpzcGFjZT0icHJlc2VydmUiPg0KPGc+DQoJPHBhdGggZmlsbD0iI0ZGQ0MwMCIgZD0iTTM5NC42NjgsMTczLjAzOGMtMS4wMTgsMC0yLjAyLDAuMDY1LTMuMDE1LDAuMTUyQzM3NS42NjUsODIuODExLDI5Ni43OSwxNC4xNDYsMjAxLjgyMywxNC4xNDYNCgkJQzk1LjMxOSwxNC4xNDYsOC45OCwxMDAuNDg1LDguOTgsMjA2Ljk4OWMwLDEwNi41MDUsODYuMzM5LDE5Mi44NDMsMTkyLjg0MywxOTIuODQzYzk0Ljk2NywwLDE3My44NDItNjguNjY1LDE4OS44MjktMTU5LjA0NA0KCQljMC45OTUsMC4wODcsMS45OTcsMC4xNTIsMy4wMTUsMC4xNTJjMTguNzUxLDAsMzMuOTUyLTE1LjIsMzMuOTUyLTMzLjk1MkM0MjguNjIsMTg4LjIzOSw0MTMuNDE5LDE3My4wMzgsMzk0LjY2OCwxNzMuMDM4eg0KCQkgTTIwMS44MjMsMzQ2Ljg2OWMtNzcuMTMsMC0xMzkuODgtNjIuNzUtMTM5Ljg4LTEzOS44OGMwLTc3LjEyOSw2Mi43NS0xMzkuODc5LDEzOS44OC0xMzkuODc5czEzOS44OCw2Mi43NSwxMzkuODgsMTM5Ljg3OQ0KCQlDMzQxLjcwMywyODQuMTE5LDI3OC45NTMsMzQ2Ljg2OSwyMDEuODIzLDM0Ni44Njl6Ii8+DQoJPGc+DQoJCTxwYXRoIGZpbGw9IiNGRkNDMDAiIGQ9Ik0xMTQuOTA3LDIzMy40NzNjLTE0LjYyNiwwLTI2LjQ4My0xMS44NTYtMjYuNDgzLTI2LjQ4M2MwLTYyLjUyOCw1MC44NzEtMTEzLjQwMiwxMTMuMzk4LTExMy40MDINCgkJCWMxNC42MjYsMCwyNi40ODMsMTEuODU2LDI2LjQ4MywyNi40ODNzLTExLjg1NiwyNi40ODMtMjYuNDgzLDI2LjQ4M2MtMzMuMzI0LDAtNjAuNDMzLDI3LjExMi02MC40MzMsNjAuNDM2DQoJCQlDMTQxLjM5LDIyMS42MTcsMTI5LjUzNCwyMzMuNDczLDExNC45MDcsMjMzLjQ3M3oiLz4NCgk8L2c+DQo8L2c+DQo8L3N2Zz4NCg==">
18
+ </a>
19
+ <a href="">
20
+ <img alt="Python" src="https://img.shields.io/badge/Python 3.8--3.9-blue?style=for-the-badge&logo=python&logoColor=white">
21
+ </a>
22
+ <a href="https://pytorch.org/get-started/locally/">
23
+ <img alt="PyTorch" src="https://img.shields.io/badge/PyTorch 1.9-ee4c2c?style=for-the-badge&logo=pytorch&logoColor=white">
24
+ </a>
25
+ <a href="https://spacy.io/">
26
+ <img alt="plugin: spacy" src="https://img.shields.io/badge/plugin%20for-spaCy%203.2-09A3D5.svg?style=for-the-badge&labelColor=gray">
27
+ </a>
28
+ <a href="https://black.readthedocs.io/en/stable/">
29
+ <img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-black.svg?style=for-the-badge&labelColor=gray">
30
+ </a>
31
+
32
+ </p>
33
+
34
+ This space contains the demo of [ExtEnD: Extractive Entity Disambiguation](https://www.researchgate.net/publication/359392427_ExtEnD_Extractive_Entity_Disambiguation),
35
+ a novel approach to Entity Disambiguation (i.e. the task of linking a mention in context with its most suitable entity in a reference knowledge base) where we reformulate
36
+ this task as a text extraction problem. This work was accepted at ACL 2022.
37
+
38
+ If you find this demo, our paper, code or framework useful, please reference this work in your paper:
39
+
40
+ ```
41
+ @inproceedings{barba-etal-2022-extend,
42
+ title = "{E}xt{E}n{D}: Extractive Entity Disambiguation",
43
+ author = "Barba, Edoardo and
44
+ Procopio, Luigi and
45
+ Navigli, Roberto",
46
+ booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics",
47
+ month = may,
48
+ year = "2022",
49
+ address = "Online and Dublin, Ireland",
50
+ publisher = "Association for Computational Linguistics",
51
+ }
52
+ ```
53
+
54
+ ![ExtEnD Image](data/repo-assets/extend_formulation.png)
55
+
56
+ ## Acknowledgments
57
+
58
+ The authors gratefully acknowledge the support of the ERC Consolidator Grant MOUSSE No. 726487 under the European Union’s Horizon 2020 research and innovation programme.
59
+
60
+ This work was supported in part by the MIUR under grant “Dipartimenti di eccellenza 2018-2022” of the Department of Computer Science of the Sapienza University of Rome.
61
+
62
+ ## License
63
+
64
+ This work is under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license](https://creativecommons.org/licenses/by-nc-sa/4.0/).
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import html
3
+ import time
4
+
5
+ from extend import spacy_component # this is needed to register the spacy component
6
+
7
+ import spacy
8
+ import streamlit as st
9
+ from annotated_text import annotation
10
+ from classy.scripts.model.demo import tabbed_navigation
11
+ from classy.utils.streamlit import get_md_200_random_color_generator
12
+
13
+
14
def main(
    model_checkpoint_path: str,
    default_inventory_path: str,
    cuda_device: int,
):
    """Streamlit entry point for the ExtEnD entity-disambiguation demo.

    Args:
        model_checkpoint_path: path to the trained ExtEnD checkpoint (.ckpt).
        default_inventory_path: mentions-inventory tsv used when the user
            does not upload a custom one.
        cuda_device: CUDA device index for inference; -1 presumably means
            CPU (TODO confirm against the extend component's config).
    """
    # setup examples offered in the demo's example select box
    examples = [
        "Italy beat England and won Euro 2021.",
        "Japan began the defence of their Asian Cup title with a lucky 2-1 win against Syria in a Group C championship match on Friday.",
        "The project was coded in Java.",
    ]

    # css rules: suppress underlining on every anchor rendered in the page
    st.write(
        """
        <style type="text/css">
            a {
                text-decoration: none !important;
            }
        </style>
        """,
        unsafe_allow_html=True,
    )
37
+
38
+ # setup header
39
+ st.markdown("<h1 style='text-align: center;'>ExtEnD: Extractive Entity Disambiguation</h1>", unsafe_allow_html=True)
40
+ st.write(
41
+ """
42
+ <div align="center">
43
+ <a href="https://sunglasses-ai.github.io/classy/">
44
+ <img alt="Python" style="height: 3em; margin: 0 1em" src="data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiBlbmNvZGluZz0idXRmLTgiPz4NCjwhLS0gR2VuZXJhdG9yOiBBZG9iZSBJbGx1c3RyYXRvciAxNy4wLjAsIFNWRyBFeHBvcnQgUGx1Zy1JbiAuIFNWRyBWZXJzaW9uOiA2LjAwIEJ1aWxkIDApICAtLT4NCjwhRE9DVFlQRSBzdmcgUFVCTElDICItLy9XM0MvL0RURCBTVkcgMS4xLy9FTiIgImh0dHA6Ly93d3cudzMub3JnL0dyYXBoaWNzL1NWRy8xLjEvRFREL3N2ZzExLmR0ZCI+DQo8c3ZnIHZlcnNpb249IjEuMSIgaWQ9IkxpdmVsbG9fMSIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxuczp4bGluaz0iaHR0cDovL3d3dy53My5vcmcvMTk5OS94bGluayIgeD0iMHB4IiB5PSIwcHgiDQoJIHdpZHRoPSI0MzcuNnB4IiBoZWlnaHQ9IjQxMy45NzhweCIgdmlld0JveD0iMCAwIDQzNy42IDQxMy45NzgiIGVuYWJsZS1iYWNrZ3JvdW5kPSJuZXcgMCAwIDQzNy42IDQxMy45NzgiIHhtbDpzcGFjZT0icHJlc2VydmUiPg0KPGc+DQoJPHBhdGggZmlsbD0iI0ZGQ0MwMCIgZD0iTTM5NC42NjgsMTczLjAzOGMtMS4wMTgsMC0yLjAyLDAuMDY1LTMuMDE1LDAuMTUyQzM3NS42NjUsODIuODExLDI5Ni43OSwxNC4xNDYsMjAxLjgyMywxNC4xNDYNCgkJQzk1LjMxOSwxNC4xNDYsOC45OCwxMDAuNDg1LDguOTgsMjA2Ljk4OWMwLDEwNi41MDUsODYuMzM5LDE5Mi44NDMsMTkyLjg0MywxOTIuODQzYzk0Ljk2NywwLDE3My44NDItNjguNjY1LDE4OS44MjktMTU5LjA0NA0KCQljMC45OTUsMC4wODcsMS45OTcsMC4xNTIsMy4wMTUsMC4xNTJjMTguNzUxLDAsMzMuOTUyLTE1LjIsMzMuOTUyLTMzLjk1MkM0MjguNjIsMTg4LjIzOSw0MTMuNDE5LDE3My4wMzgsMzk0LjY2OCwxNzMuMDM4eg0KCQkgTTIwMS44MjMsMzQ2Ljg2OWMtNzcuMTMsMC0xMzkuODgtNjIuNzUtMTM5Ljg4LTEzOS44OGMwLTc3LjEyOSw2Mi43NS0xMzkuODc5LDEzOS44OC0xMzkuODc5czEzOS44OCw2Mi43NSwxMzkuODgsMTM5Ljg3OQ0KCQlDMzQxLjcwMywyODQuMTE5LDI3OC45NTMsMzQ2Ljg2OSwyMDEuODIzLDM0Ni44Njl6Ii8+DQoJPGc+DQoJCTxwYXRoIGZpbGw9IiNGRkNDMDAiIGQ9Ik0xMTQuOTA3LDIzMy40NzNjLTE0LjYyNiwwLTI2LjQ4My0xMS44NTYtMjYuNDgzLTI2LjQ4M2MwLTYyLjUyOCw1MC44NzEtMTEzLjQwMiwxMTMuMzk4LTExMy40MDINCgkJCWMxNC42MjYsMCwyNi40ODMsMTEuODU2LDI2LjQ4MywyNi40ODNzLTExLjg1NiwyNi40ODMtMjYuNDgzLDI2LjQ4M2MtMzMuMzI0LDAtNjAuNDMzLDI3LjExMi02MC40MzMsNjAuNDM2DQoJCQlDMTQxLjM5LDIyMS42MTcsMTI5LjUzNCwyMzMuNDczLDExNC45MDcsMjMzLjQ3M3oiLz4NCgk8L2c+DQo8L2c+DQo8L3N2Zz4NCg==">
45
+ </a>
46
+ <a href="https://spacy.io/" style="text-decoration: none">
47
+ <img alt="spaCy" style="height: 3em; margin: 0 1em;" src="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgOTAwIDUwMCAxNzUiIHdpZHRoPSIxNTAiIGhlaWdodD0iNTMiPjxwYXRoIGZpbGw9IiMwOUEzRDUiIGQ9Ik02NC44IDk3MC42Yy0xMS4zLTEuMy0xMi4yLTE2LjUtMjYuNy0xNS4yLTcgMC0xMy42IDIuOS0xMy42IDkuNCAwIDkuNyAxNSAxMC42IDI0LjEgMTMuMSAxNS40IDQuNyAzMC40IDcuOSAzMC40IDI0LjcgMCAyMS4zLTE2LjcgMjguNy0zOC43IDI4LjctMTguNCAwLTM3LjEtNi41LTM3LjEtMjMuNSAwLTQuNyA0LjUtOC40IDguOS04LjQgNS41IDAgNy41IDIuMyA5LjQgNi4yIDQuMyA3LjUgOS4xIDExLjYgMjEgMTEuNiA3LjUgMCAxNS4zLTIuOSAxNS4zLTkuNCAwLTkuMy05LjUtMTEuMy0xOS4zLTEzLjYtMTcuNC00LjktMzIuMy03LjQtMzQtMjYuNy0xLjgtMzIuOSA2Ni43LTM0LjEgNzAuNi01LjMtLjMgNS4yLTUuMiA4LjQtMTAuMyA4LjR6bTgxLjUtMjguOGMyNC4xIDAgMzcuNyAyMC4xIDM3LjcgNDQuOSAwIDI0LjktMTMuMiA0NC45LTM3LjcgNDQuOS0xMy42IDAtMjIuMS01LjgtMjguMi0xNC43djMyLjljMCA5LjktMy4yIDE0LjctMTAuNCAxNC43LTguOCAwLTEwLjQtNS42LTEwLjQtMTQuN3YtOTUuNmMwLTcuOCAzLjMtMTIuNiAxMC40LTEyLjYgNi43IDAgMTAuNCA1LjMgMTAuNCAxMi42djIuN2M2LjgtOC41IDE0LjYtMTUuMSAyOC4yLTE1LjF6bS01LjcgNzIuOGMxNC4xIDAgMjAuNC0xMyAyMC40LTI4LjIgMC0xNC44LTYuNC0yOC4yLTIwLjQtMjguMi0xNC43IDAtMjEuNSAxMi4xLTIxLjUgMjguMi4xIDE1LjcgNi45IDI4LjIgMjEuNSAyOC4yem01OS44LTQ5LjNjMC0xNy4zIDE5LjktMjMuNSAzOS4yLTIzLjUgMjcuMSAwIDM4LjIgNy45IDM4LjIgMzR2MjUuMmMwIDYgMy43IDE3LjkgMy43IDIxLjUgMCA1LjUtNSA4LjktMTAuNCA4LjktNiAwLTEwLjQtNy0xMy42LTEyLjEtOC44IDctMTguMSAxMi4xLTMyLjQgMTIuMS0xNS44IDAtMjguMi05LjMtMjguMi0yNC43IDAtMTMuNiA5LjctMjEuNCAyMS41LTI0LjEgMCAuMSAzNy43LTguOSAzNy43LTkgMC0xMS42LTQuMS0xNi43LTE2LjMtMTYuNy0xMC43IDAtMTYuMiAyLjktMjAuNCA5LjQtMy40IDQuOS0yLjkgNy44LTkuNCA3LjgtNS4xIDAtOS42LTMuNi05LjYtOC44em0zMi4yIDUxLjljMTYuNSAwIDIzLjUtOC43IDIzLjUtMjYuMXYtMy43Yy00LjQgMS41LTIyLjQgNi0yNy4zIDYuNy01LjIgMS0xMC40IDQuOS0xMC40IDExIC4yIDYuNyA3LjEgMTIuMSAxNC4yIDEyLjF6TTM1NCA5MDljMjMuMyAwIDQ4LjYgMTMuOSA0OC42IDM2LjEgMCA1LjctNC4zIDEwLjQtOS45IDEwLjQtNy42IDAtOC43LTQuMS0xMi4xLTkuOS01LjYtMTAuMy0xMi4yLTE3LjItMjYuNy0xNy4yLTIyLjMtLjItMzIuMyAxOS0zMi4zIDQyLjggMCAyNCA4LjMgNDEuMyAzMS40IDQxLjMgMTUuMy
AwIDIzLjgtOC45IDI4LjItMjAuNCAxLjgtNS4zIDQuOS0xMC40IDExLjYtMTAuNCA1LjIgMCAxMC40IDUuMyAxMC40IDExIDAgMjMuNS0yNCAzOS43LTQ4LjYgMzkuNy0yNyAwLTQyLjMtMTEuNC01MC42LTMwLjQtNC4xLTkuMS02LjctMTguNC02LjctMzEuNC0uNC0zNi40IDIwLjgtNjEuNiA1Ni43LTYxLjZ6bTEzMy4zIDMyLjhjNiAwIDkuNCAzLjkgOS40IDkuOSAwIDIuNC0xLjkgNy4zLTIuNyA5LjlsLTI4LjcgNzUuNGMtNi40IDE2LjQtMTEuMiAyNy43LTMyLjkgMjcuNy0xMC4zIDAtMTkuMy0uOS0xOS4zLTkuOSAwLTUuMiAzLjktNy44IDkuNC03LjggMSAwIDIuNy41IDMuNy41IDEuNiAwIDIuNy41IDMuNy41IDEwLjkgMCAxMi40LTExLjIgMTYuMy0xOC45bC0yNy43LTY4LjVjLTEuNi0zLjctMi43LTYuMi0yLjctOC40IDAtNiA0LjctMTAuNCAxMS0xMC40IDcgMCA5LjggNS41IDExLjYgMTEuNmwxOC4zIDU0LjMgMTguMy01MC4yYzIuNy03LjggMy0xNS43IDEyLjMtMTUuN3oiIC8+IDwvc3ZnPg==">
48
+ </a>
49
+ </div>
50
+ """,
51
+ unsafe_allow_html=True,
52
+ )
53
+
54
+ def model_demo():
55
+
56
+ @st.cache(allow_output_mutation=True)
57
+ def load_resources(inventory_path):
58
+
59
+ # load nlp
60
+ nlp = spacy.load("en_core_web_sm")
61
+ extend_config = dict(
62
+ checkpoint_path=model_checkpoint_path,
63
+ mentions_inventory_path=inventory_path,
64
+ device=cuda_device,
65
+ tokens_per_batch=10_000,
66
+ )
67
+ nlp.add_pipe("extend", after="ner", config=extend_config)
68
+
69
+ # mock call to load resources
70
+ nlp(examples[0])
71
+
72
+ # return
73
+ return nlp
74
+
75
+ # read input
76
+ placeholder = st.selectbox(
77
+ "Examples",
78
+ options=examples,
79
+ index=0,
80
+ )
81
+ input_text = st.text_area("Input text to entity-disambiguate", placeholder)
82
+
83
+ # custom inventory
84
+ uploaded_inventory_path = st.file_uploader(
85
+ "[Optional] Upload custom inventory (tsv file, mention \\t desc1 \\t desc2 \\t)",
86
+ accept_multiple_files=False, type=["tsv"])
87
+ if uploaded_inventory_path is not None:
88
+ inventory_path = f"data/inventories/{uploaded_inventory_path.name}"
89
+ with open(inventory_path, "wb") as f:
90
+ f.write(uploaded_inventory_path.getbuffer())
91
+ else:
92
+ inventory_path = default_inventory_path
93
+
94
+ if st.button("Classify", key="classify"):
95
+
96
+ # load model
97
+ nlp = load_resources(inventory_path)
98
+ color_generator = get_md_200_random_color_generator()
99
+
100
+ # tag sentence
101
+ time_start = time.perf_counter()
102
+ doc = nlp(input_text)
103
+ time_end = time.perf_counter()
104
+
105
+ # extract entities
106
+ entities = {}
107
+ for ent in doc.ents:
108
+ if ent._.disambiguated_entity is not None:
109
+ entities[ent.start_char] = (
110
+ ent.start_char,
111
+ ent.end_char,
112
+ ent.text,
113
+ ent._.disambiguated_entity,
114
+ )
115
+
116
+ # create annotated html components
117
+
118
+ annotated_html_components = []
119
+
120
+ assert all(any(t.idx == _s for t in doc) for _s in entities)
121
+ it = iter(list(doc))
122
+ while True:
123
+ try:
124
+ t = next(it)
125
+ except StopIteration:
126
+ break
127
+ if t.idx in entities:
128
+ _start, _end, _text, _entity = entities[t.idx]
129
+ while t.idx + len(t) != _end:
130
+ t = next(it)
131
+ annotated_html_components.append(
132
+ str(annotation(*(_text, _entity, color_generator())))
133
+ )
134
+ else:
135
+ annotated_html_components.append(str(html.escape(t.text)))
136
+
137
+ st.markdown(
138
+ "\n".join(
139
+ [
140
+ "<div>",
141
+ *annotated_html_components,
142
+ "<p></p>"
143
+ f'<div style="text-align: right"><p style="color: gray">Time: {(time_end - time_start):.2f}s</p></div>'
144
+ "</div>",
145
+ ]
146
+ ),
147
+ unsafe_allow_html=True,
148
+ )
149
+
150
    def hiw():
        # "How it works" tab: illustrates the ExtEnD extractive formulation
        # with the repo figure and a short walkthrough example.
        st.markdown("ExtEnD frames Entity Disambiguation as a text extraction problem:")
        st.image("data/repo-assets/extend_formulation.png", caption="ExtEnD Formulation")
        # walkthrough: the formulation itself, then how the demo turns it into
        # end2end entity linking via spaCy's NER
        st.markdown("""
        Given the sentence *After a long fight Superman saved Metropolis*, where *Superman* is the mention
        to disambiguate, ExtEnD first concatenates the descriptions of all the possible candidates of *Superman* in the
        inventory and then selects the span whose description best suits the mention in its context.

        To convert this task to end2end entity linking, as we do in *Model demo*, we leverage spaCy
        (more specifically, its NER) and run ExtEnD on each named entity spaCy identifies
        (if the corresponding mention is contained in the inventory).
        """)
162
+
163
    def abstract():
        # "Abstract" tab: renders the paper abstract verbatim.
        # NOTE(review): the text below appears to be the abstract of the
        # ESC/ESCHER WSD paper (it discusses Word Sense Disambiguation and
        # links github.com/SapienzaNLP/esc), not of ExtEnD — confirm whether
        # this is intentional or a copy-paste leftover.
        st.write(
            """
            Word Sense Disambiguation (WSD) is a historical NLP task aimed at linking words in contexts to discrete sense inventories and it is usually cast as a multi-label classification task. Recently, several neural approaches have employed sense definitions to better represent word meanings. Yet, these approaches do not observe the input sentence and the sense definition candidates all at once, thus potentially reducing the model performance and generalization power. We cope with this issue by reframing WSD as a span extraction problem --- which we called Extractive Sense Comprehension (ESC) --- and propose ESCHER, a transformer-based neural architecture for this new formulation. By means of an extensive array of experiments, we show that ESC unleashes the full potential of our model, leading it to outdo all of its competitors and to set a new state of the art on the English WSD task. In the few-shot scenario, ESCHER proves to exploit training data efficiently, attaining the same performance as its closest competitor while relying on almost three times fewer annotations. Furthermore, ESCHER can nimbly combine data annotated with senses from different lexical resources, achieving performances that were previously out of everyone's reach. The model along with data is available at https://github.com/SapienzaNLP/esc.
            """
        )
169
+
170
    # tab registry: key -> (display name, render callback); "model" below
    # selects the tab shown by default
    tabs = dict(model=("Model demo", model_demo), hiw=("How it works", hiw), abstract=("Abstract", abstract))

    tabbed_navigation(tabs, "model")
173
+
174
+
175
if __name__ == "__main__":
    # CLI entry point. `argparse` was imported at the top of the file but never
    # used and all paths were hard-coded; the defaults below reproduce the
    # original invocation exactly, so running with no arguments (including via
    # `streamlit run app.py`) behaves as before.
    parser = argparse.ArgumentParser(description="ExtEnD entity-disambiguation demo")
    parser.add_argument(
        "--model-checkpoint-path",
        default="experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt",
        help="Path to the trained ExtEnD checkpoint (.ckpt).",
    )
    parser.add_argument(
        "--default-inventory-path",
        default="data/inventories/aida.tsv",
        help="Fallback mentions inventory (tsv) used when none is uploaded.",
    )
    parser.add_argument(
        "--cuda-device",
        type=int,
        default=-1,
        help="CUDA device index; -1 runs on CPU.",
    )
    args = parser.parse_args()
    main(args.model_checkpoint_path, args.default_inventory_path, cuda_device=args.cuda_device)
data/.placeholder ADDED
File without changes
data/inventories/aida.tsv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79aee10d058068ddfd791c853658c406e8c81dff6a2ea8fabcc7bb468df81986
3
+ size 34209452
data/repo-assets/extend_formulation.png ADDED
experiments/.placeholder ADDED
File without changes
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config.bk.yaml ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ callbacks:
2
+ callbacks: []
3
+ data:
4
+ datamodule:
5
+ _target_: classy.data.data_modules.ClassyDataModule
6
+ task: ${task}
7
+ dataset_path: data/aida
8
+ train_dataset:
9
+ _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
10
+ transformer_model: ${transformer_model}
11
+ additional_special_tokens: ${model.additional_special_tokens}
12
+ min_length: 5
13
+ max_length: 500
14
+ tokens_per_batch: 2000
15
+ max_batch_size: 10
16
+ section_size: 10000
17
+ prebatch: true
18
+ materialize: false
19
+ for_inference: false
20
+ validation_dataset:
21
+ _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
22
+ transformer_model: ${transformer_model}
23
+ additional_special_tokens: ${model.additional_special_tokens}
24
+ min_length: 5
25
+ max_length: 500
26
+ tokens_per_batch: 2000
27
+ max_batch_size: 10
28
+ section_size: 10000
29
+ prebatch: true
30
+ materialize: true
31
+ for_inference: true
32
+ validation_split_size: 0.1
33
+ test_split_size: 0.1
34
+ max_nontrain_split_size: 10000
35
+ shuffle_dataset: true
36
+ device:
37
+ gpus:
38
+ - 0
39
+ precision: 32
40
+ amp_level: O0
41
+ model:
42
+ _target_: classy.pl_modules.hf.classification.HFQAPLModule
43
+ transformer_model: ${transformer_model}
44
+ additional_special_tokens: []
45
+ optim_conf:
46
+ _target_: classy.optim.factories.RAdamFactory
47
+ lr: 1.0e-05
48
+ weight_decay: 0.01
49
+ no_decay_params:
50
+ - bias
51
+ - LayerNorm.weight
52
+ prediction:
53
+ dataset:
54
+ _target_: classy.data.dataset.hf.classification.HFQADataset.from_samples
55
+ transformer_model: ${transformer_model}
56
+ additional_special_tokens: ${model.additional_special_tokens}
57
+ min_length: -1
58
+ max_length: -1
59
+ tokens_per_batch: 800
60
+ max_batch_size: -1
61
+ section_size: 10000
62
+ prebatch: true
63
+ materialize: false
64
+ for_inference: true
65
+ training:
66
+ seed: 12
67
+ pl_trainer:
68
+ _target_: pytorch_lightning.Trainer
69
+ accumulate_grad_batches: 4
70
+ gradient_clip_val: 10.0
71
+ val_check_interval: 1.0
72
+ max_steps: 1000000
73
+ early_stopping_callback:
74
+ _target_: pytorch_lightning.callbacks.EarlyStopping
75
+ monitor: ${callbacks_monitor}
76
+ mode: ${callbacks_mode}
77
+ patience: 25
78
+ model_checkpoint_callback:
79
+ _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
80
+ monitor: ${callbacks_monitor}
81
+ mode: ${callbacks_mode}
82
+ verbose: true
83
+ save_top_k: 3
84
+ dirpath: checkpoints
85
+ save_last: true
86
+ resume_from: null
87
+ logging:
88
+ wandb:
89
+ use_wandb: true
90
+ project_name: esc-ed
91
+ experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
92
+ anonymous: null
93
+ run_id: null
94
+ task: qa
95
+ project_name: classy
96
+ exp_name: esc-aida-longformer-large-gam-cand-shuffle
97
+ exp_folder: ./experiments/${exp_name}
98
+ transformer_model: bert-base-cased
99
+ callbacks_monitor: val_accuracy
100
+ callbacks_mode: max
101
+ profiles:
102
+ supported_tasks:
103
+ - qa
104
+ - sentence-pair
105
+ - sequence
106
+ - token
107
+ - generation
108
+ transformer_model: allenai/longformer-large-4096
109
+ candidates_separator: '*'
110
+ training:
111
+ pl_trainer:
112
+ accumulate_grad_batches: 8
113
+ val_check_interval: 2048
114
+ max_steps: 100000
115
+ model:
116
+ _target_: src.esc_ed_module.ESCModule
117
+ additional_special_tokens: []
118
+ transformer_model: ${transformer_model}
119
+ attention_window: 64
120
+ modify_global_attention: true
121
+ optim_conf:
122
+ _target_: classy.optim.factories.RAdamFactory
123
+ lr: 1.0e-05
124
+ weight_decay: 0.01
125
+ no_decay_params:
126
+ - bias
127
+ - LayerNorm.weight
128
+ data:
129
+ datamodule:
130
+ train_dataset:
131
+ _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
132
+ transformer_model: ${transformer_model}
133
+ additional_special_tokens: ${model.additional_special_tokens}
134
+ candidates_separator: ${candidates_separator}
135
+ shuffle_candidates_prob: 0.0
136
+ min_length: 0
137
+ max_length: 1024
138
+ tokens_per_batch: 1024
139
+ max_batch_size: 10
140
+ section_size: 20000
141
+ prebatch: true
142
+ materialize: false
143
+ for_inference: false
144
+ validation_dataset:
145
+ _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
146
+ transformer_model: ${transformer_model}
147
+ additional_special_tokens: ${model.additional_special_tokens}
148
+ candidates_separator: ${candidates_separator}
149
+ min_length: 0
150
+ max_length: 1024
151
+ tokens_per_batch: 2048
152
+ max_batch_size: 10
153
+ section_size: 10000
154
+ prebatch: true
155
+ materialize: true
156
+ for_inference: false
157
+ shuffle_dataset: true
158
+ prediction:
159
+ dataset:
160
+ _target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
161
+ transformer_model: ${transformer_model}
162
+ additional_special_tokens: ${model.additional_special_tokens}
163
+ candidates_separator: ${candidates_separator}
164
+ min_length: -1
165
+ max_length: -1
166
+ tokens_per_batch: 2048
167
+ max_batch_size: -1
168
+ section_size: 10000
169
+ prebatch: true
170
+ materialize: false
171
+ for_inference: true
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ callbacks:
2
+ callbacks: []
3
+ data:
4
+ datamodule:
5
+ _target_: classy.data.data_modules.ClassyDataModule
6
+ task: qa
7
+ dataset_path: data/aida
8
+ train_dataset:
9
+ _target_: extend.data.esc_ed_dataset.ESCEDDataset.from_file
10
+ transformer_model: allenai/longformer-large-4096
11
+ additional_special_tokens: []
12
+ candidates_separator: '*'
13
+ shuffle_candidates_prob: 0.0
14
+ min_length: 0
15
+ max_length: 1024
16
+ tokens_per_batch: 1024
17
+ max_batch_size: 10
18
+ section_size: 20000
19
+ prebatch: true
20
+ materialize: false
21
+ for_inference: false
22
+ validation_dataset:
23
+ _target_: extend.data.esc_ed_dataset.ESCEDDataset.from_file
24
+ transformer_model: allenai/longformer-large-4096
25
+ additional_special_tokens: []
26
+ candidates_separator: '*'
27
+ min_length: 0
28
+ max_length: 1024
29
+ tokens_per_batch: 2048
30
+ max_batch_size: 10
31
+ section_size: 10000
32
+ prebatch: true
33
+ materialize: true
34
+ for_inference: false
35
+ validation_split_size: 0.1
36
+ test_split_size: 0.1
37
+ max_nontrain_split_size: 10000
38
+ shuffle_dataset: true
39
+ device:
40
+ gpus:
41
+ - 0
42
+ precision: 32
43
+ amp_level: O0
44
+ model:
45
+ _target_: extend.esc_ed_module.ESCModule
46
+ additional_special_tokens: []
47
+ transformer_model: allenai/longformer-large-4096
48
+ attention_window: 64
49
+ modify_global_attention: 0
50
+ optim_conf:
51
+ _target_: classy.optim.factories.RAdamFactory
52
+ lr: 1.0e-05
53
+ weight_decay: 0.01
54
+ no_decay_params:
55
+ - bias
56
+ - LayerNorm.weight
57
+ prediction:
58
+ dataset:
59
+ _target_: extend.data.esc_ed_dataset.ESCEDDataset.from_samples
60
+ transformer_model: allenai/longformer-large-4096
61
+ additional_special_tokens: []
62
+ candidates_separator: '*'
63
+ min_length: -1
64
+ max_length: -1
65
+ tokens_per_batch: 2048
66
+ max_batch_size: -1
67
+ section_size: 10000
68
+ prebatch: true
69
+ materialize: false
70
+ for_inference: true
71
+ training:
72
+ seed: 12
73
+ pl_trainer:
74
+ _target_: pytorch_lightning.Trainer
75
+ accumulate_grad_batches: 8
76
+ gradient_clip_val: 10.0
77
+ val_check_interval: 2048
78
+ max_steps: 100000
79
+ early_stopping_callback:
80
+ _target_: pytorch_lightning.callbacks.EarlyStopping
81
+ monitor: val_accuracy
82
+ mode: max
83
+ patience: 25
84
+ model_checkpoint_callback:
85
+ _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
86
+ monitor: val_accuracy
87
+ mode: max
88
+ verbose: true
89
+ save_top_k: 3
90
+ dirpath: checkpoints
91
+ save_last: true
92
+ resume_from: null
93
+ logging:
94
+ wandb:
95
+ use_wandb: true
96
+ project_name: esc-ed
97
+ experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
98
+ anonymous: null
99
+ run_id: 3v74woaz
100
+ task: qa
101
+ project_name: classy
102
+ exp_name: esc-aida-longformer-large-gam-cand-shuffle
103
+ exp_folder: /root/esc-ed/experiments/esc-aida-longformer-large-gam-cand-shuffle
104
+ transformer_model: allenai/longformer-large-4096
105
+ callbacks_monitor: val_accuracy
106
+ callbacks_mode: max
107
+ supported_tasks:
108
+ - qa
109
+ - sentence-pair
110
+ - sequence
111
+ - token
112
+ - generation
113
+ candidates_separator: '*'
114
+ evaluation:
115
+ _target_: extend.evaluation.InKBF1
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config_post_trainer_init.yaml ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ callbacks:
2
+ callbacks: []
3
+ data:
4
+ datamodule:
5
+ _target_: classy.data.data_modules.ClassyDataModule
6
+ task: qa
7
+ dataset_path: /root/esc-ed/data/aida
8
+ train_dataset:
9
+ _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
10
+ transformer_model: allenai/longformer-large-4096
11
+ additional_special_tokens: []
12
+ candidates_separator: '*'
13
+ shuffle_candidates_prob: 0.0
14
+ min_length: 0
15
+ max_length: 1024
16
+ tokens_per_batch: 1024
17
+ max_batch_size: 10
18
+ section_size: 20000
19
+ prebatch: true
20
+ materialize: false
21
+ for_inference: false
22
+ validation_dataset:
23
+ _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
24
+ transformer_model: allenai/longformer-large-4096
25
+ additional_special_tokens: []
26
+ candidates_separator: '*'
27
+ min_length: 0
28
+ max_length: 1024
29
+ tokens_per_batch: 2048
30
+ max_batch_size: 10
31
+ section_size: 10000
32
+ prebatch: true
33
+ materialize: true
34
+ for_inference: false
35
+ validation_split_size: 0.1
36
+ test_split_size: 0.1
37
+ max_nontrain_split_size: 10000
38
+ shuffle_dataset: true
39
+ device:
40
+ gpus:
41
+ - 0
42
+ precision: 32
43
+ amp_level: O0
44
+ model:
45
+ _target_: src.esc_ed_module.ESCModule
46
+ additional_special_tokens: []
47
+ transformer_model: allenai/longformer-large-4096
48
+ attention_window: 64
49
+ modify_global_attention: true
50
+ optim_conf:
51
+ _target_: classy.optim.factories.RAdamFactory
52
+ lr: 1.0e-05
53
+ weight_decay: 0.01
54
+ no_decay_params:
55
+ - bias
56
+ - LayerNorm.weight
57
+ prediction:
58
+ dataset:
59
+ _target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
60
+ transformer_model: allenai/longformer-large-4096
61
+ additional_special_tokens: []
62
+ candidates_separator: '*'
63
+ min_length: -1
64
+ max_length: -1
65
+ tokens_per_batch: 2048
66
+ max_batch_size: -1
67
+ section_size: 10000
68
+ prebatch: true
69
+ materialize: false
70
+ for_inference: true
71
+ training:
72
+ seed: 12
73
+ pl_trainer:
74
+ _target_: pytorch_lightning.Trainer
75
+ accumulate_grad_batches: 8
76
+ gradient_clip_val: 10.0
77
+ val_check_interval: 2048
78
+ max_steps: 100000
79
+ early_stopping_callback:
80
+ _target_: pytorch_lightning.callbacks.EarlyStopping
81
+ monitor: val_accuracy
82
+ mode: max
83
+ patience: 25
84
+ model_checkpoint_callback:
85
+ _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
86
+ monitor: val_accuracy
87
+ mode: max
88
+ verbose: true
89
+ save_top_k: 3
90
+ dirpath: checkpoints
91
+ save_last: true
92
+ resume_from: null
93
+ logging:
94
+ wandb:
95
+ use_wandb: true
96
+ project_name: esc-ed
97
+ experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
98
+ anonymous: null
99
+ run_id: 3v74woaz
100
+ task: qa
101
+ project_name: classy
102
+ exp_name: esc-aida-longformer-large-gam-cand-shuffle
103
+ exp_folder: /root/esc-ed/experiments/esc-aida-longformer-large-gam-cand-shuffle
104
+ transformer_model: allenai/longformer-large-4096
105
+ callbacks_monitor: val_accuracy
106
+ callbacks_mode: max
107
+ supported_tasks:
108
+ - qa
109
+ - sentence-pair
110
+ - sequence
111
+ - token
112
+ - generation
113
+ candidates_separator: '*'
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/hydra.yaml ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ hydra:
2
+ run:
3
+ dir: ./experiments/${exp_name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
4
+ sweep:
5
+ dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
6
+ subdir: ${hydra.job.num}
7
+ launcher:
8
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
9
+ sweeper:
10
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
11
+ max_batch_size: null
12
+ help:
13
+ app_name: ${hydra.job.name}
14
+ header: '${hydra.help.app_name} is powered by Hydra.
15
+
16
+ '
17
+ footer: 'Powered by Hydra (https://hydra.cc)
18
+
19
+ Use --hydra-help to view Hydra specific help
20
+
21
+ '
22
+ template: '${hydra.help.header}
23
+
24
+ == Configuration groups ==
25
+
26
+ Compose your configuration from those groups (group=option)
27
+
28
+
29
+ $APP_CONFIG_GROUPS
30
+
31
+
32
+ == Config ==
33
+
34
+ Override anything in the config (foo.bar=value)
35
+
36
+
37
+ $CONFIG
38
+
39
+
40
+ ${hydra.help.footer}
41
+
42
+ '
43
+ hydra_help:
44
+ template: 'Hydra (${hydra.runtime.version})
45
+
46
+ See https://hydra.cc for more info.
47
+
48
+
49
+ == Flags ==
50
+
51
+ $FLAGS_HELP
52
+
53
+
54
+ == Configuration groups ==
55
+
56
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
57
+ to command line)
58
+
59
+
60
+ $HYDRA_CONFIG_GROUPS
61
+
62
+
63
+ Use ''--cfg hydra'' to Show the Hydra config.
64
+
65
+ '
66
+ hydra_help: ???
67
+ hydra_logging:
68
+ version: 1
69
+ formatters:
70
+ simple:
71
+ format: '[%(asctime)s][HYDRA] %(message)s'
72
+ handlers:
73
+ console:
74
+ class: logging.StreamHandler
75
+ formatter: simple
76
+ stream: ext://sys.stdout
77
+ root:
78
+ level: INFO
79
+ handlers:
80
+ - console
81
+ loggers:
82
+ logging_example:
83
+ level: DEBUG
84
+ disable_existing_loggers: false
85
+ job_logging:
86
+ version: 1
87
+ formatters:
88
+ simple:
89
+ format: '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
90
+ handlers:
91
+ console:
92
+ class: logging.StreamHandler
93
+ formatter: simple
94
+ stream: ext://sys.stdout
95
+ file:
96
+ class: logging.FileHandler
97
+ formatter: simple
98
+ filename: ${hydra.job.name}.log
99
+ root:
100
+ level: WARN
101
+ handlers:
102
+ - console
103
+ - file
104
+ disable_existing_loggers: false
105
+ env: {}
106
+ searchpath: []
107
+ callbacks: {}
108
+ output_subdir: .hydra
109
+ overrides:
110
+ hydra: []
111
+ task:
112
+ - +profiles=aida-longformer-large-gam
113
+ - device=cuda
114
+ - device.gpus=[0]
115
+ - exp_name=esc-aida-longformer-large-gam-cand-shuffle
116
+ - data.datamodule.dataset_path=data/aida
117
+ - logging.wandb.use_wandb=True
118
+ - logging.wandb.project_name=esc-ed
119
+ - logging.wandb.experiment_name=aida-longformer-large-*sep-gam-cand-shuffle
120
+ - training.early_stopping_callback.patience=25
121
+ job:
122
+ name: train
123
+ override_dirname: +profiles=aida-longformer-large-gam,data.datamodule.dataset_path=data/aida,device.gpus=[0],device=cuda,exp_name=esc-aida-longformer-large-gam-cand-shuffle,logging.wandb.experiment_name=aida-longformer-large-*sep-gam-cand-shuffle,logging.wandb.project_name=esc-ed,logging.wandb.use_wandb=True,training.early_stopping_callback.patience=25
124
+ id: ???
125
+ num: ???
126
+ config_name: qa
127
+ env_set: {}
128
+ env_copy: []
129
+ config:
130
+ override_dirname:
131
+ kv_sep: '='
132
+ item_sep: ','
133
+ exclude_keys: []
134
+ runtime:
135
+ version: 1.1.1
136
+ cwd: /root/esc-ed
137
+ config_sources:
138
+ - path: hydra.conf
139
+ schema: pkg
140
+ provider: hydra
141
+ - path: classy.scripts.cli
142
+ schema: pkg
143
+ provider: main
144
+ - path: configurations
145
+ schema: pkg
146
+ provider: classy-searchpath-plugin
147
+ - path: /root/esc-ed/configurations
148
+ schema: file
149
+ provider: command-line
150
+ - path: ''
151
+ schema: structured
152
+ provider: schema
153
+ choices:
154
+ profiles: aida-longformer-large-gam
155
+ logging: default
156
+ training: default
157
+ prediction: qa
158
+ model: qa
159
+ device: cuda
160
+ data: qa
161
+ callbacks: empty
162
+ hydra/env: default
163
+ hydra/callbacks: null
164
+ hydra/job_logging: default
165
+ hydra/hydra_logging: default
166
+ hydra/hydra_help: default
167
+ hydra/help: default
168
+ hydra/sweeper: basic
169
+ hydra/launcher: basic
170
+ hydra/output: default
171
+ verbose:
172
+ - classy
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/overrides.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ - +profiles=aida-longformer-large-gam
2
+ - device=cuda
3
+ - device.gpus=[0]
4
+ - exp_name=esc-aida-longformer-large-gam-cand-shuffle
5
+ - data.datamodule.dataset_path=data/aida
6
+ - logging.wandb.use_wandb=True
7
+ - logging.wandb.project_name=esc-ed
8
+ - logging.wandb.experiment_name=aida-longformer-large-*sep-gam-cand-shuffle
9
+ - training.early_stopping_callback.patience=25
experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:daf5ca2a79a3cdcac3864db7a17b3a9fb084457c7167a7d8e656d8f8cb31cdd7
3
+ size 5203223845
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ git+https://github.com/sapienzanlp/extend