Commit
·
67a2b13
0
Parent(s):
initial commit
Browse files- .gitattributes +2 -0
- README.md +64 -0
- app.py +176 -0
- data/.placeholder +0 -0
- data/inventories/aida.tsv +3 -0
- data/repo-assets/extend_formulation.png +0 -0
- experiments/.placeholder +0 -0
- experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config.bk.yaml +171 -0
- experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config.yaml +115 -0
- experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config_post_trainer_init.yaml +113 -0
- experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/hydra.yaml +172 -0
- experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/overrides.yaml +9 -0
- experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt +3 -0
- requirements.txt +1 -0
.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.tsv filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: ExtEnD
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: gray
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.5.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: cc-by-nc-sa-4.0
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
<h1 align ="center"> ExtEnD: Extractive Entity Disambiguation </h1>
|
| 14 |
+
|
| 15 |
+
<p align="center">
|
| 16 |
+
<a href="https://sunglasses-ai.github.io/classy/">
|
| 17 |
+
<img alt="classy" src="https://img.shields.io/badge/-classy%200.2.1-black?style=for-the-badge&logoColor=white&logo=">
|
| 18 |
+
</a>
|
| 19 |
+
<a href="">
|
| 20 |
+
<img alt="Python" src="https://img.shields.io/badge/Python 3.8--3.9-blue?style=for-the-badge&logo=python&logoColor=white">
|
| 21 |
+
</a>
|
| 22 |
+
<a href="https://pytorch.org/get-started/locally/">
|
| 23 |
+
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch 1.9-ee4c2c?style=for-the-badge&logo=pytorch&logoColor=white">
|
| 24 |
+
</a>
|
| 25 |
+
<a href="https://spacy.io/">
|
| 26 |
+
<img alt="plugin: spacy" src="https://img.shields.io/badge/plugin%20for-spaCy%203.2-09A3D5.svg?style=for-the-badge&labelColor=gray">
|
| 27 |
+
</a>
|
| 28 |
+
<a href="https://black.readthedocs.io/en/stable/">
|
| 29 |
+
<img alt="Code style: black" src="https://img.shields.io/badge/code%20style-black-black.svg?style=for-the-badge&labelColor=gray">
|
| 30 |
+
</a>
|
| 31 |
+
|
| 32 |
+
</p>
|
| 33 |
+
|
| 34 |
+
This space contains the demo of [ExtEnD: Extractive Entity Disambiguation](https://www.researchgate.net/publication/359392427_ExtEnD_Extractive_Entity_Disambiguation),
|
| 35 |
+
a novel approach to Entity Disambiguation (i.e. the task of linking a mention in context with its most suitable entity in a reference knowledge base) where we reformulate
|
| 36 |
+
this task as a text extraction problem. This work was accepted at ACL 2022.
|
| 37 |
+
|
| 38 |
+
If you find this demo, our paper, code or framework useful, please reference this work in your paper:
|
| 39 |
+
|
| 40 |
+
```
|
| 41 |
+
@inproceedings{barba-etal-2022-extend,
|
| 42 |
+
title = "{E}xt{E}n{D}: Extractive Entity Disambiguation",
|
| 43 |
+
author = "Barba, Edoardo and
|
| 44 |
+
Procopio, Luigi and
|
| 45 |
+
Navigli, Roberto",
|
| 46 |
+
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics",
|
| 47 |
+
month = may,
|
| 48 |
+
year = "2022",
|
| 49 |
+
address = "Online and Dublin, Ireland",
|
| 50 |
+
publisher = "Association for Computational Linguistics",
|
| 51 |
+
}
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+

|
| 55 |
+
|
| 56 |
+
## Acknowledgments
|
| 57 |
+
|
| 58 |
+
The authors gratefully acknowledge the support of the ERC Consolidator Grant MOUSSE No. 726487 under the European Union’s Horizon 2020 research and innovation programme.
|
| 59 |
+
|
| 60 |
+
This work was supported in part by the MIUR under grant “Dipartimenti di eccellenza 2018-2022” of the Department of Computer Science of the Sapienza University of Rome.
|
| 61 |
+
|
| 62 |
+
## License
|
| 63 |
+
|
| 64 |
+
This work is under the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license](https://creativecommons.org/licenses/by-nc-sa/4.0/).
|
app.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import html
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
from extend import spacy_component # this is needed to register the spacy component
|
| 6 |
+
|
| 7 |
+
import spacy
|
| 8 |
+
import streamlit as st
|
| 9 |
+
from annotated_text import annotation
|
| 10 |
+
from classy.scripts.model.demo import tabbed_navigation
|
| 11 |
+
from classy.utils.streamlit import get_md_200_random_color_generator
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main(
    model_checkpoint_path: str,
    default_inventory_path: str,
    cuda_device: int,
):
    """Render the ExtEnD Streamlit demo page.

    The page has three tabs: an interactive model demo, a short "How it works"
    explanation and the paper abstract.

    Args:
        model_checkpoint_path: checkpoint path forwarded to the "extend" spaCy
            component as ``checkpoint_path``.
        default_inventory_path: mentions inventory used whenever the user does
            not upload a custom one.
        cuda_device: forwarded to the "extend" component as ``device``
            (the script entry point passes -1; presumably CPU -- confirm
            against the extend component).
    """
    # Function-scope import: only needed to sanitise the uploaded file name.
    import os

    # setup examples
    examples = [
        "Italy beat England and won Euro 2021.",
        "Japan began the defence of their Asian Cup title with a lucky 2-1 win against Syria in a Group C championship match on Friday.",
        "The project was coded in Java.",
    ]

    # css rules
    st.write(
        """
        <style type="text/css">
        a {
            text-decoration: none !important;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    # setup header
    st.markdown("<h1 style='text-align: center;'>ExtEnD: Extractive Entity Disambiguation</h1>", unsafe_allow_html=True)
    # NOTE(review): both <img> tags have an empty src in the visible source;
    # confirm whether the badge images were stripped somewhere upstream.
    st.write(
        """
        <div align="center">
            <a href="https://sunglasses-ai.github.io/classy/">
                <img alt="Python" style="height: 3em; margin: 0 1em" src="">
            </a>
            <a href="https://spacy.io/" style="text-decoration: none">
                <img alt="spaCy" style="height: 3em; margin: 0 1em;" src="">
            </a>
        </div>
        """,
        unsafe_allow_html=True,
    )

    def model_demo():
        """Interactive tab: read text, run the pipeline, show linked entities."""

        @st.cache(allow_output_mutation=True)
        def load_resources(inventory_path):
            """Build the spaCy pipeline with the "extend" component (cached by Streamlit)."""
            # load nlp
            nlp = spacy.load("en_core_web_sm")
            extend_config = dict(
                checkpoint_path=model_checkpoint_path,
                mentions_inventory_path=inventory_path,
                device=cuda_device,
                tokens_per_batch=10_000,
            )
            nlp.add_pipe("extend", after="ner", config=extend_config)

            # mock call to load resources
            nlp(examples[0])

            return nlp

        # read input
        placeholder = st.selectbox(
            "Examples",
            options=examples,
            index=0,
        )
        input_text = st.text_area("Input text to entity-disambiguate", placeholder)

        # custom inventory
        uploaded_inventory_path = st.file_uploader(
            "[Optional] Upload custom inventory (tsv file, mention \\t desc1 \\t desc2 \\t)",
            accept_multiple_files=False,
            type=["tsv"],
        )
        if uploaded_inventory_path is not None:
            # The file name comes from the client: keep only its last path
            # component so the upload cannot be written outside data/inventories/.
            safe_name = os.path.basename(uploaded_inventory_path.name.replace("\\", "/"))
            inventory_path = f"data/inventories/{safe_name}"
            # Never let an upload overwrite the default inventory on disk.
            if os.path.abspath(inventory_path) == os.path.abspath(default_inventory_path):
                inventory_path = f"data/inventories/uploaded-{safe_name}"
            with open(inventory_path, "wb") as f:
                f.write(uploaded_inventory_path.getbuffer())
        else:
            inventory_path = default_inventory_path

        if st.button("Classify", key="classify"):

            # load model
            nlp = load_resources(inventory_path)
            color_generator = get_md_200_random_color_generator()

            # tag sentence
            time_start = time.perf_counter()
            doc = nlp(input_text)
            time_end = time.perf_counter()

            # extract entities: index the disambiguated ones by the token
            # position at which they start
            linked_by_start = {
                ent.start: ent
                for ent in doc.ents
                if ent._.disambiguated_entity is not None
            }

            # create annotated html components: walk the tokens once, emitting
            # one annotation per linked entity (and jumping past its tokens)
            # and the escaped text of every other token
            annotated_html_components = []
            token_idx = 0
            while token_idx < len(doc):
                ent = linked_by_start.get(token_idx)
                if ent is None:
                    annotated_html_components.append(html.escape(doc[token_idx].text))
                    token_idx += 1
                else:
                    annotated_html_components.append(
                        str(annotation(ent.text, ent._.disambiguated_entity, color_generator()))
                    )
                    token_idx = ent.end

            st.markdown(
                "\n".join(
                    [
                        "<div>",
                        *annotated_html_components,
                        # the three literals below are one list element
                        # (implicit string concatenation), kept as in the original
                        "<p></p>"
                        f'<div style="text-align: right"><p style="color: gray">Time: {(time_end - time_start):.2f}s</p></div>'
                        "</div>",
                    ]
                ),
                unsafe_allow_html=True,
            )

    def hiw():
        """"How it works" tab: formulation picture plus a short explanation."""
        st.markdown("ExtEnD frames Entity Disambiguation as a text extraction problem:")
        st.image("data/repo-assets/extend_formulation.png", caption="ExtEnD Formulation")
        st.markdown("""
            Given the sentence *After a long fight Superman saved Metropolis*, where *Superman* is the mention
            to disambiguate, ExtEnD first concatenates the descriptions of all the possible candidates of *Superman* in the
            inventory and then selects the span whose description best suits the mention in its context.

            To convert this task to end2end entity linking, as we do in *Model demo*, we leverage spaCy
            (more specifically, its NER) and run ExtEnD on each named entity spaCy identifies
            (if the corresponding mention is contained in the inventory).
        """)

    def abstract():
        """"Abstract" tab."""
        # NOTE(review): the text below describes ESCHER / Word Sense
        # Disambiguation, not ExtEnD -- it looks copied from another demo and
        # should probably be replaced with the ExtEnD abstract.
        st.write(
            """
            Word Sense Disambiguation (WSD) is a historical NLP task aimed at linking words in contexts to discrete sense inventories and it is usually cast as a multi-label classification task. Recently, several neural approaches have employed sense definitions to better represent word meanings. Yet, these approaches do not observe the input sentence and the sense definition candidates all at once, thus potentially reducing the model performance and generalization power. We cope with this issue by reframing WSD as a span extraction problem --- which we called Extractive Sense Comprehension (ESC) --- and propose ESCHER, a transformer-based neural architecture for this new formulation. By means of an extensive array of experiments, we show that ESC unleashes the full potential of our model, leading it to outdo all of its competitors and to set a new state of the art on the English WSD task. In the few-shot scenario, ESCHER proves to exploit training data efficiently, attaining the same performance as its closest competitor while relying on almost three times fewer annotations. Furthermore, ESCHER can nimbly combine data annotated with senses from different lexical resources, achieving performances that were previously out of everyone's reach. The model along with data is available at https://github.com/SapienzaNLP/esc.
            """
        )

    tabs = dict(model=("Model demo", model_demo), hiw=("How it works", hiw), abstract=("Abstract", abstract))

    tabbed_navigation(tabs, "model")
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
if __name__ == "__main__":
    # Script entry point: launch the demo with the checkpoint and inventory
    # that ship with this repository.
    main(
        model_checkpoint_path="experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt",
        default_inventory_path="data/inventories/aida.tsv",
        cuda_device=-1,
    )
|
data/.placeholder
ADDED
|
File without changes
|
data/inventories/aida.tsv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79aee10d058068ddfd791c853658c406e8c81dff6a2ea8fabcc7bb468df81986
|
| 3 |
+
size 34209452
|
data/repo-assets/extend_formulation.png
ADDED
|
experiments/.placeholder
ADDED
|
File without changes
|
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config.bk.yaml
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
callbacks:
|
| 2 |
+
callbacks: []
|
| 3 |
+
data:
|
| 4 |
+
datamodule:
|
| 5 |
+
_target_: classy.data.data_modules.ClassyDataModule
|
| 6 |
+
task: ${task}
|
| 7 |
+
dataset_path: data/aida
|
| 8 |
+
train_dataset:
|
| 9 |
+
_target_: classy.data.dataset.hf.classification.HFQADataset.from_file
|
| 10 |
+
transformer_model: ${transformer_model}
|
| 11 |
+
additional_special_tokens: ${model.additional_special_tokens}
|
| 12 |
+
min_length: 5
|
| 13 |
+
max_length: 500
|
| 14 |
+
tokens_per_batch: 2000
|
| 15 |
+
max_batch_size: 10
|
| 16 |
+
section_size: 10000
|
| 17 |
+
prebatch: true
|
| 18 |
+
materialize: false
|
| 19 |
+
for_inference: false
|
| 20 |
+
validation_dataset:
|
| 21 |
+
_target_: classy.data.dataset.hf.classification.HFQADataset.from_file
|
| 22 |
+
transformer_model: ${transformer_model}
|
| 23 |
+
additional_special_tokens: ${model.additional_special_tokens}
|
| 24 |
+
min_length: 5
|
| 25 |
+
max_length: 500
|
| 26 |
+
tokens_per_batch: 2000
|
| 27 |
+
max_batch_size: 10
|
| 28 |
+
section_size: 10000
|
| 29 |
+
prebatch: true
|
| 30 |
+
materialize: true
|
| 31 |
+
for_inference: true
|
| 32 |
+
validation_split_size: 0.1
|
| 33 |
+
test_split_size: 0.1
|
| 34 |
+
max_nontrain_split_size: 10000
|
| 35 |
+
shuffle_dataset: true
|
| 36 |
+
device:
|
| 37 |
+
gpus:
|
| 38 |
+
- 0
|
| 39 |
+
precision: 32
|
| 40 |
+
amp_level: O0
|
| 41 |
+
model:
|
| 42 |
+
_target_: classy.pl_modules.hf.classification.HFQAPLModule
|
| 43 |
+
transformer_model: ${transformer_model}
|
| 44 |
+
additional_special_tokens: []
|
| 45 |
+
optim_conf:
|
| 46 |
+
_target_: classy.optim.factories.RAdamFactory
|
| 47 |
+
lr: 1.0e-05
|
| 48 |
+
weight_decay: 0.01
|
| 49 |
+
no_decay_params:
|
| 50 |
+
- bias
|
| 51 |
+
- LayerNorm.weight
|
| 52 |
+
prediction:
|
| 53 |
+
dataset:
|
| 54 |
+
_target_: classy.data.dataset.hf.classification.HFQADataset.from_samples
|
| 55 |
+
transformer_model: ${transformer_model}
|
| 56 |
+
additional_special_tokens: ${model.additional_special_tokens}
|
| 57 |
+
min_length: -1
|
| 58 |
+
max_length: -1
|
| 59 |
+
tokens_per_batch: 800
|
| 60 |
+
max_batch_size: -1
|
| 61 |
+
section_size: 10000
|
| 62 |
+
prebatch: true
|
| 63 |
+
materialize: false
|
| 64 |
+
for_inference: true
|
| 65 |
+
training:
|
| 66 |
+
seed: 12
|
| 67 |
+
pl_trainer:
|
| 68 |
+
_target_: pytorch_lightning.Trainer
|
| 69 |
+
accumulate_grad_batches: 4
|
| 70 |
+
gradient_clip_val: 10.0
|
| 71 |
+
val_check_interval: 1.0
|
| 72 |
+
max_steps: 1000000
|
| 73 |
+
early_stopping_callback:
|
| 74 |
+
_target_: pytorch_lightning.callbacks.EarlyStopping
|
| 75 |
+
monitor: ${callbacks_monitor}
|
| 76 |
+
mode: ${callbacks_mode}
|
| 77 |
+
patience: 25
|
| 78 |
+
model_checkpoint_callback:
|
| 79 |
+
_target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
|
| 80 |
+
monitor: ${callbacks_monitor}
|
| 81 |
+
mode: ${callbacks_mode}
|
| 82 |
+
verbose: true
|
| 83 |
+
save_top_k: 3
|
| 84 |
+
dirpath: checkpoints
|
| 85 |
+
save_last: true
|
| 86 |
+
resume_from: null
|
| 87 |
+
logging:
|
| 88 |
+
wandb:
|
| 89 |
+
use_wandb: true
|
| 90 |
+
project_name: esc-ed
|
| 91 |
+
experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
|
| 92 |
+
anonymous: null
|
| 93 |
+
run_id: null
|
| 94 |
+
task: qa
|
| 95 |
+
project_name: classy
|
| 96 |
+
exp_name: esc-aida-longformer-large-gam-cand-shuffle
|
| 97 |
+
exp_folder: ./experiments/${exp_name}
|
| 98 |
+
transformer_model: bert-base-cased
|
| 99 |
+
callbacks_monitor: val_accuracy
|
| 100 |
+
callbacks_mode: max
|
| 101 |
+
profiles:
|
| 102 |
+
supported_tasks:
|
| 103 |
+
- qa
|
| 104 |
+
- sentence-pair
|
| 105 |
+
- sequence
|
| 106 |
+
- token
|
| 107 |
+
- generation
|
| 108 |
+
transformer_model: allenai/longformer-large-4096
|
| 109 |
+
candidates_separator: '*'
|
| 110 |
+
training:
|
| 111 |
+
pl_trainer:
|
| 112 |
+
accumulate_grad_batches: 8
|
| 113 |
+
val_check_interval: 2048
|
| 114 |
+
max_steps: 100000
|
| 115 |
+
model:
|
| 116 |
+
_target_: src.esc_ed_module.ESCModule
|
| 117 |
+
additional_special_tokens: []
|
| 118 |
+
transformer_model: ${transformer_model}
|
| 119 |
+
attention_window: 64
|
| 120 |
+
modify_global_attention: true
|
| 121 |
+
optim_conf:
|
| 122 |
+
_target_: classy.optim.factories.RAdamFactory
|
| 123 |
+
lr: 1.0e-05
|
| 124 |
+
weight_decay: 0.01
|
| 125 |
+
no_decay_params:
|
| 126 |
+
- bias
|
| 127 |
+
- LayerNorm.weight
|
| 128 |
+
data:
|
| 129 |
+
datamodule:
|
| 130 |
+
train_dataset:
|
| 131 |
+
_target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
|
| 132 |
+
transformer_model: ${transformer_model}
|
| 133 |
+
additional_special_tokens: ${model.additional_special_tokens}
|
| 134 |
+
candidates_separator: ${candidates_separator}
|
| 135 |
+
shuffle_candidates_prob: 0.0
|
| 136 |
+
min_length: 0
|
| 137 |
+
max_length: 1024
|
| 138 |
+
tokens_per_batch: 1024
|
| 139 |
+
max_batch_size: 10
|
| 140 |
+
section_size: 20000
|
| 141 |
+
prebatch: true
|
| 142 |
+
materialize: false
|
| 143 |
+
for_inference: false
|
| 144 |
+
validation_dataset:
|
| 145 |
+
_target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
|
| 146 |
+
transformer_model: ${transformer_model}
|
| 147 |
+
additional_special_tokens: ${model.additional_special_tokens}
|
| 148 |
+
candidates_separator: ${candidates_separator}
|
| 149 |
+
min_length: 0
|
| 150 |
+
max_length: 1024
|
| 151 |
+
tokens_per_batch: 2048
|
| 152 |
+
max_batch_size: 10
|
| 153 |
+
section_size: 10000
|
| 154 |
+
prebatch: true
|
| 155 |
+
materialize: true
|
| 156 |
+
for_inference: false
|
| 157 |
+
shuffle_dataset: true
|
| 158 |
+
prediction:
|
| 159 |
+
dataset:
|
| 160 |
+
_target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
|
| 161 |
+
transformer_model: ${transformer_model}
|
| 162 |
+
additional_special_tokens: ${model.additional_special_tokens}
|
| 163 |
+
candidates_separator: ${candidates_separator}
|
| 164 |
+
min_length: -1
|
| 165 |
+
max_length: -1
|
| 166 |
+
tokens_per_batch: 2048
|
| 167 |
+
max_batch_size: -1
|
| 168 |
+
section_size: 10000
|
| 169 |
+
prebatch: true
|
| 170 |
+
materialize: false
|
| 171 |
+
for_inference: true
|
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config.yaml
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
callbacks:
|
| 2 |
+
callbacks: []
|
| 3 |
+
data:
|
| 4 |
+
datamodule:
|
| 5 |
+
_target_: classy.data.data_modules.ClassyDataModule
|
| 6 |
+
task: qa
|
| 7 |
+
dataset_path: data/aida
|
| 8 |
+
train_dataset:
|
| 9 |
+
_target_: extend.data.esc_ed_dataset.ESCEDDataset.from_file
|
| 10 |
+
transformer_model: allenai/longformer-large-4096
|
| 11 |
+
additional_special_tokens: []
|
| 12 |
+
candidates_separator: '*'
|
| 13 |
+
shuffle_candidates_prob: 0.0
|
| 14 |
+
min_length: 0
|
| 15 |
+
max_length: 1024
|
| 16 |
+
tokens_per_batch: 1024
|
| 17 |
+
max_batch_size: 10
|
| 18 |
+
section_size: 20000
|
| 19 |
+
prebatch: true
|
| 20 |
+
materialize: false
|
| 21 |
+
for_inference: false
|
| 22 |
+
validation_dataset:
|
| 23 |
+
_target_: extend.data.esc_ed_dataset.ESCEDDataset.from_file
|
| 24 |
+
transformer_model: allenai/longformer-large-4096
|
| 25 |
+
additional_special_tokens: []
|
| 26 |
+
candidates_separator: '*'
|
| 27 |
+
min_length: 0
|
| 28 |
+
max_length: 1024
|
| 29 |
+
tokens_per_batch: 2048
|
| 30 |
+
max_batch_size: 10
|
| 31 |
+
section_size: 10000
|
| 32 |
+
prebatch: true
|
| 33 |
+
materialize: true
|
| 34 |
+
for_inference: false
|
| 35 |
+
validation_split_size: 0.1
|
| 36 |
+
test_split_size: 0.1
|
| 37 |
+
max_nontrain_split_size: 10000
|
| 38 |
+
shuffle_dataset: true
|
| 39 |
+
device:
|
| 40 |
+
gpus:
|
| 41 |
+
- 0
|
| 42 |
+
precision: 32
|
| 43 |
+
amp_level: O0
|
| 44 |
+
model:
|
| 45 |
+
_target_: extend.esc_ed_module.ESCModule
|
| 46 |
+
additional_special_tokens: []
|
| 47 |
+
transformer_model: allenai/longformer-large-4096
|
| 48 |
+
attention_window: 64
|
| 49 |
+
modify_global_attention: 0
|
| 50 |
+
optim_conf:
|
| 51 |
+
_target_: classy.optim.factories.RAdamFactory
|
| 52 |
+
lr: 1.0e-05
|
| 53 |
+
weight_decay: 0.01
|
| 54 |
+
no_decay_params:
|
| 55 |
+
- bias
|
| 56 |
+
- LayerNorm.weight
|
| 57 |
+
prediction:
|
| 58 |
+
dataset:
|
| 59 |
+
_target_: extend.data.esc_ed_dataset.ESCEDDataset.from_samples
|
| 60 |
+
transformer_model: allenai/longformer-large-4096
|
| 61 |
+
additional_special_tokens: []
|
| 62 |
+
candidates_separator: '*'
|
| 63 |
+
min_length: -1
|
| 64 |
+
max_length: -1
|
| 65 |
+
tokens_per_batch: 2048
|
| 66 |
+
max_batch_size: -1
|
| 67 |
+
section_size: 10000
|
| 68 |
+
prebatch: true
|
| 69 |
+
materialize: false
|
| 70 |
+
for_inference: true
|
| 71 |
+
training:
|
| 72 |
+
seed: 12
|
| 73 |
+
pl_trainer:
|
| 74 |
+
_target_: pytorch_lightning.Trainer
|
| 75 |
+
accumulate_grad_batches: 8
|
| 76 |
+
gradient_clip_val: 10.0
|
| 77 |
+
val_check_interval: 2048
|
| 78 |
+
max_steps: 100000
|
| 79 |
+
early_stopping_callback:
|
| 80 |
+
_target_: pytorch_lightning.callbacks.EarlyStopping
|
| 81 |
+
monitor: val_accuracy
|
| 82 |
+
mode: max
|
| 83 |
+
patience: 25
|
| 84 |
+
model_checkpoint_callback:
|
| 85 |
+
_target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
|
| 86 |
+
monitor: val_accuracy
|
| 87 |
+
mode: max
|
| 88 |
+
verbose: true
|
| 89 |
+
save_top_k: 3
|
| 90 |
+
dirpath: checkpoints
|
| 91 |
+
save_last: true
|
| 92 |
+
resume_from: null
|
| 93 |
+
logging:
|
| 94 |
+
wandb:
|
| 95 |
+
use_wandb: true
|
| 96 |
+
project_name: esc-ed
|
| 97 |
+
experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
|
| 98 |
+
anonymous: null
|
| 99 |
+
run_id: 3v74woaz
|
| 100 |
+
task: qa
|
| 101 |
+
project_name: classy
|
| 102 |
+
exp_name: esc-aida-longformer-large-gam-cand-shuffle
|
| 103 |
+
exp_folder: /root/esc-ed/experiments/esc-aida-longformer-large-gam-cand-shuffle
|
| 104 |
+
transformer_model: allenai/longformer-large-4096
|
| 105 |
+
callbacks_monitor: val_accuracy
|
| 106 |
+
callbacks_mode: max
|
| 107 |
+
supported_tasks:
|
| 108 |
+
- qa
|
| 109 |
+
- sentence-pair
|
| 110 |
+
- sequence
|
| 111 |
+
- token
|
| 112 |
+
- generation
|
| 113 |
+
candidates_separator: '*'
|
| 114 |
+
evaluation:
|
| 115 |
+
_target_: extend.evaluation.InKBF1
|
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/config_post_trainer_init.yaml
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
callbacks:
|
| 2 |
+
callbacks: []
|
| 3 |
+
data:
|
| 4 |
+
datamodule:
|
| 5 |
+
_target_: classy.data.data_modules.ClassyDataModule
|
| 6 |
+
task: qa
|
| 7 |
+
dataset_path: /root/esc-ed/data/aida
|
| 8 |
+
train_dataset:
|
| 9 |
+
_target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
|
| 10 |
+
transformer_model: allenai/longformer-large-4096
|
| 11 |
+
additional_special_tokens: []
|
| 12 |
+
candidates_separator: '*'
|
| 13 |
+
shuffle_candidates_prob: 0.0
|
| 14 |
+
min_length: 0
|
| 15 |
+
max_length: 1024
|
| 16 |
+
tokens_per_batch: 1024
|
| 17 |
+
max_batch_size: 10
|
| 18 |
+
section_size: 20000
|
| 19 |
+
prebatch: true
|
| 20 |
+
materialize: false
|
| 21 |
+
for_inference: false
|
| 22 |
+
validation_dataset:
|
| 23 |
+
_target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
|
| 24 |
+
transformer_model: allenai/longformer-large-4096
|
| 25 |
+
additional_special_tokens: []
|
| 26 |
+
candidates_separator: '*'
|
| 27 |
+
min_length: 0
|
| 28 |
+
max_length: 1024
|
| 29 |
+
tokens_per_batch: 2048
|
| 30 |
+
max_batch_size: 10
|
| 31 |
+
section_size: 10000
|
| 32 |
+
prebatch: true
|
| 33 |
+
materialize: true
|
| 34 |
+
for_inference: false
|
| 35 |
+
validation_split_size: 0.1
|
| 36 |
+
test_split_size: 0.1
|
| 37 |
+
max_nontrain_split_size: 10000
|
| 38 |
+
shuffle_dataset: true
|
| 39 |
+
device:
|
| 40 |
+
gpus:
|
| 41 |
+
- 0
|
| 42 |
+
precision: 32
|
| 43 |
+
amp_level: O0
|
| 44 |
+
model:
|
| 45 |
+
_target_: src.esc_ed_module.ESCModule
|
| 46 |
+
additional_special_tokens: []
|
| 47 |
+
transformer_model: allenai/longformer-large-4096
|
| 48 |
+
attention_window: 64
|
| 49 |
+
modify_global_attention: true
|
| 50 |
+
optim_conf:
|
| 51 |
+
_target_: classy.optim.factories.RAdamFactory
|
| 52 |
+
lr: 1.0e-05
|
| 53 |
+
weight_decay: 0.01
|
| 54 |
+
no_decay_params:
|
| 55 |
+
- bias
|
| 56 |
+
- LayerNorm.weight
|
| 57 |
+
prediction:
|
| 58 |
+
dataset:
|
| 59 |
+
_target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
|
| 60 |
+
transformer_model: allenai/longformer-large-4096
|
| 61 |
+
additional_special_tokens: []
|
| 62 |
+
candidates_separator: '*'
|
| 63 |
+
min_length: -1
|
| 64 |
+
max_length: -1
|
| 65 |
+
tokens_per_batch: 2048
|
| 66 |
+
max_batch_size: -1
|
| 67 |
+
section_size: 10000
|
| 68 |
+
prebatch: true
|
| 69 |
+
materialize: false
|
| 70 |
+
for_inference: true
|
| 71 |
+
training:
|
| 72 |
+
seed: 12
|
| 73 |
+
pl_trainer:
|
| 74 |
+
_target_: pytorch_lightning.Trainer
|
| 75 |
+
accumulate_grad_batches: 8
|
| 76 |
+
gradient_clip_val: 10.0
|
| 77 |
+
val_check_interval: 2048
|
| 78 |
+
max_steps: 100000
|
| 79 |
+
early_stopping_callback:
|
| 80 |
+
_target_: pytorch_lightning.callbacks.EarlyStopping
|
| 81 |
+
monitor: val_accuracy
|
| 82 |
+
mode: max
|
| 83 |
+
patience: 25
|
| 84 |
+
model_checkpoint_callback:
|
| 85 |
+
_target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
|
| 86 |
+
monitor: val_accuracy
|
| 87 |
+
mode: max
|
| 88 |
+
verbose: true
|
| 89 |
+
save_top_k: 3
|
| 90 |
+
dirpath: checkpoints
|
| 91 |
+
save_last: true
|
| 92 |
+
resume_from: null
|
| 93 |
+
logging:
|
| 94 |
+
wandb:
|
| 95 |
+
use_wandb: true
|
| 96 |
+
project_name: esc-ed
|
| 97 |
+
experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
|
| 98 |
+
anonymous: null
|
| 99 |
+
run_id: 3v74woaz
|
| 100 |
+
task: qa
|
| 101 |
+
project_name: classy
|
| 102 |
+
exp_name: esc-aida-longformer-large-gam-cand-shuffle
|
| 103 |
+
exp_folder: /root/esc-ed/experiments/esc-aida-longformer-large-gam-cand-shuffle
|
| 104 |
+
transformer_model: allenai/longformer-large-4096
|
| 105 |
+
callbacks_monitor: val_accuracy
|
| 106 |
+
callbacks_mode: max
|
| 107 |
+
supported_tasks:
|
| 108 |
+
- qa
|
| 109 |
+
- sentence-pair
|
| 110 |
+
- sequence
|
| 111 |
+
- token
|
| 112 |
+
- generation
|
| 113 |
+
candidates_separator: '*'
|
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/hydra.yaml
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ./experiments/${exp_name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
sweep:
|
| 5 |
+
dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 6 |
+
subdir: ${hydra.job.num}
|
| 7 |
+
launcher:
|
| 8 |
+
_target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
|
| 9 |
+
sweeper:
|
| 10 |
+
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
|
| 11 |
+
max_batch_size: null
|
| 12 |
+
help:
|
| 13 |
+
app_name: ${hydra.job.name}
|
| 14 |
+
header: '${hydra.help.app_name} is powered by Hydra.
|
| 15 |
+
|
| 16 |
+
'
|
| 17 |
+
footer: 'Powered by Hydra (https://hydra.cc)
|
| 18 |
+
|
| 19 |
+
Use --hydra-help to view Hydra specific help
|
| 20 |
+
|
| 21 |
+
'
|
| 22 |
+
template: '${hydra.help.header}
|
| 23 |
+
|
| 24 |
+
== Configuration groups ==
|
| 25 |
+
|
| 26 |
+
Compose your configuration from those groups (group=option)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
$APP_CONFIG_GROUPS
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
== Config ==
|
| 33 |
+
|
| 34 |
+
Override anything in the config (foo.bar=value)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
$CONFIG
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
${hydra.help.footer}
|
| 41 |
+
|
| 42 |
+
'
|
| 43 |
+
hydra_help:
|
| 44 |
+
template: 'Hydra (${hydra.runtime.version})
|
| 45 |
+
|
| 46 |
+
See https://hydra.cc for more info.
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
== Flags ==
|
| 50 |
+
|
| 51 |
+
$FLAGS_HELP
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
== Configuration groups ==
|
| 55 |
+
|
| 56 |
+
Compose your configuration from those groups (For example, append hydra/job_logging=disabled
|
| 57 |
+
to command line)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
$HYDRA_CONFIG_GROUPS
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
Use ''--cfg hydra'' to Show the Hydra config.
|
| 64 |
+
|
| 65 |
+
'
|
| 66 |
+
hydra_help: ???
|
| 67 |
+
hydra_logging:
|
| 68 |
+
version: 1
|
| 69 |
+
formatters:
|
| 70 |
+
simple:
|
| 71 |
+
format: '[%(asctime)s][HYDRA] %(message)s'
|
| 72 |
+
handlers:
|
| 73 |
+
console:
|
| 74 |
+
class: logging.StreamHandler
|
| 75 |
+
formatter: simple
|
| 76 |
+
stream: ext://sys.stdout
|
| 77 |
+
root:
|
| 78 |
+
level: INFO
|
| 79 |
+
handlers:
|
| 80 |
+
- console
|
| 81 |
+
loggers:
|
| 82 |
+
logging_example:
|
| 83 |
+
level: DEBUG
|
| 84 |
+
disable_existing_loggers: false
|
| 85 |
+
job_logging:
|
| 86 |
+
version: 1
|
| 87 |
+
formatters:
|
| 88 |
+
simple:
|
| 89 |
+
format: '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
|
| 90 |
+
handlers:
|
| 91 |
+
console:
|
| 92 |
+
class: logging.StreamHandler
|
| 93 |
+
formatter: simple
|
| 94 |
+
stream: ext://sys.stdout
|
| 95 |
+
file:
|
| 96 |
+
class: logging.FileHandler
|
| 97 |
+
formatter: simple
|
| 98 |
+
filename: ${hydra.job.name}.log
|
| 99 |
+
root:
|
| 100 |
+
level: WARN
|
| 101 |
+
handlers:
|
| 102 |
+
- console
|
| 103 |
+
- file
|
| 104 |
+
disable_existing_loggers: false
|
| 105 |
+
env: {}
|
| 106 |
+
searchpath: []
|
| 107 |
+
callbacks: {}
|
| 108 |
+
output_subdir: .hydra
|
| 109 |
+
overrides:
|
| 110 |
+
hydra: []
|
| 111 |
+
task:
|
| 112 |
+
- +profiles=aida-longformer-large-gam
|
| 113 |
+
- device=cuda
|
| 114 |
+
- device.gpus=[0]
|
| 115 |
+
- exp_name=esc-aida-longformer-large-gam-cand-shuffle
|
| 116 |
+
- data.datamodule.dataset_path=data/aida
|
| 117 |
+
- logging.wandb.use_wandb=True
|
| 118 |
+
- logging.wandb.project_name=esc-ed
|
| 119 |
+
- logging.wandb.experiment_name=aida-longformer-large-*sep-gam-cand-shuffle
|
| 120 |
+
- training.early_stopping_callback.patience=25
|
| 121 |
+
job:
|
| 122 |
+
name: train
|
| 123 |
+
override_dirname: +profiles=aida-longformer-large-gam,data.datamodule.dataset_path=data/aida,device.gpus=[0],device=cuda,exp_name=esc-aida-longformer-large-gam-cand-shuffle,logging.wandb.experiment_name=aida-longformer-large-*sep-gam-cand-shuffle,logging.wandb.project_name=esc-ed,logging.wandb.use_wandb=True,training.early_stopping_callback.patience=25
|
| 124 |
+
id: ???
|
| 125 |
+
num: ???
|
| 126 |
+
config_name: qa
|
| 127 |
+
env_set: {}
|
| 128 |
+
env_copy: []
|
| 129 |
+
config:
|
| 130 |
+
override_dirname:
|
| 131 |
+
kv_sep: '='
|
| 132 |
+
item_sep: ','
|
| 133 |
+
exclude_keys: []
|
| 134 |
+
runtime:
|
| 135 |
+
version: 1.1.1
|
| 136 |
+
cwd: /root/esc-ed
|
| 137 |
+
config_sources:
|
| 138 |
+
- path: hydra.conf
|
| 139 |
+
schema: pkg
|
| 140 |
+
provider: hydra
|
| 141 |
+
- path: classy.scripts.cli
|
| 142 |
+
schema: pkg
|
| 143 |
+
provider: main
|
| 144 |
+
- path: configurations
|
| 145 |
+
schema: pkg
|
| 146 |
+
provider: classy-searchpath-plugin
|
| 147 |
+
- path: /root/esc-ed/configurations
|
| 148 |
+
schema: file
|
| 149 |
+
provider: command-line
|
| 150 |
+
- path: ''
|
| 151 |
+
schema: structured
|
| 152 |
+
provider: schema
|
| 153 |
+
choices:
|
| 154 |
+
profiles: aida-longformer-large-gam
|
| 155 |
+
logging: default
|
| 156 |
+
training: default
|
| 157 |
+
prediction: qa
|
| 158 |
+
model: qa
|
| 159 |
+
device: cuda
|
| 160 |
+
data: qa
|
| 161 |
+
callbacks: empty
|
| 162 |
+
hydra/env: default
|
| 163 |
+
hydra/callbacks: null
|
| 164 |
+
hydra/job_logging: default
|
| 165 |
+
hydra/hydra_logging: default
|
| 166 |
+
hydra/hydra_help: default
|
| 167 |
+
hydra/help: default
|
| 168 |
+
hydra/sweeper: basic
|
| 169 |
+
hydra/launcher: basic
|
| 170 |
+
hydra/output: default
|
| 171 |
+
verbose:
|
| 172 |
+
- classy
|
experiments/extend-longformer-large/2021-10-22/09-11-39/.hydra/overrides.yaml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- +profiles=aida-longformer-large-gam
|
| 2 |
+
- device=cuda
|
| 3 |
+
- device.gpus=[0]
|
| 4 |
+
- exp_name=esc-aida-longformer-large-gam-cand-shuffle
|
| 5 |
+
- data.datamodule.dataset_path=data/aida
|
| 6 |
+
- logging.wandb.use_wandb=True
|
| 7 |
+
- logging.wandb.project_name=esc-ed
|
| 8 |
+
- logging.wandb.experiment_name=aida-longformer-large-*sep-gam-cand-shuffle
|
| 9 |
+
- training.early_stopping_callback.patience=25
|
experiments/extend-longformer-large/2021-10-22/09-11-39/checkpoints/best.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daf5ca2a79a3cdcac3864db7a17b3a9fb084457c7167a7d8e656d8f8cb31cdd7
|
| 3 |
+
size 5203223845
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
git+https://github.com/sapienzanlp/extend
|