Update README.md
Browse files
README.md
CHANGED
|
@@ -27,4 +27,27 @@ IceBERT was trained with fairseq using the RoBERTa-base architecture. The traini
|
|
| 27 |
| Open Icelandic e-books (Rafbókavefurinn) | 14 MB | 2.6M |
|
| 28 |
| Data from the medical library of Landspitali | 33 MB | 5.2M |
|
| 29 |
| Student theses from Icelandic universities (Skemman) | 2.2 GB | 367M |
|
| 30 |
-
| Total | 15.8 GB | 2,664M |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
| Open Icelandic e-books (Rafbókavefurinn) | 14 MB | 2.6M |
|
| 28 |
| Data from the medical library of Landspitali | 33 MB | 5.2M |
|
| 29 |
| Student theses from Icelandic universities (Skemman) | 2.2 GB | 367M |
|
| 30 |
+
| Total | 15.8 GB | 2,664M |
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
If you find this model useful, please cite the following paper:
|
| 34 |
+
|
| 35 |
+
```
|
| 36 |
+
@inproceedings{snaebjarnarson-etal-2022-warm,
|
| 37 |
+
title = "A Warm Start and a Clean Crawled Corpus - A Recipe for Good Language Models",
|
| 38 |
+
author = "Sn{\ae}bjarnarson, V{\'e}steinn and
|
| 39 |
+
S{\'\i}monarson, Haukur Barri and
|
| 40 |
+
Ragnarsson, P{\'e}tur Orri and
|
| 41 |
+
Ing{\'o}lfsd{\'o}ttir, Svanhv{\'\i}t Lilja and
|
| 42 |
+
J{\'o}nsson, Haukur and
|
| 43 |
+
Thorsteinsson, Vilhjalmur and
|
| 44 |
+
Einarsson, Hafsteinn",
|
| 45 |
+
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
|
| 46 |
+
month = jun,
|
| 47 |
+
year = "2022",
|
| 48 |
+
address = "Marseille, France",
|
| 49 |
+
publisher = "European Language Resources Association",
|
| 50 |
+
url = "https://aclanthology.org/2022.lrec-1.464",
|
| 51 |
+
pages = "4356--4366",
|
| 52 |
+
}
|
| 53 |
+
```
|