| | --- |
| | language: |
| | - en |
| | - de |
| | - fr |
| | - it |
| | - nl |
| | - multilingual |
| | tags: |
| | - punctuation prediction |
| | - punctuation |
| | datasets: wmt/europarl |
| | license: mit |
| | widget: |
| | - text: "Ondanks dat het nu bijna voorjaar is hebben we nog steeds best koude dagen" |
| | example_title: "Dutch" |
| | - text: "Ho sentito che ti sei laureata il che mi fa molto piacere" |
| | example_title: "Italian" |
| | - text: "Tous les matins vers quatre heures mon père ouvrait la porte de ma chambre" |
| | example_title: "French" |
| | - text: "Ist das eine Frage Frau Müller" |
| | example_title: "German" |
| | - text: "My name is Clara and I live in Berkeley California" |
| | example_title: "English" |
| | metrics: |
| | - f1 |
| | --- |
| | |
| | # Work in progress |
| |
|
| | ## Classification report over all languages |
| | ``` |
| |               precision    recall  f1-score   support |
| | |
| |            0       0.99      0.99      0.99  47903344 |
| |            .       0.94      0.95      0.95   2798780 |
| |            ,       0.85      0.84      0.85   3451618 |
| |            ?       0.88      0.85      0.87     88876 |
| |            -       0.61      0.32      0.42    157863 |
| |            :       0.72      0.52      0.60    103789 |
| | |
| |     accuracy                           0.98  54504270 |
| |    macro avg       0.83      0.75      0.78  54504270 |
| | weighted avg       0.98      0.98      0.98  54504270 |
| | ``` |
| |
|
| |
|
| |
|
| |
|
| | ## How to cite us |
| |
|
| | ``` |
| | @inproceedings{guhr-EtAl:2021:fullstop, |
| | title={FullStop: Multilingual Deep Models for Punctuation Prediction}, |
| | author = {Guhr, Oliver and Schumann, Anne-Kathrin and Bahrmann, Frank and Böhme, Hans Joachim}, |
| | booktitle = {Proceedings of the Swiss Text Analytics Conference 2021}, |
| | month = {June}, |
| | year = {2021}, |
| | address = {Winterthur, Switzerland}, |
| | publisher = {CEUR Workshop Proceedings}, |
| | url = {http://ceur-ws.org/Vol-2957/sepp_paper4.pdf} |
| | } |
| | |
| | ``` |
| |
|
| | ``` |
| | @misc{https://doi.org/10.48550/arxiv.2301.03319, |
| | doi = {10.48550/ARXIV.2301.03319}, |
| | url = {https://arxiv.org/abs/2301.03319}, |
| | author = {Vandeghinste, Vincent and Guhr, Oliver}, |
| | keywords = {Computation and Language (cs.CL), Artificial Intelligence (cs.AI), FOS: Computer and information sciences, I.2.7}, |
| | title = {FullStop: Punctuation and Segmentation Prediction for Dutch with Transformers}, |
| | publisher = {arXiv}, |
| | year = {2023}, |
| | copyright = {Creative Commons Attribution Share Alike 4.0 International} |
| | } |
| | |
| | ``` |
| |
|