Cleanup
Browse files
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from functools import partial
|
| 2 |
from pathlib import Path
|
| 3 |
-
|
| 4 |
import gradio as gr
|
|
|
|
| 5 |
from joeynmt.datasets import build_dataset
|
| 6 |
from joeynmt.helpers import (
|
| 7 |
load_checkpoint,
|
|
@@ -105,7 +106,7 @@ title = """
|
|
| 105 |
[<a href="https://github.com/sinaahmadi/ScriptNormalization" style="color:blue;">GitHub</a>]
|
| 106 |
[<a href="https://s3.amazonaws.com/pf-user-files-01/u-59356/uploads/2023-06-04/rw32pwp/ACL2023.mp4" style="color:blue;">Presentation</a>]
|
| 107 |
</h3>
|
| 108 |
-
|
| 109 |
|
| 110 |
description = """
|
| 111 |
<ul>
|
|
@@ -115,11 +116,8 @@ description = """
|
|
| 115 |
</ul>
|
| 116 |
|
| 117 |
<p style="font-size:120%;">What do all these sentences have in common? Being greeted in Arabic with "<em>mar7aba</em>" written in the Latin script, then asked how you are ("<em>هاو ئار یوو؟</em>") in English using the Perso-Arabic script of Kurdish and then, welcomed to this demo in French ("<em>Μπιάνβενου α σετ ντεμό!</em>") written in Greek script. All these sentences are written in an <strong>unconventional</strong> script.</p>
|
| 118 |
-
|
| 119 |
<p style="font-size:120%;">Although you may find these sentences risible, unconventional writing is a common practice among millions of speakers in bilingual communities. In our paper entitled "<a href="https://sinaahmadi.github.io/docs/articles/ahmadi2023acl.pdf" target="_blank"><strong>Script Normalization for Unconventional Writing of Under-Resourced Languages in Bilingual Communities</strong></a>", we shed light on this problem and propose an approach to normalize noisy text written in unconventional writing.</p>
|
| 120 |
-
|
| 121 |
<p style="font-size:120%;">This demo deploys a few models that are trained for <strong>the normalization of unconventional writing</strong>. Please note that this tool is not a spell-checker and cannot correct errors beyond character normalization. For better performance, you can apply hard-coded rules on the input and then pass it to the models, hence a hybrid system.</p>
|
| 122 |
-
|
| 123 |
<p style="font-size:120%;">For more information, you can check out the project on GitHub too: <a href="https://github.com/sinaahmadi/ScriptNormalization" target="_blank"><strong>https://github.com/sinaahmadi/ScriptNormalization</strong></a></p>
|
| 124 |
"""
|
| 125 |
|
|
@@ -142,14 +140,6 @@ examples = [
|
|
| 142 |
]
|
| 143 |
|
| 144 |
|
| 145 |
-
article = """
|
| 146 |
-
<div style="text-align: justify; max-width: 1200px; margin: 20px auto;">
|
| 147 |
-
<h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
|
| 148 |
-
<b>Created and deployed by Sina Ahmadi <a href="https://sinaahmadi.github.io/">(https://sinaahmadi.github.io/)</a>.
|
| 149 |
-
</h3>
|
| 150 |
-
</div>
|
| 151 |
-
"""
|
| 152 |
-
|
| 153 |
demo = gr.Interface(
|
| 154 |
title=title,
|
| 155 |
description=description,
|
|
@@ -160,7 +150,6 @@ demo = gr.Interface(
|
|
| 160 |
],
|
| 161 |
outputs=gr.Textbox(label="Normalized Text \U0001F642"),
|
| 162 |
examples=examples,
|
| 163 |
-
article=article,
|
| 164 |
examples_per_page=20,
|
| 165 |
)
|
| 166 |
|
|
|
|
| 1 |
from functools import partial
|
| 2 |
from pathlib import Path
|
| 3 |
+
|
| 4 |
import gradio as gr
|
| 5 |
+
import spaces
|
| 6 |
from joeynmt.datasets import build_dataset
|
| 7 |
from joeynmt.helpers import (
|
| 8 |
load_checkpoint,
|
|
|
|
| 106 |
[<a href="https://github.com/sinaahmadi/ScriptNormalization" style="color:blue;">GitHub</a>]
|
| 107 |
[<a href="https://s3.amazonaws.com/pf-user-files-01/u-59356/uploads/2023-06-04/rw32pwp/ACL2023.mp4" style="color:blue;">Presentation</a>]
|
| 108 |
</h3>
|
| 109 |
+
"""
|
| 110 |
|
| 111 |
description = """
|
| 112 |
<ul>
|
|
|
|
| 116 |
</ul>
|
| 117 |
|
| 118 |
<p style="font-size:120%;">What do all these sentences have in common? Being greeted in Arabic with "<em>mar7aba</em>" written in the Latin script, then asked how you are ("<em>هاو ئار یوو؟</em>") in English using the Perso-Arabic script of Kurdish and then, welcomed to this demo in French ("<em>Μπιάνβενου α σετ ντεμό!</em>") written in Greek script. All these sentences are written in an <strong>unconventional</strong> script.</p>
|
|
|
|
| 119 |
<p style="font-size:120%;">Although you may find these sentences risible, unconventional writing is a common practice among millions of speakers in bilingual communities. In our paper entitled "<a href="https://sinaahmadi.github.io/docs/articles/ahmadi2023acl.pdf" target="_blank"><strong>Script Normalization for Unconventional Writing of Under-Resourced Languages in Bilingual Communities</strong></a>", we shed light on this problem and propose an approach to normalize noisy text written in unconventional writing.</p>
|
|
|
|
| 120 |
<p style="font-size:120%;">This demo deploys a few models that are trained for <strong>the normalization of unconventional writing</strong>. Please note that this tool is not a spell-checker and cannot correct errors beyond character normalization. For better performance, you can apply hard-coded rules on the input and then pass it to the models, hence a hybrid system.</p>
|
|
|
|
| 121 |
<p style="font-size:120%;">For more information, you can check out the project on GitHub too: <a href="https://github.com/sinaahmadi/ScriptNormalization" target="_blank"><strong>https://github.com/sinaahmadi/ScriptNormalization</strong></a></p>
|
| 122 |
"""
|
| 123 |
|
|
|
|
| 140 |
]
|
| 141 |
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
demo = gr.Interface(
|
| 144 |
title=title,
|
| 145 |
description=description,
|
|
|
|
| 150 |
],
|
| 151 |
outputs=gr.Textbox(label="Normalized Text \U0001F642"),
|
| 152 |
examples=examples,
|
|
|
|
| 153 |
examples_per_page=20,
|
| 154 |
)
|
| 155 |
|