Spaces:
Running
Running
Format plain output with in-syllable quantity markers
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import torch
|
|
| 7 |
import torch.nn.functional as F
|
| 8 |
from transformers import AutoModelForTokenClassification, AutoTokenizer
|
| 9 |
|
| 10 |
-
from grc_utils import lower_grc, normalize_word, heavy
|
| 11 |
|
| 12 |
from syllabify import syllabify_joined
|
| 13 |
from preprocess import process_word
|
|
@@ -122,6 +122,20 @@ def _syllable_chip(syllable: str, label_id: int) -> str:
|
|
| 122 |
return f'<span class="chip clear">{escaped}</span>'
|
| 123 |
|
| 124 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
def render_results(text: str, model_label: str):
|
| 126 |
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
| 127 |
if not lines:
|
|
@@ -135,6 +149,7 @@ def render_results(text: str, model_label: str):
|
|
| 135 |
for idx, line in enumerate(lines, start=1):
|
| 136 |
aligned = classify_line(line, model_id)
|
| 137 |
chips = "".join(_syllable_chip(syl, label) for syl, label in aligned)
|
|
|
|
| 138 |
|
| 139 |
cards.append(
|
| 140 |
f"""
|
|
@@ -147,9 +162,7 @@ def render_results(text: str, model_label: str):
|
|
| 147 |
)
|
| 148 |
|
| 149 |
export_lines.append(f"Line {idx}: {line}")
|
| 150 |
-
|
| 151 |
-
tag = "long" if label == 1 else "short" if label == 2 else "clear"
|
| 152 |
-
export_lines.append(f" - {syl}: {tag}")
|
| 153 |
|
| 154 |
html_result = (
|
| 155 |
"<div class='legend'><span class='dot long'></span>Long"
|
|
@@ -421,7 +434,7 @@ body.dark-mode {
|
|
| 421 |
"""
|
| 422 |
|
| 423 |
|
| 424 |
-
with gr.Blocks(
|
| 425 |
gr.HTML("""
|
| 426 |
<script>
|
| 427 |
// Detect system dark mode preference and apply on load
|
|
@@ -499,4 +512,4 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
|
|
| 499 |
|
| 500 |
|
| 501 |
if __name__ == "__main__":
|
| 502 |
-
|
|
|
|
| 7 |
import torch.nn.functional as F
|
| 8 |
from transformers import AutoModelForTokenClassification, AutoTokenizer
|
| 9 |
|
| 10 |
+
from grc_utils import lower_grc, normalize_word, heavy, vowel
|
| 11 |
|
| 12 |
from syllabify import syllabify_joined
|
| 13 |
from preprocess import process_word
|
|
|
|
| 122 |
return f'<span class="chip clear">{escaped}</span>'
|
| 123 |
|
| 124 |
|
| 125 |
+
def _mark_syllable_plain(syllable: str, label_id: int) -> str:
|
| 126 |
+
if label_id not in (1, 2):
|
| 127 |
+
return syllable
|
| 128 |
+
|
| 129 |
+
marker = "_" if label_id == 1 else "^"
|
| 130 |
+
chars = list(syllable)
|
| 131 |
+
|
| 132 |
+
for i in range(len(chars) - 1, -1, -1):
|
| 133 |
+
if vowel(chars[i]):
|
| 134 |
+
return "".join(chars[: i + 1]) + marker + "".join(chars[i + 1 :])
|
| 135 |
+
|
| 136 |
+
return syllable + marker
|
| 137 |
+
|
| 138 |
+
|
| 139 |
def render_results(text: str, model_label: str):
|
| 140 |
lines = [line.strip() for line in text.splitlines() if line.strip()]
|
| 141 |
if not lines:
|
|
|
|
| 149 |
for idx, line in enumerate(lines, start=1):
|
| 150 |
aligned = classify_line(line, model_id)
|
| 151 |
chips = "".join(_syllable_chip(syl, label) for syl, label in aligned)
|
| 152 |
+
plain_marked = [_mark_syllable_plain(syl, label) for syl, label in aligned]
|
| 153 |
|
| 154 |
cards.append(
|
| 155 |
f"""
|
|
|
|
| 162 |
)
|
| 163 |
|
| 164 |
export_lines.append(f"Line {idx}: {line}")
|
| 165 |
+
export_lines.append(" " + " ".join(plain_marked) if plain_marked else " (no syllables found)")
|
|
|
|
|
|
|
| 166 |
|
| 167 |
html_result = (
|
| 168 |
"<div class='legend'><span class='dot long'></span>Long"
|
|
|
|
| 434 |
"""
|
| 435 |
|
| 436 |
|
| 437 |
+
with gr.Blocks() as demo:
|
| 438 |
gr.HTML("""
|
| 439 |
<script>
|
| 440 |
// Detect system dark mode preference and apply on load
|
|
|
|
| 512 |
|
| 513 |
|
| 514 |
if __name__ == "__main__":
|
| 515 |
+
demo.launch(css=CSS)
|