Spaces:
Running on Zero
Running on Zero
markup bug fix
Browse files- =0.25.1 +20 -0
- =5.0 +0 -0
- __pycache__/app.cpython-313.pyc +0 -0
- app.py +24 -2
- pytest.ini +1 -1
- tests/test_bug_neaniasfix.py +79 -0
- test_markup.py → tests/test_markup.py +6 -0
=0.25.1
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Requirement already satisfied: transformers in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (4.45.2)
|
| 2 |
+
Requirement already satisfied: huggingface-hub in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (0.24.7)
|
| 3 |
+
Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (3.16.1)
|
| 4 |
+
Requirement already satisfied: numpy>=1.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (1.26.4)
|
| 5 |
+
Requirement already satisfied: packaging>=20.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (24.0)
|
| 6 |
+
Requirement already satisfied: pyyaml>=5.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (6.0.1)
|
| 7 |
+
Requirement already satisfied: regex!=2019.12.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (2026.5.9)
|
| 8 |
+
Requirement already satisfied: requests in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (2.32.3)
|
| 9 |
+
Requirement already satisfied: safetensors>=0.4.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (0.4.5)
|
| 10 |
+
Requirement already satisfied: tokenizers<0.21,>=0.20 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (0.20.3)
|
| 11 |
+
Requirement already satisfied: tqdm>=4.27 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers) (4.66.5)
|
| 12 |
+
Requirement already satisfied: fsspec>=2023.5.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from huggingface-hub) (2024.6.1)
|
| 13 |
+
Requirement already satisfied: typing-extensions>=3.7.4.3 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from huggingface-hub) (4.11.0)
|
| 14 |
+
Requirement already satisfied: charset-normalizer<4,>=2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers) (3.3.2)
|
| 15 |
+
Requirement already satisfied: idna<4,>=2.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers) (3.6)
|
| 16 |
+
Requirement already satisfied: urllib3<3,>=1.21.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers) (2.2.1)
|
| 17 |
+
Requirement already satisfied: certifi>=2017.4.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers) (2024.2.2)
|
| 18 |
+
|
| 19 |
+
[notice] A new release of pip is available: 24.3.1 -> 26.1.2
|
| 20 |
+
[notice] To update, run: pip3 install --upgrade pip
|
=5.0
ADDED
|
File without changes
|
__pycache__/app.cpython-313.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-313.pyc and b/__pycache__/app.cpython-313.pyc differ
|
|
|
app.py
CHANGED
|
@@ -151,10 +151,32 @@ def _mark_syllable_plain(syllable: str, label_id: int) -> str:
|
|
| 151 |
marker = "_" if label_id == 1 else "^"
|
| 152 |
chars = list(syllable)
|
| 153 |
|
|
|
|
|
|
|
| 154 |
for i in range(len(chars) - 1, -1, -1):
|
| 155 |
if vowel(chars[i]):
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
return syllable + marker
|
| 159 |
|
| 160 |
|
|
|
|
| 151 |
marker = "_" if label_id == 1 else "^"
|
| 152 |
chars = list(syllable)
|
| 153 |
|
| 154 |
+
# Find the last vowel, skipping trailing non-letter characters (like punctuation)
|
| 155 |
+
vowel_idx = -1
|
| 156 |
for i in range(len(chars) - 1, -1, -1):
|
| 157 |
if vowel(chars[i]):
|
| 158 |
+
vowel_idx = i
|
| 159 |
+
break
|
| 160 |
+
# Not a vowel: keep scanning leftwards (both branches below only `continue`, so Greek consonants and punctuation are skipped alike)
|
| 161 |
+
ch = chars[i]
|
| 162 |
+
if ch not in "^_" and ("\u0370" <= ch <= "\u03ff" or "\u1f00" <= ch <= "\u1fff"):
|
| 163 |
+
# It's a Greek letter but not a vowel
|
| 164 |
+
continue
|
| 165 |
+
if not ("\u0370" <= ch <= "\u03ff" or "\u1f00" <= ch <= "\u1fff") and ch not in "^_":
|
| 166 |
+
# It's not a Greek letter or markup - it's punctuation or other, skip
|
| 167 |
+
continue
|
| 168 |
+
|
| 169 |
+
if vowel_idx >= 0:
|
| 170 |
+
# Found a vowel, insert marker after it
|
| 171 |
+
return "".join(chars[:vowel_idx + 1]) + marker + "".join(chars[vowel_idx + 1:])
|
| 172 |
+
|
| 173 |
+
# No vowel found, find the last Greek letter
|
| 174 |
+
for i in range(len(chars) - 1, -1, -1):
|
| 175 |
+
if "\u0370" <= chars[i] <= "\u03ff" or "\u1f00" <= chars[i] <= "\u1fff":
|
| 176 |
+
# Insert marker after the last Greek letter
|
| 177 |
+
return "".join(chars[:i + 1]) + marker + "".join(chars[i + 1:])
|
| 178 |
+
|
| 179 |
+
# No Greek letters found, append marker at the end
|
| 180 |
return syllable + marker
|
| 181 |
|
| 182 |
|
pytest.ini
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
[pytest]
|
| 2 |
-
testpaths =
|
| 3 |
python_files = test_*.py
|
| 4 |
python_classes = Test*
|
| 5 |
python_functions = test_*
|
|
|
|
| 1 |
[pytest]
|
| 2 |
+
testpaths = tests
|
| 3 |
python_files = test_*.py
|
| 4 |
python_classes = Test*
|
| 5 |
python_functions = test_*
|
tests/test_bug_neaniasfix.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Test for the νεανίας bug fix.
|
| 3 |
+
|
| 4 |
+
The final alpha in νεανίας should be marked as long,
|
| 5 |
+
but before the fix it was left without a marker.
|
| 6 |
+
|
| 7 |
+
Additionally, punctuation handling was fixed so that marks are not placed
|
| 8 |
+
after punctuation characters.
|
| 9 |
+
|
| 10 |
+
Expected: νεα_νί^α_ς
|
| 11 |
+
Before the fix (buggy): νεα_νί^ας
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import pytest
|
| 15 |
+
import sys
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
# Add parent directory to path to import app module
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 20 |
+
|
| 21 |
+
from app import _render_plain_line_per_word
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def test_neaniaste_final_alpha_marked():
|
| 25 |
+
"""Test that the final alpha in νεανίας is correctly marked as long."""
|
| 26 |
+
# Use the mini model since it works correctly
|
| 27 |
+
model_id = "Ericu950/macronizer_mini"
|
| 28 |
+
|
| 29 |
+
input_text = "νεανίας"
|
| 30 |
+
output = _render_plain_line_per_word(input_text, model_id)
|
| 31 |
+
|
| 32 |
+
# The expected output should have the long mark on the final alpha before sigma
|
| 33 |
+
expected = "νεα_νί^α_ς"
|
| 34 |
+
|
| 35 |
+
assert output == expected, (
|
| 36 |
+
f"Output mismatch for '{input_text}'\n"
|
| 37 |
+
f"Expected: {repr(expected)}\n"
|
| 38 |
+
f"Got: {repr(output)}"
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_neaniaste_in_full_sentence():
|
| 43 |
+
"""Test νεανίας in the full sentence provided by user."""
|
| 44 |
+
model_id = "Ericu950/macronizer_mini"
|
| 45 |
+
|
| 46 |
+
input_text = "νεανίας ἀάατός ἐστιν καὶ καλός. τὰ παῖδες τὰ καλά"
|
| 47 |
+
output = _render_plain_line_per_word(input_text, model_id)
|
| 48 |
+
|
| 49 |
+
# The first word should be νεα_νί^α_ς
|
| 50 |
+
first_word = output.split()[0]
|
| 51 |
+
expected_first = "νεα_νί^α_ς"
|
| 52 |
+
|
| 53 |
+
assert first_word == expected_first, (
|
| 54 |
+
f"First word mismatch\n"
|
| 55 |
+
f"Expected: {repr(expected_first)}\n"
|
| 56 |
+
f"Got: {repr(first_word)}"
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def test_punctuation_not_marked():
|
| 61 |
+
"""Test that punctuation is preserved and marks are placed before punctuation."""
|
| 62 |
+
model_id = "Ericu950/macronizer_mini"
|
| 63 |
+
|
| 64 |
+
# Word with punctuation
|
| 65 |
+
input_text = "καλός."
|
| 66 |
+
output = _render_plain_line_per_word(input_text, model_id)
|
| 67 |
+
|
| 68 |
+
# The period should be preserved
|
| 69 |
+
assert "." in output, f"Period not preserved in output: {repr(output)}"
|
| 70 |
+
|
| 71 |
+
# The mark should be placed before the period, not forming "σ._"
|
| 72 |
+
# The output should have the period at the very end
|
| 73 |
+
assert output.endswith("."), f"Period should be at the end: {repr(output)}"
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
if __name__ == "__main__":
|
| 78 |
+
pytest.main([__file__, "-v", "-s"])
|
| 79 |
+
|
test_markup.py → tests/test_markup.py
RENAMED
|
@@ -4,6 +4,12 @@ Output should equal: input.replace("^", "").replace("_", "")
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from app import _render_plain_line_per_word, DEFAULT_MODEL_ID
|
| 8 |
|
| 9 |
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import pytest
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Add parent directory to path to import app module
|
| 11 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 12 |
+
|
| 13 |
from app import _render_plain_line_per_word, DEFAULT_MODEL_ID
|
| 14 |
|
| 15 |
|