Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -451,6 +451,7 @@ with tabs[1]:
|
|
| 451 |
# Diplomatic Edition Tab
|
| 452 |
# -------------------------------
|
| 453 |
|
|
|
|
| 454 |
# Function to remove diacritics from text
|
| 455 |
def remove_diacritics(text):
|
| 456 |
"""
|
|
@@ -466,34 +467,49 @@ def remove_diacritics(text):
|
|
| 466 |
def render_diplomatic(text_elem):
|
| 467 |
"""
|
| 468 |
Transforms the XML Text element into uppercase Greek text without diacritics and spaces,
|
| 469 |
-
with line breaks at <lb> tags.
|
| 470 |
"""
|
| 471 |
lines = []
|
| 472 |
current_line = []
|
| 473 |
-
|
| 474 |
-
|
|
|
|
| 475 |
if elem.tag == 'lb':
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
line_text = ''.join(current_line).strip()
|
| 493 |
if line_text:
|
|
|
|
| 494 |
line_text = remove_diacritics(line_text).replace(' ', '').upper()
|
| 495 |
lines.append(line_text)
|
| 496 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
# Join all lines with newline characters
|
| 498 |
return '\n'.join(lines)
|
| 499 |
|
|
@@ -518,7 +534,6 @@ with tabs[2]:
|
|
| 518 |
else:
|
| 519 |
st.warning("No text found for the selected inscription.")
|
| 520 |
|
| 521 |
-
|
| 522 |
# -------------------------------
|
| 523 |
# Editor Edition Tab
|
| 524 |
# -------------------------------
|
|
|
|
| 451 |
# Diplomatic Edition Tab
|
| 452 |
# -------------------------------
|
| 453 |
|
| 454 |
+
# Function to remove diacritics from text
|
| 455 |
# Function to remove diacritics from text
|
| 456 |
def remove_diacritics(text):
|
| 457 |
"""
|
|
|
|
| 467 |
def render_diplomatic(text_elem):
    """
    Render an XML text element as a diplomatic transcription.

    The text inside ``text_elem`` is collected in document order, split into
    lines at every <lb> tag, stripped of diacritics and of all whitespace,
    and converted to uppercase. For <expan> elements only the direct text of
    the <abbr> child is kept; <ex> and any other children are skipped.

    Args:
        text_elem: Element exposing the ElementTree-style interface used
            here (``tag``, ``text``, ``tail``, ``find`` and iteration over
            child elements).

    Returns:
        The non-empty rendered lines joined with newline characters; an
        empty string when the element contains no text.
    """
    lines = []
    current_line = []

    def finalize_current_line():
        """Normalize the buffered fragments, store the line, reset the buffer."""
        line_text = ''.join(current_line).strip()
        if line_text:
            # str.split() with no argument splits on every whitespace run, so
            # newlines and tabs left by pretty-printed XML are removed too and
            # the only line breaks in the output are the ones from <lb> tags.
            line_text = ''.join(remove_diacritics(line_text).split()).upper()
            lines.append(line_text)
        # Clear in place: process_element shares this same list object.
        current_line.clear()

    def process_element(elem):
        """Buffer the text contained in ``elem``; its tail is left to the caller."""
        if elem.tag == 'lb':
            finalize_current_line()
        elif elem.tag == 'expan':
            abbr_elem = elem.find('abbr')
            if abbr_elem is not None and abbr_elem.text:
                current_line.append(abbr_elem.text)
            # Do not process <ex> or any other children within <expan>
        else:
            if elem.text:
                current_line.append(elem.text)
            for child in elem:
                process_element(child)
                # A child's tail is text of *this* element that follows the
                # child, so it is appended here. Handling it in the parent
                # keeps the root's own tail (text outside text_elem) out of
                # the rendering.
                if child.tail:
                    current_line.append(child.tail)

    # Start processing from the root text element
    process_element(text_elem)

    # Flush whatever follows the last <lb>; a no-op when the buffer is empty.
    finalize_current_line()

    # Join all lines with newline characters
    return '\n'.join(lines)
|
| 515 |
|
|
|
|
| 534 |
else:
|
| 535 |
st.warning("No text found for the selected inscription.")
|
| 536 |
|
|
|
|
| 537 |
# -------------------------------
|
| 538 |
# Editor Edition Tab
|
| 539 |
# -------------------------------
|