Spaces:

bestroi
/

InscriptaNet

Sleeping

App Files Community

bestroi committed on Jan 7, 2025

Commit

c23a4ee

verified ·

1 Parent(s): 0fa0faa

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -6

app.py CHANGED Viewed

@@ -451,7 +451,10 @@ with tabs[1]:
 # Diplomatic Edition Tab
 # -------------------------------
-# Function to remove diacritics from text
 # Function to remove diacritics from text
 def remove_diacritics(text):
     """
@@ -476,11 +479,16 @@ def render_diplomatic(text_elem):
     def process_element(elem):
         if elem.tag == 'lb':
             finalize_current_line()
         elif elem.tag == 'expan':
             abbr_elem = elem.find('abbr')
             if abbr_elem is not None and abbr_elem.text:
                 current_line.append(abbr_elem.text)
             # Do not process <ex> or any other children within <expan>
         else:
             if elem.text:
                 current_line.append(elem.text)
@@ -513,7 +521,8 @@ def render_diplomatic(text_elem):
     # Join all lines with newline characters
     return '\n'.join(lines)
-# Assuming 'tabs' and 'df' are already defined in your Streamlit app
 with tabs[2]:
     st.subheader("Diplomatic Edition")
@@ -523,10 +532,14 @@ with tabs[2]:
     selected_inscription = df[df['Number'] == selected_inscription_num].iloc[0]
     # Parse the selected inscription's XML to get the Text element
-    tree = ET.ElementTree(ET.fromstring(inscriptions_content))
-    root = tree.getroot()
-    inscription_elem = root.find(f".//inscription[@n='{selected_inscription_num}']")
-    text_element = inscription_elem.find("Text") if inscription_elem is not None else None
     if text_element is not None:
         diplomatic_text = render_diplomatic(text_element)
@@ -534,6 +547,7 @@ with tabs[2]:
     else:
         st.warning("No text found for the selected inscription.")
 # -------------------------------
 # Editor Edition Tab
 # -------------------------------

 # Diplomatic Edition Tab
 # -------------------------------
+import streamlit as st
+import xml.etree.ElementTree as ET
+import unicodedata
 # Function to remove diacritics from text
 def remove_diacritics(text):
     """
     def process_element(elem):
         if elem.tag == 'lb':
             finalize_current_line()
+            if elem.tail:
+                # After <lb>, the tail text is the start of the new line
+                current_line.append(elem.tail)
         elif elem.tag == 'expan':
             abbr_elem = elem.find('abbr')
             if abbr_elem is not None and abbr_elem.text:
                 current_line.append(abbr_elem.text)
             # Do not process <ex> or any other children within <expan>
+            if elem.tail:
+                current_line.append(elem.tail)
         else:
             if elem.text:
                 current_line.append(elem.text)
     # Join all lines with newline characters
     return '\n'.join(lines)
+# Streamlit Application
+# Ensure that 'tabs' and 'df' are properly defined in your Streamlit app context
 with tabs[2]:
     st.subheader("Diplomatic Edition")
     selected_inscription = df[df['Number'] == selected_inscription_num].iloc[0]
     # Parse the selected inscription's XML to get the Text element
+    try:
+        tree = ET.ElementTree(ET.fromstring(inscriptions_content))
+        root = tree.getroot()
+        inscription_elem = root.find(f".//inscription[@n='{selected_inscription_num}']")
+        text_element = inscription_elem.find("Text") if inscription_elem is not None else None
+    except ET.ParseError:
+        st.error("Failed to parse the XML content. Please check the XML structure.")
+        text_element = None
     if text_element is not None:
         diplomatic_text = render_diplomatic(text_element)
     else:
         st.warning("No text found for the selected inscription.")
 # -------------------------------
 # Editor Edition Tab
 # -------------------------------