Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import xml.etree.ElementTree as ET | |
| import pandas as pd | |
| from io import StringIO | |
| import folium | |
| from streamlit_folium import st_folium | |
| import unicodedata | |
| import networkx as nx | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| # ------------------------------- | |
| # Authority Lists as XML Strings | |
| # ------------------------------- | |
# Authority list of inscription materials, embedded as an XML document.
# Each <material> is identified by its `id` attribute and carries a Latin
# <name>, an English <name_en> and a free-text <description>.
materials_xml = """<?xml version="1.0" encoding="UTF-8"?>
<materials>
<material id="LAPIS">
<name>Lapis</name>
<name_en>Stone</name_en>
<description>Stone used as a durable medium for inscriptions and engravings.</description>
</material>
<material id="ARGENTUM">
<name>Argentum</name>
<name_en>Silver</name_en>
<description>Silver used in inscriptions, often for its lustrous appearance and value.</description>
</material>
<material id="PLUMBUM">
<name>Plumbum</name>
<name_en>Lead</name_en>
<description>Lead utilized in inscriptions, valued for its malleability and ease of engraving.</description>
</material>
<material id="OPUS_FIGLINAE">
<name>Opus Figlinae</name>
<name_en>Pottery</name_en>
<description>Pottery used as a medium for inscriptions, typically in the form of ceramic artifacts.</description>
</material>
</materials>
"""
# Authority list of places, embedded as an XML document.
# Each <place> is identified by its `id` attribute and carries a <name>,
# GeoNames and Pleiades links, decimal <latitude>/<longitude> and a
# <description>. A lone "-" is used where a link is not available.
# NOTE(review): ANTISARA is described as "exact modern location TBD" and both
# it and MACEDONIA use round coordinates -- presumably placeholders; confirm
# the coordinates against the linked gazetteers before relying on the map.
places_xml = """<?xml version="1.0" encoding="UTF-8"?>
<places>
<place id="VIZE">
<name>Vize</name>
<geonamesLink>https://www.geonames.org/738154/vize.html</geonamesLink>
<pleiadesLink>https://pleiades.stoa.org/places/511190</pleiadesLink>
<latitude>40.6545</latitude>
<longitude>28.4078</longitude>
<description>Ancient city located in modern-day Turkey.</description>
</place>
<place id="PHILIPPI">
<name>Philippi</name>
<geonamesLink>https://www.geonames.org/734652/filippoi-philippi.html</geonamesLink>
<pleiadesLink>https://pleiades.stoa.org/places/501482</pleiadesLink>
<latitude>40.5044</latitude>
<longitude>24.9722</longitude>
<description>Ancient city in Macedonia, founded by Philip II of Macedon.</description>
</place>
<place id="AUGUSTA_TRAIANA">
<name>Augusta Traiana</name>
<geonamesLink>https://www.geonames.org/maps/google_42.4333_25.65.html</geonamesLink>
<pleiadesLink>https://pleiades.stoa.org/places/216731</pleiadesLink>
<latitude>42.4259</latitude>
<longitude>25.6272</longitude>
<description>Ancient Roman city, present-day Stara Zagora in Bulgaria.</description>
</place>
<place id="DYRRACHIUM">
<name>Dyrrachium</name>
<geonamesLink>https://www.geonames.org/3185728/durres.html</geonamesLink>
<pleiadesLink>https://pleiades.stoa.org/places/481818</pleiadesLink>
<latitude>41.3231</latitude>
<longitude>19.4417</longitude>
<description>Ancient city on the Adriatic coast, present-day Durrës in Albania.</description>
</place>
<place id="ANTISARA">
<name>Antisara</name>
<geonamesLink>https://www.geonames.org/736079/akra-kalamitsa.html</geonamesLink>
<pleiadesLink>https://pleiades.stoa.org/places/501351</pleiadesLink>
<latitude>39.5000</latitude>
<longitude>20.0000</longitude>
<description>Ancient settlement, exact modern location TBD.</description>
</place>
<place id="MACEDONIA">
<name>Macedonia</name>
<geonamesLink>-</geonamesLink>
<pleiadesLink>-</pleiadesLink>
<latitude>40.0000</latitude>
<longitude>22.0000</longitude>
<description>Historical region in Southeast Europe, encompassing parts of modern Greece, North Macedonia, and Bulgaria.</description>
</place>
</places>
"""
# Authority list of emperor titles, embedded as an XML document.
# Each <title> is identified by its `id` attribute and carries a Latin <name>,
# a Greek <name_gr> ("-" where none is given), an <abbreviation> and a
# <description>.
titles_xml = """<?xml version="1.0" encoding="UTF-8"?>
<emperorTitles>
<title id="IMPERATOR">
<name>Imperator</name>
<name_gr>Αυτοκράτορας</name_gr>
<abbreviation>Imp.</abbreviation>
<description>A title granted to a victorious general, later adopted as a formal title by Roman emperors.</description>
</title>
<title id="CAESAR">
<name>Caesar</name>
<name_gr>Καῖσαρ</name_gr>
<abbreviation>Caes.</abbreviation>
<description>A title used by Roman emperors, originally the family name of Julius Caesar.</description>
</title>
<title id="AUGUSTUS">
<name>Augustus</name>
<name_gr>-</name_gr>
<abbreviation>Aug.</abbreviation>
<description>The first Roman emperor's title, signifying revered or majestic status.</description>
</title>
</emperorTitles>
"""
| # ------------------------------- | |
| # Parse Authority Lists | |
| # ------------------------------- | |
def parse_materials(xml_string):
    """Build the materials lookup from the materials authority XML.

    Args:
        xml_string: XML document whose root contains <material> children.

    Returns:
        A dict mapping each material's ``id`` attribute to a dict with the
        keys ``Name``, ``Name_EN`` and ``Description`` (the text of the
        <name>, <name_en> and <description> children).

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml_string`` is not
            well-formed.
        AttributeError: if a <material> lacks one of the three children.
    """
    field_tags = (
        ('Name', 'name'),
        ('Name_EN', 'name_en'),
        ('Description', 'description'),
    )
    return {
        node.get('id'): {key: node.find(tag).text for key, tag in field_tags}
        for node in ET.fromstring(xml_string).findall('material')
    }
def parse_places(xml_string):
    """Build the places lookup from the places authority XML.

    Args:
        xml_string: XML document whose root contains <place> children.

    Returns:
        A dict mapping each place's ``id`` attribute to a dict with the keys
        ``Name``, ``GeoNames Link``, ``Pleiades Link``, ``Latitude``,
        ``Longitude`` and ``Description``. ``Latitude``/``Longitude`` are
        floats, or ``None`` when the element is missing or its text is not a
        number.

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml_string`` is not
            well-formed.
        AttributeError: if a <place> lacks one of the text children
            (name, links, description).
    """

    def coordinate(place, tag):
        # The authority file uses "-" for values that are not available; a
        # coordinate written that way (or left out) must not abort loading
        # of the whole list, so it is reported as None instead.
        node = place.find(tag)
        raw = node.text if node is not None else None
        try:
            return float(raw)
        except (TypeError, ValueError):
            return None

    places = {}
    for place in ET.fromstring(xml_string).findall('place'):
        places[place.get('id')] = {
            'Name': place.find('name').text,
            'GeoNames Link': place.find('geonamesLink').text,
            'Pleiades Link': place.find('pleiadesLink').text,
            'Latitude': coordinate(place, 'latitude'),
            'Longitude': coordinate(place, 'longitude'),
            'Description': place.find('description').text,
        }
    return places
def parse_titles(xml_string):
    """Build the emperor-title lookup from the titles authority XML.

    Args:
        xml_string: XML document whose root contains <title> children.

    Returns:
        A dict mapping each title's ``id`` attribute to a dict with the keys
        ``Name``, ``Name_GR``, ``Abbreviation`` and ``Description``.

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml_string`` is not
            well-formed.
        AttributeError: if a <title> lacks one of the four children.
    """
    lookup = {}
    for node in ET.fromstring(xml_string).findall('title'):
        entry = {}
        entry['Name'] = node.find('name').text
        entry['Name_GR'] = node.find('name_gr').text
        entry['Abbreviation'] = node.find('abbreviation').text
        entry['Description'] = node.find('description').text
        lookup[node.get('id')] = entry
    return lookup
# Load authority data
# Each embedded XML authority list is parsed into a module-level dict keyed by
# the entry's `id` attribute; the functions and tab code below read these
# dicts directly.
materials_dict = parse_materials(materials_xml)
places_dict = parse_places(places_xml)
titles_dict = parse_titles(titles_xml)
| # ------------------------------- | |
| # Function to Find Place ID by Name (Case-Insensitive) | |
| # ------------------------------- | |
def find_place_id_by_name(name):
    """Look up a place ID in ``places_dict`` by its display name.

    The comparison ignores case and surrounding whitespace. The first entry
    whose ``Name`` matches wins; when nothing matches, ``name`` itself is
    returned unchanged so callers can still display it.
    """
    return next(
        (
            place_id
            for place_id, entry in places_dict.items()
            if entry['Name'].strip().lower() == name.strip().lower()
        ),
        name,  # fall back to the caller's original string
    )
| # ------------------------------- | |
| # Function to Parse Inscriptions | |
| # ------------------------------- | |
# Column order of the DataFrame returned by parse_inscriptions(). Passing it
# explicitly keeps the columns present even when there are no rows.
_INSCRIPTION_COLUMNS = [
    'Number', 'Publisher', 'Origin_ID', 'Origin', 'GeoNames Link',
    'Pleiades Link', 'Latitude', 'Longitude', 'Material_ID', 'Material',
    'Language', 'Titles', 'Title_Descriptions', 'Text', 'Dating', 'Images',
    'Encoder', 'Categories',
]


def _child_text(parent, tag, default="N/A"):
    """Return the text of the first ``tag`` child, or ``default`` if absent or empty."""
    node = parent.find(tag)
    if node is None or node.text is None:
        return default
    return node.text


def _resolve_origin_id(inscription):
    """Return the place ID for <Origin>: its ``ref``, else a name lookup, else "N/A"."""
    origin_elem = inscription.find('Origin')
    if origin_elem is None:
        return "N/A"
    origin_ref = origin_elem.get('ref')
    if origin_ref:
        return origin_ref
    origin_text = origin_elem.text.strip() if origin_elem.text else ""
    return find_place_id_by_name(origin_text)


def _resolve_material_id(inscription):
    """Return the material ID for <Material>: its ``ref``, else an English-name match, else the raw text."""
    material_elem = inscription.find('Material')
    if material_elem is None:
        return "N/A"
    material_ref = material_elem.get('ref')
    if material_ref:
        return material_ref
    material_text = material_elem.text.strip() if material_elem.text else ""
    wanted = material_text.lower()
    for id_, material in materials_dict.items():
        if material['Name_EN'].strip().lower() == wanted:
            return id_
    return material_text  # Use the text if no match found


def _collect_titles(text_elem):
    """Return (names, descriptions) for every <title> found under ``text_elem``."""
    names, descriptions = [], []
    if text_elem is None:
        return names, descriptions
    for title in text_elem.findall('.//title'):
        title_ref = title.get('ref')
        if title_ref and title_ref in titles_dict:
            title_info = titles_dict[title_ref]
            names.append(title_info['Name'])
            descriptions.append(title_info['Description'])
        elif title.text:
            # Unreferenced title: fall back to its own leading text.
            names.append(title.text.strip())
            descriptions.append("No description available.")
    return names, descriptions


def parse_inscriptions(xml_content):
    """Parse an inscriptions XML document into a DataFrame, one row per <inscription>.

    Args:
        xml_content: XML text. <inscription> elements are read from the
            direct children of the root; a document whose root *is* an
            <inscription> is treated as a single inscription.

    Returns:
        pandas.DataFrame with the columns listed in ``_INSCRIPTION_COLUMNS``.
        The columns are present even when the document holds no
        inscriptions, so callers can index them unconditionally. Missing or
        empty simple fields are reported as "N/A"; origin and material are
        resolved against ``places_dict`` / ``materials_dict``.

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml_content`` is not
            well-formed.
    """
    root = ET.fromstring(xml_content)
    if root.tag == 'inscription':
        inscription_elems = [root]
    else:
        inscription_elems = root.findall('inscription')

    rows = []
    for inscription in inscription_elems:
        origin_id = _resolve_origin_id(inscription)
        place = places_dict.get(origin_id, {})  # single lookup for all place fields

        material_id = _resolve_material_id(inscription)
        material = materials_dict.get(material_id, {}).get('Name_EN', material_id)

        text_elem = inscription.find('Text')
        titles_used, titles_descriptions = _collect_titles(text_elem)
        text = "".join(text_elem.itertext()).strip() if text_elem is not None else "N/A"

        # Empty <term/> elements have text None, which would break the join.
        category_terms = [
            term.text.strip()
            for term in inscription.findall('Category/term')
            if term.text and term.text.strip()
        ]

        rows.append({
            'Number': inscription.get('n'),
            'Publisher': _child_text(inscription, 'Publisher'),
            'Origin_ID': origin_id,
            'Origin': place.get('Name', origin_id),
            'GeoNames Link': place.get('GeoNames Link', "#"),
            'Pleiades Link': place.get('Pleiades Link', "#"),
            'Latitude': place.get('Latitude', None),
            'Longitude': place.get('Longitude', None),
            'Material_ID': material_id,
            'Material': material,
            'Language': _child_text(inscription, 'Language'),
            'Titles': ", ".join(titles_used) if titles_used else "N/A",
            'Title_Descriptions': "; ".join(titles_descriptions) if titles_descriptions else "N/A",
            'Text': text,
            'Dating': _child_text(inscription, 'Dating'),
            'Images': _child_text(inscription, 'Images'),
            'Encoder': _child_text(inscription, 'Encoder'),
            'Categories': ", ".join(category_terms),
        })
    return pd.DataFrame(rows, columns=_INSCRIPTION_COLUMNS)
| # ------------------------------- | |
| # Functions to Render Editions | |
| # ------------------------------- | |
def render_diplomatic(text_element):
    """Render a <Text> element as upper-cased lines, one per <lb>.

    NOTE(review): this definition is rebound by a second
    ``def render_diplomatic`` later in this module, before the only visible
    call site, so this body appears to be dead code.

    Behaviour of this body as written:
    - Elements are visited with ``text_element.iter()`` (every descendant).
    - Only ``elem.text`` is ever read; ``elem.tail`` is never appended, so
      text that follows an empty element such as <lb/> is not rendered.
    - <expan> elements nested in <supplied> are handled in the <supplied>
      branch and then visited again by ``iter()``; likewise an <abbr> is
      reached again through the final ``elif`` because "abbr" is not in its
      exclusion list.

    Returns:
        The rendered lines joined with newline characters.
    """
    lines = []
    current_line = ""
    for elem in text_element.iter():
        if elem.tag == "lb":
            # A line break closes the line collected so far.
            if current_line:
                lines.append(current_line.strip())
            current_line = "" # Start a new line
            # Prefix the new line with its number when <lb n="..."> gives one.
            line_number = elem.get("n", "")
            current_line += f"{line_number} " if line_number else ""
        elif elem.tag == "supplied":
            # Process nested <expan> elements and concatenate abbreviations
            supplied_content = ""
            for sub_elem in elem.findall(".//expan"): # Nested <expan> elements
                abbr_elem = sub_elem.find("abbr")
                if abbr_elem is not None and abbr_elem.text:
                    supplied_content += abbr_elem.text.upper()
            # Supplied text is wrapped in square brackets.
            current_line += f"[{supplied_content}]"
        elif elem.tag == "expan":
            # Use only the abbreviation part
            abbr_elem = elem.find("abbr")
            if abbr_elem is not None and abbr_elem.text:
                current_line += abbr_elem.text.upper()
        elif elem.tag == "g" and elem.get("type") == "leaf":
            current_line += " LEAF "
        elif elem.tag == "title" and elem.get("type") == "emperor":
            # Include title abbreviations
            title_ref = elem.get('ref')
            title_info = titles_dict.get(title_ref, {})
            # Unknown or missing refs contribute an empty string.
            abbreviation = title_info.get('Abbreviation', '')
            current_line += abbreviation
        elif elem.text and elem.tag not in ["supplied", "expan", "g", "title"]:
            current_line += elem.text.upper()
    if current_line:
        lines.append(current_line.strip()) # Append the last line
    return "\n".join(lines)
def render_editor(text_element):
    """Render a <Text> element as lines with abbreviations shown as ``abbr(expansion)``.

    NOTE(review): this definition is rebound by a second
    ``def render_editor`` later in this module, before the only visible call
    site, so this body appears to be dead code.

    Behaviour of this body as written:
    - Elements are visited with ``text_element.iter()`` (every descendant).
    - Only ``elem.text`` is ever read; ``elem.tail`` is never appended, so
      text that follows an empty element such as <lb/> is not rendered.
    - <expan> elements nested in <supplied> are rendered in the <supplied>
      branch and then visited again by ``iter()``; <abbr> and <ex> are also
      reached through the final ``elif`` because neither tag is in its
      exclusion list.

    Returns:
        The rendered lines joined with newline characters.
    """
    lines = []
    current_line = ""
    for elem in text_element.iter():
        if elem.tag == "lb":
            # A line break closes the line collected so far.
            if current_line:
                lines.append(current_line.strip())
            current_line = "" # Start a new line
            # Prefix the new line with its number when <lb n="..."> gives one.
            line_number = elem.get("n", "")
            current_line += f"{line_number} " if line_number else ""
        elif elem.tag == "supplied":
            # Process nested <expan> elements with abbreviation and expansion
            supplied_content = []
            for sub_elem in elem.findall(".//expan"): # Nested <expan> elements
                abbr_elem = sub_elem.find("abbr")
                ex_elem = sub_elem.find("ex")
                # Missing or empty parts render as an empty string.
                abbr = abbr_elem.text if abbr_elem is not None and abbr_elem.text else ""
                ex = ex_elem.text if ex_elem is not None and ex_elem.text else ""
                supplied_content.append(f"{abbr}({ex})")
            current_line += " ".join(supplied_content)
        elif elem.tag == "expan":
            # Render abbreviation and expansion
            abbr_elem = elem.find("abbr")
            ex_elem = elem.find("ex")
            abbr = abbr_elem.text if abbr_elem is not None and abbr_elem.text else ""
            ex = ex_elem.text if ex_elem is not None and ex_elem.text else ""
            current_line += f"{abbr}({ex})"
        elif elem.tag == "g" and elem.get("type") == "leaf":
            current_line += " ((leaf)) "
        elif elem.tag == "title" and elem.get("type") == "emperor":
            # Render title abbreviation and name
            title_ref = elem.get('ref')
            title_info = titles_dict.get(title_ref, {})
            # Unknown or missing refs contribute empty strings.
            abbreviation = title_info.get('Abbreviation', '')
            name_gr = title_info.get('Name_GR', '')
            current_line += f"{abbreviation} {name_gr}"
        elif elem.text and elem.tag not in ["supplied", "expan", "g", "title"]:
            current_line += elem.text
    if current_line:
        lines.append(current_line.strip()) # Append the last line
    return "\n".join(lines)
| # ------------------------------- | |
| # Streamlit App Layout | |
| # ------------------------------- | |
# Page-wide settings; no other st.* call precedes this one in the file.
st.set_page_config(page_title="Epigraphic XML Viewer", layout="wide")
st.title("Epigraphic XML Viewer: Diplomatic and Editor Editions")
# -------------------------------
# Sidebar - Project Information
# -------------------------------
with st.sidebar:
    # Logo is loaded from a path relative to the working directory.
    st.image("imgs/logo_inscripta.jpg", use_container_width=True, caption="Latin and Ancient Greek Inscriptions")
    st.header("Project Information")
    # Static Markdown blurb describing the tool and its authority lists.
    st.markdown("""
**Epigraphic Database Viewer** is a tool designed to visualize and analyze ancient inscriptions.
**Features**:
- Upload and view XML inscriptions data.
- Explore inscriptions in various formats.
- Visualize geographical origins on an interactive map.
**Authority Lists**:
- **Materials**: Details about materials used in inscriptions.
- **Places**: Geographical data and descriptions.
- **Emperor Titles**: Titles and abbreviations used in inscriptions.
**Developed by**: Kristiyan Simeonov, Sofia University
""")
| # ------------------------------- | |
| # File uploader for Inscriptions XML | |
| # ------------------------------- | |
# Sample document used when the user has not uploaded a file
# (default XML data as provided by the user).
DEFAULT_INSCRIPTIONS_XML = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE epiData SYSTEM "epiData.dtd"> <!--<!DOCTYPE epiData SYSTEM "https://raw.githubusercontent.com/Bestroi150/EpiDataBase/refs/heads/main/epiData.dtd">-->
<epiData>
<inscription n="1">
<Publisher>EDCS</Publisher>
<Origin ref="VIZE">Vize</Origin>
<Origin-Geonames-Link>https://www.geonames.org/738154/vize.html</Origin-Geonames-Link>
<Origin-Pleiades-Link>https://pleiades.stoa.org/places/511190</Origin-Pleiades-Link>
<Institution ID="AE 1951, 00257"></Institution>
<Category>
<term>Augusti/Augustae</term>
<term>ordo senatorius</term>
<term>tituli sacri</term>
<term>tria nomina</term>
<term>viri</term>
</Category>
<Material ref="LAPIS">lapis</Material>
<Language>Greek</Language>
<Text>
<lb n="1"/>ἀγαθῇ τύχῃ
<lb n="2"/>ὑπὲρ τῆς τοῦ <title type="emperor" ref="IMPERATOR">Αὐτοκράτορος</title>
<lb n="3" break="no"/><expan><abbr>T</abbr><ex>ίτου</ex></expan> <expan>Αἰλ<ex>ίου</ex></expan> <persName type="emperor">Ἁδριανοῦ Ἀντωνείνου</persName> <title type="emperor">Καί
<lb n="4"/>σαρος</title><expan>Σεβ<ex>αστοῦ</ex></expan> Εὐσεβοῦς καὶ Οὐήρου Καίσαρ
<lb n="5"/>ος νείκης τε καὶ αἰωνίου διαμονῆς καὶ τοῦ
<lb n="6"/>σύμπαντος αὐτῶν οἴκου ἱερᾶς τε
<lb n="7"/>συνκλήτου καὶ δήμου Ῥωμαίων
<lb n="8" break="no"/>ἡγεμονεύοντος <place type="province">ἐπαρχείας Θρᾴκης</place>
<lb n="9"/><persName type="official"> <expan>Γ<ex>αΐου</ex></expan> Ἰουλίου <expan>Κομ<ex>μ</ex></expan>όδου</persName> <title type="official">πρεσβ<ex>ευτοῦ</ex></title> <expan>Σεβ<ex>αστοῦ</ex></expan>
<lb n="10"/>ἀντιστρατήγου ἡ <place type="city">πόλις Βιζυηνῶν</place>
<lb n="11"/>κατεσκεύασεν τοὺς πυργοὺς διὰ
<lb n="12" break="no"/>ἐπιμελητῶν Φίρμου Αυλουπορε
<lb n="13"/>ος καὶ Αυλουκενθου Δυτουκενθου
<lb n="14"/>καὶ Ραζδου Ὑακίνθου εὐτυχεῖτε
</Text>
<Dating>155 to 155</Dating>
<Images>https://db.edcs.eu/epigr/ae/ae1951/ae1951-74.pdf</Images>
<Encoder>Admin</Encoder>
</inscription>
</epiData>
"""

uploaded_file = st.file_uploader("Upload Inscriptions XML File", type=["xml"])
if uploaded_file:
    try:
        # "utf-8-sig" decodes plain UTF-8 and additionally drops a leading
        # byte-order mark, which would otherwise stay in the text as U+FEFF
        # in front of the XML declaration.
        inscriptions_content = uploaded_file.getvalue().decode("utf-8-sig")
    except UnicodeDecodeError as e:
        # Report undecodable uploads instead of failing with a traceback.
        st.error(f"The uploaded file is not valid UTF-8 text: {e}")
        st.stop()
    st.success("File uploaded successfully!")
else:
    st.info("No file uploaded. Using default sample XML data.")
    inscriptions_content = DEFAULT_INSCRIPTIONS_XML
| # ------------------------------- | |
| # Parse Inscriptions | |
| # ------------------------------- | |
# Build the inscriptions DataFrame that every tab below reads.
try:
    df = parse_inscriptions(inscriptions_content)
except ET.ParseError as e:
    # Malformed XML: show the parser message and end this script run, since
    # nothing below can work without `df`.
    st.error(f"Error parsing XML: {e}")
    st.stop()
# -------------------------------
# Tabs for Different Views
# -------------------------------
# `tabs` is indexed positionally below, in the order of these labels.
tabs = st.tabs(["Raw XML", "DataFrame", "Diplomatic Edition", "Editor Edition", "Visualization", "Authority Connections"])
# -------------------------------
# Raw XML Tab
# -------------------------------
with tabs[0]:
    st.subheader("Raw XML Content")
    # Shows the XML text exactly as it was loaded (upload or default sample).
    st.code(inscriptions_content, language="xml")
# -------------------------------
# DataFrame Tab
# -------------------------------
with tabs[1]:
    st.subheader("Inscriptions Data")
    st.dataframe(df)
| # ------------------------------- | |
| # Diplomatic Edition Tab | |
| # ------------------------------- | |
| import streamlit as st | |
| import xml.etree.ElementTree as ET | |
| import unicodedata | |
| # Function to remove diacritics from text | |
def remove_diacritics(text):
    """Return ``text`` with all combining marks (Unicode category ``Mn``) removed.

    The text is first decomposed (NFD) so that accents and breathings become
    separate combining characters, which are then dropped.
    """
    decomposed = unicodedata.normalize('NFD', text)
    return ''.join(
        char for char in decomposed
        if unicodedata.category(char) != 'Mn'
    )


def render_diplomatic(text_elem):
    """Render a <Text> element as a diplomatic transcription.

    The result is upper-case text without diacritics and without any
    whitespace, with one output line per <lb> element. Only what is written
    on the support is kept:

    - <ex> elements (editorial expansions) are skipped wherever they occur,
      whether inside <expan> or directly inside another element;
    - the inscribed letters of an abbreviation are kept both when they are
      wrapped in <abbr> and when they are the direct text of <expan>
      (e.g. ``<expan>Αἰλ<ex>ίου</ex></expan>``);
    - every other element contributes its text, children and tail.

    Lines that end up empty are omitted.

    Args:
        text_elem: the ``xml.etree.ElementTree.Element`` to render.

    Returns:
        The rendered lines joined with newline characters.
    """
    lines = []
    current_line = []

    def finalize_current_line():
        """Normalise the collected fragments into one output line and reset."""
        raw = ''.join(current_line)
        current_line.clear()
        # split()/join removes every kind of whitespace (spaces, tabs and the
        # newlines that come from the XML file's own layout).
        compact = ''.join(remove_diacritics(raw).split()).upper()
        if compact:
            lines.append(compact)

    def process_element(elem):
        """Collect the inscribed text of ``elem`` and its descendants in document order."""
        if elem.tag == 'lb':
            finalize_current_line()
        elif elem.tag == 'ex':
            # Editorial expansion: not part of the diplomatic text.
            pass
        else:
            if elem.text:
                current_line.append(elem.text)
            for child in elem:
                process_element(child)
        # The tail belongs to the parent's running text, so it is kept even
        # for skipped elements; after <lb> it starts the new line.
        if elem.tail:
            current_line.append(elem.tail)

    process_element(text_elem)
    finalize_current_line()  # flush whatever follows the last <lb>
    return '\n'.join(lines)
| # Streamlit Application | |
| # Ensure that 'tabs' and 'df' are properly defined in your Streamlit app context | |
with tabs[2]:
    st.subheader("Diplomatic Edition")
    # Select Inscription
    # A frame built from a document without inscriptions may have no columns.
    has_numbers = 'Number' in df.columns
    inscription_numbers = df['Number'].tolist() if has_numbers else []
    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers)
    # Row for the current selection, or None when nothing matches (no
    # inscriptions, or an inscription without an `n` attribute). The name is
    # kept in case code outside this section reads it -- not visible here.
    matching_rows = df[df['Number'] == selected_inscription_num] if has_numbers else df
    selected_inscription = matching_rows.iloc[0] if not matching_rows.empty else None
    # Parse the XML again to get the selected inscription's <Text> element
    try:
        root = ET.fromstring(inscriptions_content)
    except ET.ParseError:
        st.error("Failed to parse the XML content. Please check the XML structure.")
        text_element = None
    else:
        # Compare the attribute in Python instead of interpolating it into an
        # XPath predicate, which fails for values containing a quote. iter()
        # also covers a document whose root is the <inscription> itself.
        inscription_elem = next(
            (node for node in root.iter('inscription') if node.get('n') == selected_inscription_num),
            None,
        )
        text_element = inscription_elem.find("Text") if inscription_elem is not None else None
    if text_element is not None:
        diplomatic_text = render_diplomatic(text_element)
        st.code(diplomatic_text, language="plaintext")
    else:
        st.warning("No text found for the selected inscription.")
| # ------------------------------- | |
| # Editor Edition Tab | |
| # ------------------------------- | |
def render_editor(text_element):
    """Render a <Text> element as a plain-text editor's edition.

    - Each <lb> starts a new output line.
    - ``<expan>`` with both ``<abbr>`` and ``<ex>`` children renders as
      ``abbr(ex)``; a bare ``<ex>`` renders as ``(text)``; a bare ``<abbr>``
      renders as its text. Missing text renders as an empty string.
    - Every other element (names, places, titles, unknown tags) contributes
      its text and children without markup.
    - Whitespace inside each line is collapsed to single spaces, so the line
      breaks and indentation of the XML file itself do not leak into the
      output; leading and trailing blank lines are removed.

    Args:
        text_element: the ``xml.etree.ElementTree.Element`` to render.

    Returns:
        The rendered lines joined with newline characters.
    """
    lines = [[]]  # fragments per output line; the last list is the open line

    def emit(fragment):
        if fragment:
            lines[-1].append(fragment)

    def walk(elem):
        emit(elem.text)
        for child in elem:
            if child.tag == 'lb':
                lines.append([])  # Line break; start a new line
            elif child.tag == 'expan':
                abbr = child.find('abbr')
                ex = child.find('ex')
                if abbr is not None and ex is not None:
                    # e.g. <expan><abbr>T</abbr><ex>ίτου</ex></expan> → T(ίτου)
                    emit(f"{abbr.text or ''}({ex.text or ''})")
                else:
                    # Letters written directly inside <expan>: recurse so the
                    # nested <ex> is still parenthesised.
                    walk(child)
            elif child.tag == 'abbr':
                emit(child.text)
            elif child.tag == 'ex':
                if child.text:
                    emit(f"({child.text})")
            else:
                walk(child)
            emit(child.tail)

    walk(text_element)
    rendered = [' '.join(''.join(fragments).split()) for fragments in lines]
    return '\n'.join(rendered).strip()
with tabs[3]:
    st.subheader("Editor Edition")
    # Select Inscription
    # A frame built from a document without inscriptions may have no columns.
    inscription_numbers = df['Number'].tolist() if 'Number' in df.columns else []
    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers, key='editor_select')
    # Parse the entire XML to find the selected inscription
    try:
        root = ET.fromstring(inscriptions_content)
        # Compare the attribute in Python instead of interpolating it into an
        # XPath predicate, which fails for values containing a quote. iter()
        # yields the root as well, so a document whose root is the
        # <inscription> itself needs no special case.
        inscription_elem = next(
            (node for node in root.iter('inscription') if node.get('n') == selected_inscription_num),
            None,
        )
        text_element = inscription_elem.find("Text") if inscription_elem is not None else None
        if text_element is not None:
            editor_text = render_editor(text_element)
            st.code(editor_text, language="plaintext")
        else:
            st.warning("No text found for the selected inscription.")
    except ET.ParseError as e:
        st.error(f"Error parsing XML: {e}")
    except Exception as e:
        # Tab-level boundary: report anything unexpected in the UI rather
        # than aborting the remaining tabs.
        st.error(f"An unexpected error occurred: {e}")
| # ------------------------------- | |
| # Visualization Tab | |
| # ------------------------------- | |
import html  # stdlib; escapes XML-derived values before they go into popup HTML


def _is_http_url(value):
    """Return True when ``value`` is a string starting with http:// or https://."""
    return isinstance(value, str) and value.startswith(("http://", "https://"))


with tabs[4]:
    st.subheader("Visualization")
    if df.empty:
        # Nothing to filter or plot (and the frame may not even have columns).
        st.info("No inscriptions available to visualize.")
    else:
        # Extract categories; inscriptions without terms have an empty
        # 'Categories' string, which must not become a filter option.
        all_categories = {
            cat
            for categories in df['Categories']
            for cat in categories.split(", ")
            if cat
        }
        # Category filtering
        selected_categories = st.multiselect("Filter by Category", sorted(all_categories))
        if selected_categories:
            wanted = set(selected_categories)
            keep = df['Categories'].apply(lambda cats: not wanted.isdisjoint(cats.split(", ")))
            # .copy() so the column assignments below act on an independent frame
            filtered_df = df[keep].copy()
        else:
            filtered_df = df.copy()

        # Look up coordinates for a place ID in the places authority list
        def get_coordinates(origin_id):
            place = places_dict.get(origin_id, {})
            return place.get('Latitude'), place.get('Longitude')

        # Refresh Latitude/Longitude from the authority list. Building the
        # two columns from a list also works when no rows are left.
        coordinates = [get_coordinates(origin_id) for origin_id in filtered_df['Origin_ID']]
        filtered_df['Latitude'] = [lat for lat, _ in coordinates]
        filtered_df['Longitude'] = [lon for _, lon in coordinates]
        # Drop entries without coordinates
        map_df = filtered_df.dropna(subset=['Latitude', 'Longitude'])
        if not map_df.empty:
            # Create a Folium map centered around the average coordinates
            avg_lat = map_df['Latitude'].mean()
            avg_lon = map_df['Longitude'].mean()
            folium_map = folium.Map(location=[avg_lat, avg_lon], zoom_start=6)
            # Add markers to the map
            for _, row in map_df.iterrows():
                # Values come from a user-supplied XML file, so each one is
                # HTML-escaped before being placed in the popup markup.
                popup_fields = [
                    ("Inscription Number", row['Number']),
                    ("Publisher", row['Publisher']),
                    ("Material", row['Material']),
                    ("Language", row['Language']),
                    ("Dating", row['Dating']),
                    ("Encoder", row['Encoder']),
                    ("Categories", row['Categories']),
                    ("Text", row['Text']),
                ]
                popup_content = "".join(
                    f"<b>{label}:</b> {html.escape(str(value))}<br>\n"
                    for label, value in popup_fields
                )
                # Only http(s) targets are turned into a link.
                if _is_http_url(row['Images']):
                    popup_content += f'<a href="{html.escape(row["Images"])}" target="_blank">View Images</a><br>'
                folium.Marker(
                    location=[row['Latitude'], row['Longitude']],
                    popup=folium.Popup(popup_content, max_width=300),
                    tooltip=f"Inscription {row['Number']}"
                ).add_to(folium_map)
            # Display the Folium map using streamlit_folium
            st_folium(folium_map, width=700, height=500)
        else:
            st.write("No inscriptions to display on the map based on the selected filters.")
        st.dataframe(filtered_df)
        # Detailed View
        for _, row in filtered_df.iterrows():
            with st.expander(f"Inscription {row['Number']}"):
                st.markdown(f"**Publisher**: {row['Publisher']}")
                # The authority list uses "-" (and the parser "#") where no
                # link exists; show only real URLs as links.
                origin_links = [
                    f"[{label}]({url})"
                    for label, url in (
                        ("GeoNames Link", row['GeoNames Link']),
                        ("Pleiades Link", row['Pleiades Link']),
                    )
                    if _is_http_url(url)
                ]
                origin_suffix = f" ({', '.join(origin_links)})" if origin_links else ""
                st.markdown(f"**Origin**: {row['Origin']}{origin_suffix}")
                st.markdown(f"**Material**: {row['Material']} - {materials_dict.get(row['Material_ID'], {}).get('Description', '')}")
                st.markdown(f"**Language**: {row['Language']}")
                st.markdown(f"**Dating**: {row['Dating']}")
                st.markdown(f"**Encoder**: {row['Encoder']}")
                st.markdown(f"**Categories**: {row['Categories']}")
                st.markdown(f"**Text**:\n\n{row['Text']}")
                if _is_http_url(row['Images']):
                    st.markdown(f"[View Images]({row['Images']})")
                # Display material description
                material_desc = materials_dict.get(row['Material_ID'], {}).get('Description', "No description available.")
                st.markdown(f"**Material Description**: {material_desc}")
                # Display place description
                place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
                st.markdown(f"**Place Description**: {place_desc}")
| # ------------------------------- | |
| # Authority Connections Tab | |
| # ------------------------------- | |
| with tabs[5]: | |
| st.subheader("Authority Connections") | |
| # Define Authority Types | |
| authority_types = ["Material", "Place", "Title"] # Added "Title" | |
| # Select Authority Type | |
| selected_authority_type = st.selectbox("Select Authority Type", authority_types) | |
| # Based on selection, provide the corresponding options | |
| if selected_authority_type == "Material": | |
| # List all materials from materials_dict | |
| material_names = [material['Name_EN'] for material in materials_dict.values()] | |
| selected_material = st.selectbox("Select Material", sorted(material_names)) | |
| # Find the material ID based on the selected name | |
| material_id = None | |
| for id_, material in materials_dict.items(): | |
| if material['Name_EN'] == selected_material: | |
| material_id = id_ | |
| break | |
| if material_id: | |
| # Filter inscriptions that reference this material | |
| connected_inscriptions = df[df['Material_ID'] == material_id] | |
| st.markdown(f"### Inscriptions using **{selected_material}**") | |
| st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}") | |
| if not connected_inscriptions.empty: | |
| # Display inscriptions in a table | |
| st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Language', 'Dating', 'Encoder']]) | |
| # **Plotly Visualization: Inscriptions Over Time** | |
| st.markdown("#### Inscriptions Over Time") | |
| # Assuming 'Dating' is in a format that can be processed (e.g., "155 to 155") | |
| def extract_start_year(dating): | |
| if isinstance(dating, str): | |
| parts = dating.split('to') | |
| try: | |
| return int(parts[0].strip()) | |
| except: | |
| return None | |
| return None | |
| connected_inscriptions['Start_Year'] = connected_inscriptions['Dating'].apply(extract_start_year) | |
| year_counts = connected_inscriptions['Start_Year'].dropna().astype(int).value_counts().sort_index() | |
| year_counts = year_counts.reset_index() | |
| year_counts.columns = ['Year', 'Count'] | |
| fig_bar = px.bar( | |
| year_counts, | |
| x='Year', | |
| y='Count', | |
| labels={'Count': 'Number of Inscriptions'}, | |
| title=f'Number of Inscriptions Using {selected_material} Over Time', | |
| template='plotly_white' | |
| ) | |
| st.plotly_chart(fig_bar, use_container_width=True) | |
| # **Plotly Visualization: Network Graph of Inscriptions and Materials** | |
| st.markdown("#### Network Graph of Inscriptions and Materials") | |
| # Create a network graph using Plotly | |
# Star-shaped graph: the selected material is the hub and every matching
# inscription is a leaf connected to it.
G = nx.Graph()
G.add_node(selected_material, type='Material')
for number in connected_inscriptions['Number']:
    inscription_node = f"Inscription {number}"
    G.add_node(inscription_node, type='Inscription')
    G.add_edge(selected_material, inscription_node)

# A fixed seed keeps the layout stable; without it the nodes would be placed
# differently every time the script is executed.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)

# Edge coordinates: a None after each pair breaks the line, so every edge is
# drawn as its own segment within a single trace.
edge_x = []
edge_y = []
for start_node, end_node in G.edges():
    x0, y0 = pos[start_node]
    x1, y1 = pos[end_node]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='#888'),
    hoverinfo='none',
    mode='lines',
)

# Node coordinates, labels and colours all follow the same node order.
node_names = list(G.nodes())
node_colors = [
    'lightblue' if G.nodes[name]['type'] == 'Material' else 'lightgreen'
    for name in node_names
]
node_trace = go.Scatter(
    x=[pos[name][0] for name in node_names],
    y=[pos[name][1] for name in node_names],
    mode='markers+text',
    text=node_names,
    textposition="bottom center",
    hoverinfo='text',
    marker=dict(
        showscale=False,
        color=node_colors,
        size=20,
        line_width=2,
    ),
)

fig_network = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # The title font is set through title.font; the old `titlefont`
        # alias is deprecated and not accepted by newer Plotly releases.
        title=dict(
            text=f"Network Graph: {selected_material} and Connected Inscriptions",
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)
st.plotly_chart(fig_network, use_container_width=True)
| else: | |
| st.info("No inscriptions found for the selected material.") | |
| elif selected_authority_type == "Place": | |
# Offer every place name from places_dict in alphabetical order.
place_names = [entry['Name'] for entry in places_dict.values()]
selected_place = st.selectbox("Select Place", sorted(place_names))
# Resolve the chosen name back to the first ID carrying it; None if no entry matches.
place_id = next(
    (key for key, entry in places_dict.items() if entry['Name'] == selected_place),
    None,
)
| if place_id: | |
| # Filter inscriptions that originate from this place | |
| connected_inscriptions = df[df['Origin_ID'] == place_id] | |
| st.markdown(f"### Inscriptions from **{selected_place}**") | |
| st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}") | |
| if not connected_inscriptions.empty: | |
| # Display inscriptions in a table | |
| st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Material', 'Language', 'Dating', 'Encoder']]) | |
| # **Plotly Visualization: Geographical Distribution of Inscriptions** | |
| st.markdown("#### Geographical Distribution of Inscriptions") | |
# Keep only the columns the map needs and discard rows lacking a coordinate.
map_df = connected_inscriptions[['Latitude', 'Longitude', 'Number']].dropna(
    subset=['Latitude', 'Longitude']
)
if map_df.empty:
    st.info("No geographical data available for these inscriptions.")
else:
    # World map styling applied to the scatter plot's geo layout.
    geo_settings = {
        'scope': 'world',
        'projection_type': 'natural earth',
        'showland': True,
        'landcolor': 'lightgray',
        'showcountries': True,
    }
    fig_map = px.scatter_geo(
        map_df,
        lat='Latitude',
        lon='Longitude',
        hover_name='Number',
        title=f'Geographical Distribution of Inscriptions from {selected_place}',
        template='plotly_white',
    )
    fig_map.update_layout(geo=geo_settings)
    st.plotly_chart(fig_map, use_container_width=True)
| # **Plotly Visualization: Network Graph of Inscriptions and Places** | |
| st.markdown("#### Network Graph of Inscriptions and Places") | |
# Star-shaped graph: the selected place is the hub and every inscription
# originating there is a leaf connected to it.
G = nx.Graph()
G.add_node(selected_place, type='Place')
for number in connected_inscriptions['Number']:
    inscription_node = f"Inscription {number}"
    G.add_node(inscription_node, type='Inscription')
    G.add_edge(selected_place, inscription_node)

# A fixed seed keeps the layout stable; without it the nodes would be placed
# differently every time the script is executed.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)

# Edge coordinates: a None after each pair breaks the line, so every edge is
# drawn as its own segment within a single trace.
edge_x = []
edge_y = []
for start_node, end_node in G.edges():
    x0, y0 = pos[start_node]
    x1, y1 = pos[end_node]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='#888'),
    hoverinfo='none',
    mode='lines',
)

# Node coordinates, labels and colours all follow the same node order.
node_names = list(G.nodes())
node_colors = [
    'salmon' if G.nodes[name]['type'] == 'Place' else 'lightgreen'
    for name in node_names
]
node_trace = go.Scatter(
    x=[pos[name][0] for name in node_names],
    y=[pos[name][1] for name in node_names],
    mode='markers+text',
    text=node_names,
    textposition="bottom center",
    hoverinfo='text',
    marker=dict(
        showscale=False,
        color=node_colors,
        size=20,
        line_width=2,
    ),
)

fig_network = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # The title font is set through title.font; the old `titlefont`
        # alias is deprecated and not accepted by newer Plotly releases.
        title=dict(
            text=f"Network Graph: {selected_place} and Connected Inscriptions",
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)
st.plotly_chart(fig_network, use_container_width=True)
| else: | |
| st.info("No inscriptions found for the selected place.") | |
| elif selected_authority_type == "Title": | |
# Offer every title name from titles_dict in alphabetical order.
title_names = [entry['Name'] for entry in titles_dict.values()]
selected_title = st.selectbox("Select Title", sorted(title_names))
# Resolve the chosen name back to the first ID carrying it; None if no entry matches.
title_id = next(
    (key for key, entry in titles_dict.items() if entry['Name'] == selected_title),
    None,
)
| if title_id: | |
# Keep the inscriptions whose 'Titles' text mentions the selected title
# (case-insensitive; rows with a missing 'Titles' value are excluded).
# regex=False makes the title a literal string, so characters such as
# "(", ".", or "+" in a title name are not interpreted as a pattern.
# NOTE(review): this is a substring match, so a title contained in a longer
# one also matches -- confirm that is intended for comma-separated titles.
connected_inscriptions = df[
    df['Titles'].str.contains(selected_title, case=False, na=False, regex=False)
]
| st.markdown(f"### Inscriptions referencing **{selected_title}**") | |
| st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}") | |
| if not connected_inscriptions.empty: | |
| # Display inscriptions in a table | |
| st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Material', 'Language', 'Dating', 'Encoder']]) | |
| # **Plotly Visualization: Inscriptions Referencing the Title Over Time** | |
| st.markdown("#### Inscriptions Referencing the Title Over Time") | |
def extract_start_year(dating):
    """Return the starting year of a dating string such as "155 to 160".

    The text before the first "to" is stripped and parsed as an integer.

    Args:
        dating: Raw value from the 'Dating' column.

    Returns:
        The starting year as an int, or None when `dating` is not a string
        or its leading part is not a valid integer.
    """
    if not isinstance(dating, str):
        return None
    start_text = dating.partition('to')[0].strip()
    try:
        return int(start_text)
    except ValueError:
        # Only a failed int() parse means "no year"; anything else should surface.
        return None
# `connected_inscriptions` is a filtered slice of `df`; `assign` returns a new
# frame, so the Start_Year column is added without writing into that slice
# (which would trigger pandas' SettingWithCopyWarning).
connected_inscriptions = connected_inscriptions.assign(
    Start_Year=connected_inscriptions['Dating'].apply(extract_start_year)
)
# Count inscriptions per parsed start year; datings that could not be parsed
# (None) are dropped before counting.
year_counts = (
    connected_inscriptions['Start_Year']
    .dropna()
    .astype(int)
    .value_counts()
    .sort_index()
    .reset_index()
)
year_counts.columns = ['Year', 'Count']
if year_counts.empty:
    # No dating could be parsed, so there is nothing meaningful to plot.
    st.info("No datable inscriptions available for this chart.")
else:
    fig_bar = px.bar(
        year_counts,
        x='Year',
        y='Count',
        labels={'Count': 'Number of Inscriptions'},
        title=f'Number of Inscriptions Referencing "{selected_title}" Over Time',
        template='plotly_white',
    )
    st.plotly_chart(fig_bar, use_container_width=True)
| # **Plotly Visualization: Network Graph of Inscriptions and Titles** | |
| st.markdown("#### Network Graph of Inscriptions and Titles") | |
| # Create a network graph using Plotly | |
# Star-shaped graph: the selected title is the hub and every inscription
# referencing it is a leaf connected to it.
G = nx.Graph()
G.add_node(selected_title, type='Title')
for number in connected_inscriptions['Number']:
    inscription_node = f"Inscription {number}"
    G.add_node(inscription_node, type='Inscription')
    G.add_edge(selected_title, inscription_node)

# A fixed seed keeps the layout stable; without it the nodes would be placed
# differently every time the script is executed.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)

# Edge coordinates: a None after each pair breaks the line, so every edge is
# drawn as its own segment within a single trace.
edge_x = []
edge_y = []
for start_node, end_node in G.edges():
    x0, y0 = pos[start_node]
    x1, y1 = pos[end_node]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=1, color='#888'),
    hoverinfo='none',
    mode='lines',
)

# Node coordinates, labels and colours all follow the same node order.
node_names = list(G.nodes())
node_colors = [
    'orange' if G.nodes[name]['type'] == 'Title' else 'lightgreen'
    for name in node_names
]
node_trace = go.Scatter(
    x=[pos[name][0] for name in node_names],
    y=[pos[name][1] for name in node_names],
    mode='markers+text',
    text=node_names,
    textposition="bottom center",
    hoverinfo='text',
    marker=dict(
        showscale=False,
        color=node_colors,
        size=20,
        line_width=2,
    ),
)

fig_network = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # The title font is set through title.font; the old `titlefont`
        # alias is deprecated and not accepted by newer Plotly releases.
        title=dict(
            text=f"Network Graph: {selected_title} and Connected Inscriptions",
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)
st.plotly_chart(fig_network, use_container_width=True)
| else: | |
| st.info("No inscriptions found referencing the selected title.") | |
| # ------------------------------- | |
| # Footer | |
| # ------------------------------- | |
# Page footer: a horizontal rule followed by the copyright line.
footer_markdown = """
---
**© 2024 InscriptaNET**
"""
st.markdown(footer_markdown)