Spaces:
Running
Running
| import gradio as gr | |
| from datasets import load_dataset | |
| import pandas as pd | |
| import re | |
| import folium | |
| import numpy as np | |
# Initial load: fetch the "train" split of GEODE/edda-coordinata once, when the
# module is executed, and keep it as a pandas DataFrame (`df`) that
# `search_and_map` queries on every search.
print("Chargement du jeu de données...")
dataset = load_dataset("GEODE/edda-coordinata", split="train")
df = dataset.to_pandas()
def parse_coordinate(coord_str, meridian_name=None):
    """
    Parse a coordinate string into decimal ``(latitude, longitude)``.

    Each component of ``coord_str`` is expected to look like
    ``<degrees> [<minutes>'] [<seconds>"] <N|S|E|W>``. When several components
    share an axis, the last one wins. The longitude found in the string is
    treated as an offset from a reference meridian and shifted by that
    meridian's own longitude, so the result is expressed against the zero
    meridian used by the offset table below.

    Args:
        coord_str: Raw coordinate text. Anything that is not a ``str`` yields
            ``(None, None)``.
        meridian_name: Name of the reference meridian. Missing, empty or
            unknown names fall back to the "île de Fer" offset (-17.66).

    Returns:
        ``(lat, lon)`` as floats, or ``(None, None)`` when the string does not
        contain both a latitude and a longitude. ``lon`` is always within
        [-180, 180].
    """
    if not isinstance(coord_str, str):
        return None, None

    pattern = r"(\d+)\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])"

    lat_val = None
    lon_offset = None  # signed offset from the reference meridian, east positive
    for deg, minute, sec, direction in re.findall(pattern, coord_str):
        # Optional groups come back as "" when absent; count them as zero.
        val = float(deg) + (float(minute or 0) / 60) + (float(sec or 0) / 3600)
        if direction in ("N", "S"):
            lat_val = val if direction == "N" else -val
        else:
            lon_offset = val if direction == "E" else -val

    if lat_val is None or lon_offset is None:
        return None, None

    if isinstance(meridian_name, str) and meridian_name:
        m_name = meridian_name.strip()
    else:
        m_name = "île de Fer"

    # Longitude of each named reference meridian. Any name not listed here
    # (including the default "île de Fer") uses -17.66.
    base_longitudes = {
        "Paris": 2.35,
        "Lunden": 13.19,
        "Londres": 0.0,
        "Sydon": 35.37,
        "Pékin": 116.39,
    }
    lon_val = base_longitudes.get(m_name, -17.66) + lon_offset

    # Offsets measured far east or west of the reference meridian can push the
    # sum past the antimeridian (e.g. 350 E of Paris). Wrap those back into
    # [-180, 180]; values already in range are left untouched.
    if not -180.0 <= lon_val <= 180.0:
        lon_val = (lon_val + 180.0) % 360.0 - 180.0

    return lat_val, lon_val
def classify_geometry(x):
    """
    Label the shape of a coordinates entry.

    Returns:
        - "none" when ``x`` is not a list/ndarray or is empty;
        - "point" when ``x`` holds a single inner sequence of length 1;
        - "surface" when ``x`` holds a single inner sequence of any other length;
        - the tag itself ('subart', 'multsrc', 'pchain' or 'misc') when ``x``
          has several items and its first item is a one-element sequence
          carrying that tag;
        - "unknown" otherwise.
    """
    sequence_types = (list, np.ndarray)
    if not isinstance(x, sequence_types):
        return "none"

    size = len(x)
    if size == 0:
        return "none"

    first = x[0]
    if isinstance(first, sequence_types):
        inner_size = len(first)
        if size == 1:
            # A lone inner sequence is either a single point or a bounding box.
            if inner_size == 1:
                return "point"
            return "surface"
        if inner_size == 1:
            # Multi-item entries start with a one-element tag describing the rest.
            tag = first[0]
            if tag in ('subart', 'multsrc', 'pchain', 'misc'):
                return tag

    return "unknown"
def get_meridian_safely(meridian_list, index):
    """
    Return the meridian name stored exactly at ``index``, or "île de Fer".

    The default "île de Fer" is returned when ``meridian_list`` is not a
    list/ndarray, when ``index`` is outside ``0 <= index < len(meridian_list)``,
    or when the stored value is not a non-blank string. Negative indices are
    rejected rather than counted from the end, so a point is never paired with
    a meridian belonging to another position and no ``IndexError`` can escape.

    Args:
        meridian_list: Sequence of meridian names (may contain None or blanks).
        index: Zero-based position to read.

    Returns:
        The stripped meridian name, or "île de Fer".
    """
    default_meridian = "île de Fer"

    if not isinstance(meridian_list, (list, np.ndarray)):
        return default_meridian
    if not 0 <= index < len(meridian_list):
        return default_meridian

    val = meridian_list[index]
    if isinstance(val, str):
        stripped = val.strip()
        if stripped:
            return stripped
    return default_meridian
def search_and_map(query, search_column):
    """
    Search the dataset and draw the coordinates of every matching entry.

    Args:
        query: Text to look for. It is matched case-insensitively as a literal
            substring, so characters such as ``(``, ``+`` or ``.`` are safe.
        search_column: Name of the ``df`` column to search.

    Returns:
        A ``(table, map_html)`` tuple. ``table`` is a DataFrame with the first
        50 matches (columns headword, coordinates, meridian, text, with
        coordinates and meridian converted to strings). ``map_html`` is the
        folium map rendered as HTML. An empty query returns an empty DataFrame
        and the default map.
    """
    import html  # local import: only needed to escape dataset text in popups

    def _esc(value):
        # Popups and tooltips are injected as raw HTML, so dataset text must
        # not be able to open tags or entities.
        return html.escape(str(value), quote=False)

    if not query:
        return pd.DataFrame(), folium.Map(location=[46.2, 2.2], zoom_start=4)._repr_html_()

    # regex=False: user input is a plain search term; interpreting it as a
    # regular expression makes queries like "(" or "c++" raise.
    mask = df[search_column].str.contains(query, case=False, na=False, regex=False)
    results = df[mask].copy()

    m = folium.Map(location=[46.2, 2.2], zoom_start=4)
    bounds = []

    for _, row in results.iterrows():
        meridian_list = row.get('meridian', [])
        coords_raw = row['coordinates']
        if not isinstance(coords_raw, (list, np.ndarray)):
            continue
        coords_list = [item.tolist() if isinstance(item, np.ndarray) else item for item in coords_raw]

        geom_type = classify_geometry(coords_list)
        headword = row['headword']
        # A missing text must not abort the whole search; show no snippet instead.
        text = row['text'] if isinstance(row['text'], str) else ""
        snippet = (text[:150] + '...') if len(text) > 150 else text

        safe_headword = _esc(headword)
        safe_snippet = _esc(snippet)
        tooltip = _esc(headword) if isinstance(headword, str) else headword

        # Best effort per entry: a malformed row is logged and skipped so the
        # remaining matches are still drawn.
        try:
            # Single point
            if geom_type == "point":
                current_meridian = get_meridian_safely(meridian_list, 0)
                lat, lon = parse_coordinate(coords_list[0][0], current_meridian)
                if lat is not None:
                    popup_html = f"<b>{safe_headword}</b><br><i>Meridian: {_esc(current_meridian)}</i><br><br>{safe_snippet}"
                    folium.Marker([lat, lon], popup=popup_html, tooltip=tooltip).add_to(m)
                    bounds.append([lat, lon])

            # Surface (bounding box given by two corners)
            elif geom_type == "surface":
                current_meridian = get_meridian_safely(meridian_list, 0)
                p1 = parse_coordinate(coords_list[0][0], current_meridian)
                p2 = parse_coordinate(coords_list[0][1], current_meridian)
                if p1[0] is not None and p2[0] is not None:
                    popup_html = f"<b>{safe_headword}</b> (Zone)<br><i>Meridian: {_esc(current_meridian)}</i>"
                    folium.Rectangle(bounds=[p1, p2], color="orange", fill=True, popup=popup_html).add_to(m)
                    bounds.extend([p1, p2])

            # Tagged lists (multsrc, subart, pchain): first item is the tag
            elif geom_type in ["subart", "multsrc", "pchain"]:
                points = []
                # Point i (after the tag) is paired with meridian i.
                for i, item in enumerate(coords_list[1:]):
                    c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                    current_meridian = get_meridian_safely(meridian_list, i)
                    p = parse_coordinate(c_str, current_meridian)
                    if p[0] is not None:
                        # Keep the meridian alongside the point for the popup.
                        points.append((p[0], p[1], current_meridian))

                if points:
                    if geom_type == "pchain":
                        coords_only = [[pt[0], pt[1]] for pt in points]
                        popup_html = f"<b>{safe_headword}</b> (Arc)<br><i>Meridian: {_esc(points[0][2])}</i>"
                        folium.PolyLine(coords_only, color="blue", weight=3, popup=popup_html).add_to(m)
                        bounds.extend(coords_only)
                    else:
                        for pt in points:
                            popup_html = f"<b>{safe_headword}</b><br><i>Meridian: {_esc(pt[2])}</i><br><br>{safe_snippet}"
                            folium.Marker([pt[0], pt[1]], icon=folium.Icon(color='green'), popup=popup_html, tooltip=tooltip).add_to(m)
                        bounds.extend([[pt[0], pt[1]] for pt in points])
        except Exception as e:
            print(f"Erreur de rendu pour {headword}: {e}")

    # Zoom logic
    if bounds:
        unique_pts = np.unique(bounds, axis=0)
        if len(unique_pts) <= 1:
            # Assigning m.zoom_start after construction does not change the
            # rendered zoom; cap the fit instead so a lone point is not shown
            # at maximum zoom.
            single = unique_pts[0].tolist()
            m.location = single
            m.fit_bounds([single, single], max_zoom=5)
        else:
            m.fit_bounds(bounds)

    final_df = results[['headword', 'coordinates', 'meridian', 'text']].head(50).copy()
    final_df['coordinates'] = final_df['coordinates'].astype(str)
    final_df['meridian'] = final_df['meridian'].astype(str)
    return final_df, m._repr_html_()
| # --- Interface Gradio --- | |
| description = """ | |
| # 🌍 EDDA-Coordinata Viewer | |
| --- | |
| Historical texts contain information about latitude and longitude in many, non-standardized forms. Here, you can view the results of research to find and normalize coordinates in an Enlightenment-era text. | |
| This application allows you to explore geographical coordinates manually annotated from articles in the 18th-century *Encyclopédie* edited by Diderot and d'Alembert. | |
| You can search for any term within either the **entry's text content** or its **headword (title)**. All matching entries will be displayed in the table below, and if they contain coordinates, their locations (points, areas, or paths) will be automatically rendered on the interactive map. | |
| Out of 15,278 total geographical entries, the dataset includes 4,798 entries with manually identified, explicit coordinates. | |
| - **Authors:** Ludovic Moncla, Pierre Nugues, Thierry Joliveau, and Katherine McDonough. | |
| - **Dataset:** [GEODE/edda-coordinata](https://huggingface.co/datasets/GEODE/edda-coordinata) | |
| - **Data sources:** [ENCCRE](https://enccre.academie-sciences.fr/) & [The ARTFL Project](https://artfl-project.uchicago.edu). | |
| - **Project:** [GEODE](https://geode-project.github.io) | |
| ### Cite this work | |
| > Moncla, L., Nugues, P., Joliveau, T., & McDonough, K. (2026). **EDDA-Coordinata: An Annotated Dataset of Historical Geographic Coordinates**. *arXiv preprint [arXiv:2602.23941](https://arxiv.org/abs/2602.23941).* (Accepted at **LREC 2026**) | |
| ### Usage notes: | |
| Terms do not need to be place names (toponyms): they can be any word (e.g., *château*, *philosophie*, *Londres*). | |
| If you search for a word, you will retrieve all the instances of that word either in the title (headword) or the entry text. The mapped coordinates represent **all coordinates present in that article**, not necessarily the specific coordinates of your search term. Click on a marker to see the historical meridian used for its calculation! | |
| --- | |
| """ | |
# Build the Gradio UI: the description, a search panel beside the map, the
# results table, and the event wiring that calls `search_and_map`.
# NOTE(review): the nesting below is reconstructed from a flattened listing;
# confirm whether `table_output` sits under the row (as here) or inside the
# map column.
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search terms", placeholder="Type here...")
            # The radio value is passed to search_and_map as the column to search.
            search_mode = gr.Radio(choices=["headword", "text"], value="headword", label="Search in:")
            btn = gr.Button("View Search Results", variant="primary")
        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")
    table_output = gr.Dataframe(label="Search Results (max 50)", interactive=False)
    # The button click and the textbox submit event run the same search and
    # fill the table and the map.
    btn.click(search_and_map, [search_input, search_mode], [table_output, map_output])
    search_input.submit(search_and_map, [search_input, search_mode], [table_output, map_output])
demo.launch()