import gradio as gr
from datasets import load_dataset
import pandas as pd
import re
import folium
import numpy as np
# Initial load: runs once at module import. The "train" split is converted to a
# pandas DataFrame (`df`), which search_and_map() filters on every query.
print("Chargement du jeu de données...")
dataset = load_dataset("GEODE/edda-coordinata", split="train")
df = dataset.to_pandas()
# One coordinate component: degrees, optional minutes ('), optional seconds
# (followed by one or two quote characters), then a hemisphere letter.
_COORD_PATTERN = re.compile(r"(\d+)\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])")

# Offset added to a longitude expressed relative to the named meridian.
# NOTE(review): values look like degrees east of Greenwich -- confirm.
_MERIDIAN_BASE_LONGITUDES = {
    "Paris": 2.35,
    "Lunden": 13.19,
    "Londres": 0.0,
    "Sydon": 35.37,
    "Pékin": 116.39,
}

# Offset used when the meridian is missing, blank or not in the table
# (the code treats "île de Fer" as the default meridian).
_DEFAULT_BASE_LONGITUDE = -17.66


def parse_coordinate(coord_str, meridian_name=None):
    """Convert a degrees/minutes/seconds string into decimal (lat, lon).

    Every ``<deg> [<min>'] [<sec>"] <N|S|E|W>`` component found in
    ``coord_str`` is converted to decimal degrees. If several latitude (or
    longitude) components are present, the last one of each kind is used.
    The longitude is then shifted by the base longitude of ``meridian_name``
    and wrapped into the [-180, 180] range so that values counted over
    0-360 degrees still land on the visible world copy of the map.

    Args:
        coord_str: Raw coordinate text, e.g. ``"48 50' N 2 20' E"``.
        meridian_name: Name of the reference meridian. Anything that is not
            a key of the meridian table (including ``None`` or a blank
            string) falls back to the default offset of -17.66.

    Returns:
        ``(latitude, longitude)`` as floats, or ``(None, None)`` when
        ``coord_str`` is not a string or lacks either a latitude or a
        longitude component.
    """
    if not isinstance(coord_str, str):
        return None, None

    lat_val = None
    signed_lon = None  # east-positive offset from the reference meridian
    for deg, minute, sec, direction in _COORD_PATTERN.findall(coord_str):
        # Minutes and seconds are empty strings when absent from the match.
        val = float(deg) + (float(minute or 0) / 60) + (float(sec or 0) / 3600)
        if direction == 'N':
            lat_val = val
        elif direction == 'S':
            lat_val = -val
        elif direction == 'E':
            signed_lon = val
        else:  # 'W'
            signed_lon = -val

    if lat_val is None or signed_lon is None:
        return None, None

    key = meridian_name.strip() if isinstance(meridian_name, str) else ""
    base = _MERIDIAN_BASE_LONGITUDES.get(key, _DEFAULT_BASE_LONGITUDE)
    lon_val = base + signed_lon

    # Only touch out-of-range values so in-range results keep their exact
    # floating-point value.
    if not -180.0 <= lon_val <= 180.0:
        lon_val = (lon_val + 180.0) % 360.0 - 180.0
    return lat_val, lon_val
def classify_geometry(x):
    """Label the shape of a raw ``coordinates`` value.

    Returns:
        * ``"none"``    -- ``x`` is not a list/array, or is empty.
        * ``"point"``   -- a single inner sequence holding one item.
        * ``"surface"`` -- a single inner sequence of any other length.
        * ``"subart"``, ``"multsrc"``, ``"pchain"`` or ``"misc"`` -- several
          entries whose first one is a one-item sequence carrying that tag.
        * ``"unknown"`` -- anything else.
    """
    sequence_types = (list, np.ndarray)

    if not isinstance(x, sequence_types):
        return "none"
    size = len(x)
    if size == 0:
        return "none"

    head = x[0]
    if not isinstance(head, sequence_types):
        return "unknown"

    if size == 1:
        if len(head) == 1:
            return "point"
        return "surface"

    # Multi-entry values are recognised only through their leading tag.
    if len(head) == 1 and head[0] in ['subart', 'multsrc', 'pchain', 'misc']:
        return head[0]
    return "unknown"
def get_meridian_safely(meridian_list, index):
    """Return the meridian name stored exactly at ``index``.

    Args:
        meridian_list: List or numpy array of meridian names; any other type
            is treated as "no meridian information".
        index: Zero-based position to read.

    Returns:
        The stripped string found at ``index``, or ``"île de Fer"`` when
        ``meridian_list`` is not a list/array, ``index`` is out of range
        (including negative), or the stored value is not a non-blank string.
    """
    default = "île de Fer"
    if not isinstance(meridian_list, (list, np.ndarray)):
        return default

    # Negative indices are rejected explicitly: Python would otherwise count
    # from the end (or raise IndexError on a short list), which contradicts
    # the "strictly at the given index" contract.
    if not 0 <= index < len(meridian_list):
        return default

    val = meridian_list[index]
    if isinstance(val, str):
        cleaned = val.strip()
        if cleaned:
            return cleaned
    return default
def search_and_map(query, search_column):
    """Search the dataset and draw every matching entry's coordinates on a map.

    Args:
        query: Search term typed by the user. It is matched case-insensitively,
            as a regular expression when it compiles as one and as a literal
            substring otherwise (so input such as ``"("`` no longer raises).
        search_column: Name of the ``df`` column to search in.

    Returns:
        ``(table, map_html)``: a DataFrame with at most 50 matches (columns
        headword, coordinates, meridian, text; coordinates and meridian are
        converted to strings) and the HTML of the folium map. An empty query
        returns an empty DataFrame and the default map.
    """
    # Local import keeps the block self-contained (html is standard library).
    from html import escape

    if not query:
        return pd.DataFrame(), folium.Map(location=[46.2, 2.2], zoom_start=4)._repr_html_()

    # Fall back to a literal search when the query is not a valid regex.
    try:
        re.compile(query)
        as_regex = True
    except re.error:
        as_regex = False
    mask = df[search_column].str.contains(query, case=False, na=False, regex=as_regex)
    results = df[mask].copy()

    m = folium.Map(location=[46.2, 2.2], zoom_start=4)
    bounds = []

    for _, row in results.iterrows():
        meridian_list = row.get('meridian', [])
        coords_raw = row['coordinates']
        if not isinstance(coords_raw, (list, np.ndarray)):
            continue
        coords_list = [item.tolist() if isinstance(item, np.ndarray) else item for item in coords_raw]

        geom_type = classify_geometry(coords_list)
        headword = row['headword']
        # A missing / non-string text must not abort the whole search.
        text = row['text'] if isinstance(row['text'], str) else ""
        snippet = (text[:150] + '...') if len(text) > 150 else text

        # Dataset text is inserted into popup/tooltip HTML, so escape it.
        safe_headword = escape(str(headword))
        safe_snippet = escape(snippet)
        tooltip = escape(headword) if isinstance(headword, str) else headword

        try:
            # SINGLE POINT
            if geom_type == "point":
                current_meridian = get_meridian_safely(meridian_list, 0)
                lat, lon = parse_coordinate(coords_list[0][0], current_meridian)
                if lat is not None:
                    popup_html = (
                        f"<b>{safe_headword}</b><br>"
                        f"<i>Meridian: {escape(current_meridian)}</i><br>"
                        f"{safe_snippet}"
                    )
                    folium.Marker([lat, lon], popup=popup_html, tooltip=tooltip).add_to(m)
                    bounds.append([lat, lon])

            # SURFACE (bounding box given by two corners)
            elif geom_type == "surface":
                current_meridian = get_meridian_safely(meridian_list, 0)
                p1 = parse_coordinate(coords_list[0][0], current_meridian)
                p2 = parse_coordinate(coords_list[0][1], current_meridian)
                if p1[0] is not None and p2[0] is not None:
                    popup_html = (
                        f"<b>{safe_headword} (Zone)</b><br>"
                        f"<i>Meridian: {escape(current_meridian)}</i>"
                    )
                    folium.Rectangle(bounds=[p1, p2], color="orange", fill=True, popup=popup_html).add_to(m)
                    bounds.extend([p1, p2])

            # TAGGED LISTS (multsrc, subart, pchain): first entry is the tag
            elif geom_type in ["subart", "multsrc", "pchain"]:
                points = []
                for i, item in enumerate(coords_list[1:]):
                    c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                    # Point i (after the tag) is paired with meridian i.
                    current_meridian = get_meridian_safely(meridian_list, i)
                    p = parse_coordinate(c_str, current_meridian)
                    if p[0] is not None:
                        # Keep the meridian alongside the point for display.
                        points.append((p[0], p[1], current_meridian))

                if points:
                    if geom_type == "pchain":
                        coords_only = [[pt[0], pt[1]] for pt in points]
                        popup_html = (
                            f"<b>{safe_headword} (Arc)</b><br>"
                            f"<i>Meridian: {escape(points[0][2])}</i>"
                        )
                        folium.PolyLine(coords_only, color="blue", weight=3, popup=popup_html).add_to(m)
                        bounds.extend(coords_only)
                    else:
                        for pt in points:
                            popup_html = (
                                f"<b>{safe_headword}</b><br>"
                                f"<i>Meridian: {escape(pt[2])}</i><br>"
                                f"{safe_snippet}"
                            )
                            folium.Marker(
                                [pt[0], pt[1]],
                                icon=folium.Icon(color='green'),
                                popup=popup_html,
                                tooltip=tooltip,
                            ).add_to(m)
                        bounds.extend([[pt[0], pt[1]] for pt in points])
        except Exception as e:
            # Best effort: one malformed entry must not break the whole map.
            print(f"Erreur de rendu pour {headword}: {e}")

    # Zoom logic
    if bounds:
        unique_pts = np.unique(bounds, axis=0)
        if len(unique_pts) <= 1:
            # NOTE(review): assigning m.zoom_start after construction appears
            # to have no effect in folium releases that keep the zoom in
            # Map.options; fit_bounds with max_zoom centres on the point at
            # zoom 5 regardless.
            only_pt = unique_pts[0].tolist()
            m.location = only_pt
            m.fit_bounds([only_pt, only_pt], max_zoom=5)
        else:
            m.fit_bounds(bounds)

    final_df = results[['headword', 'coordinates', 'meridian', 'text']].head(50).copy()
    # List-valued columns are stringified so the table can display them.
    final_df['coordinates'] = final_df['coordinates'].astype(str)
    final_df['meridian'] = final_df['meridian'].astype(str)
    return final_df, m._repr_html_()
# --- Gradio interface ---
# Markdown shown at the top of the page (passed to gr.Markdown below).
description = """
# 🌍 EDDA-Coordinata Viewer
---
Historical texts contain information about latitude and longitude in many, non-standardized forms. Here, you can view the results of research to find and normalize coordinates in an Enlightenment-era text.
This application allows you to explore geographical coordinates manually annotated from articles in the 18th-century *Encyclopédie* edited by Diderot and d'Alembert.
You can search for any term within either the **entry's text content** or its **headword (title)**. All matching entries will be displayed in the table below, and if they contain coordinates, their locations (points, areas, or paths) will be automatically rendered on the interactive map.
Out of 15,278 total geographical entries, the dataset includes 4,798 entries with manually identified, explicit coordinates.
- **Authors:** Ludovic Moncla, Pierre Nugues, Thierry Joliveau, and Katherine McDonough.
- **Dataset:** [GEODE/edda-coordinata](https://huggingface.co/datasets/GEODE/edda-coordinata)
- **Data sources:** [ENCCRE](https://enccre.academie-sciences.fr/) & [The ARTFL Project](https://artfl-project.uchicago.edu).
- **Project:** [GEODE](https://geode-project.github.io)
### Cite this work
> Moncla, L., Nugues, P., Joliveau, T., & McDonough, K. (2026). **EDDA-Coordinata: An Annotated Dataset of Historical Geographic Coordinates**. *arXiv preprint [arXiv:2602.23941](https://arxiv.org/abs/2602.23941).* (Accepted at **LREC 2026**)
### Usage notes:
Terms do not need to be place names (toponyms): they can be any word (e.g., *château*, *philosophie*, *Londres*).
If you search for a word, you will retrieve all the instances of that word either in the title (headword) or the entry text. The mapped coordinates represent **all coordinates present in that article**, not necessarily the specific coordinates of your search term. Click on a marker to see the historical meridian used for its calculation!
---
"""
# Page layout: description on top, then a row with the search controls (left)
# and the map (right), and the results table underneath.
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search terms", placeholder="Type here...")
            search_mode = gr.Radio(choices=["headword", "text"], value="headword", label="Search in:")
            btn = gr.Button("View Search Results", variant="primary")
        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")
    table_output = gr.Dataframe(label="Search Results (max 50)", interactive=False)

    # Clicking the button and pressing Enter in the textbox run the same search.
    btn.click(search_and_map, [search_input, search_mode], [table_output, map_output])
    search_input.submit(search_and_map, [search_input, search_mode], [table_output, map_output])

# Only start the server when executed as a script, so that importing this
# module (tests, reload tooling) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()