import html
import re

import folium
import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset
# --- 1. Initial Data Loading ---
print("Loading datasets...")

# Display name (also the key used in `dfs`) -> (short tag for log lines, Hugging Face repo id).
_DATASET_SOURCES = {
    "Encyclopédie de Diderot et d'Alembert": ("EDDA", "GEODE/edda-coordinata"),
    "Encyclopædia Britannica 7th edition": ("EB7", "pnugues/EB7"),
    "Encyclopædia Britannica 9th edition": ("EB9", "pnugues/EB9"),
}

# Maps a dataset display name to its pandas DataFrame; a dataset that failed
# to load is simply absent from the dict.
dfs = {}
for _label, (_tag, _repo) in _DATASET_SOURCES.items():
    print(f"- Loading {_tag}...")
    # Each dataset is loaded under its own try so that one failure does not
    # prevent the remaining datasets from being loaded.
    try:
        dfs[_label] = load_dataset(_repo, split="train").to_pandas()
    except Exception as e:
        print(f"Error loading {_tag}: {e}")

if len(dfs) == len(_DATASET_SOURCES):
    print("Loading complete!")
# --- 2. Utility Functions ---
def parse_coordinate(coord_str, meridian_name=None):
    """Convert a textual coordinate pair into decimal ``(latitude, longitude)``.

    ``coord_str`` is scanned for components shaped like
    ``<deg> [<min>'] [<sec>"] <N|S|E|W>``. The last N/S component gives the
    latitude (south is negative); the last E/W component gives the longitude,
    which is then shifted by a fixed offset chosen from ``meridian_name``.
    A missing, blank or unrecognised meridian name is handled as
    "île de Fer".

    Returns ``(None, None)`` when ``coord_str`` is not a string or when it
    does not yield both a latitude and a longitude.
    """
    if not isinstance(coord_str, str):
        return None, None

    component_re = r"(\d+)\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])"
    latitude = None
    raw_longitude = None
    hemisphere = None  # 'E' or 'W' of the last longitude component seen

    for deg_txt, min_txt, sec_txt, letter in re.findall(component_re, coord_str):
        degrees = float(deg_txt) if deg_txt else 0
        minutes = float(min_txt) if min_txt else 0
        seconds = float(sec_txt) if sec_txt else 0
        magnitude = degrees + (minutes / 60) + (seconds / 3600)
        if letter == 'N':
            latitude = magnitude
        elif letter == 'S':
            latitude = -magnitude
        else:
            raw_longitude = magnitude
            hemisphere = letter

    if latitude is None or raw_longitude is None:
        return None, None

    meridian = "île de Fer"
    if isinstance(meridian_name, str) and meridian_name.strip():
        meridian = meridian_name.strip()

    if meridian == "Pékin":
        # NOTE(review): here W is added to and E subtracted from 116.39, the
        # opposite sign convention from every other meridian below. Kept
        # as-is; confirm against the dataset whether this is intended.
        if hemisphere == 'W':
            return latitude, 116.39 + raw_longitude
        return latitude, 116.39 - raw_longitude

    # East is positive, west is negative, then shift by the meridian offset.
    signed_longitude = raw_longitude if hemisphere == 'E' else -raw_longitude
    offsets = {
        "Paris": 2.35,
        "Lunden": 13.19,
        "Londres": 0.0,
        "London": 0.0,
        "Sydon": 35.37,
    }
    # Anything not listed falls back to the île de Fer offset.
    return latitude, signed_longitude + offsets.get(meridian, -17.66)
def classify_geometry(x):
    """Classify a coordinates entry by its nesting shape.

    Returns:
        "none"     - ``x`` is not a list/ndarray, or is empty.
        "point"    - ``x`` holds exactly one inner sequence of length 1.
        "surface"  - ``x`` holds exactly one inner sequence of any other length.
        a tag      - ``x`` has several items and its first item is a
                     one-element sequence whose value is one of 'subart',
                     'multsrc', 'pchain' or 'misc'; that value is returned.
        "unknown"  - anything else.
    """
    sequence_types = (list, np.ndarray)
    if not isinstance(x, sequence_types):
        return "none"

    item_count = len(x)
    if item_count == 0:
        return "none"

    head = x[0]
    if not isinstance(head, sequence_types):
        return "unknown"

    if item_count == 1:
        return "point" if len(head) == 1 else "surface"

    # Several items: the first one may be a one-element tag naming the kind.
    if len(head) == 1 and head[0] in ('subart', 'multsrc', 'pchain', 'misc'):
        return head[0]
    return "unknown"
def get_meridian_safely(meridian_list, index):
    """Return the stripped meridian name at ``index``, or "île de Fer".

    The fallback is used when ``meridian_list`` is not a list/ndarray, when
    ``index`` is not below its length, or when the entry at ``index`` is not
    a non-blank string.
    """
    fallback = "île de Fer"
    if isinstance(meridian_list, (list, np.ndarray)) and index < len(meridian_list):
        candidate = meridian_list[index]
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned != "":
                return cleaned
    return fallback
# --- 3. Search and Mapping Engine ---
def _new_base_map():
    """Return a fresh folium map at the default centre and zoom level."""
    return folium.Map(location=[46.2, 2.2], zoom_start=4)


def _shorten(text, limit):
    """Cut ``text`` to ``limit`` characters, adding '...' only if something was cut."""
    return text[:limit] + '...' if len(text) > limit else text


def _popup_html(title, meridian, snippet=""):
    """Build popup HTML: bold title, meridian line and an optional text snippet.

    All three values come from the datasets, so they are HTML-escaped before
    being placed in the markup.
    """
    lines = [
        f"<b>{html.escape(str(title))}</b>",
        f"Meridian: {html.escape(str(meridian))}",
    ]
    if snippet:
        lines.append(html.escape(snippet))
    return "<br>".join(lines)


def _render_eb_row(row, fmap, bounds):
    """Add a marker for one Britannica row (columns 'vedette', 'texte', 'coords').

    Rows without a usable 'coords' string are skipped. Every plotted point is
    also appended to ``bounds``.
    """
    coords_str = row.get('coords', '')
    if not (isinstance(coords_str, str) and coords_str.strip()):
        return
    # Force London meridian for British editions.
    lat, lon = parse_coordinate(coords_str, "Londres")
    if lat is None:
        return
    headword = row.get('vedette', 'Unknown article')
    snippet = _shorten(str(row.get('texte', '')), 150)
    folium.Marker(
        [lat, lon],
        popup=_popup_html(headword, "London (Greenwich)", snippet),
        tooltip=headword,
    ).add_to(fmap)
    bounds.append([lat, lon])


def _render_edda_row(row, fmap, bounds):
    """Draw one Encyclopédie row (columns 'headword', 'text', 'coordinates', 'meridian').

    Depending on ``classify_geometry`` the row becomes a marker ("point"), an
    orange rectangle ("surface"), a blue polyline ("pchain") or a set of green
    markers ("subart" / "multsrc"). Every plotted point is appended to
    ``bounds``. A row that raises while being drawn is logged and skipped so
    that one malformed entry does not abort the whole map.
    """
    meridian_list = row.get('meridian', [])
    if isinstance(meridian_list, np.ndarray):
        meridian_list = meridian_list.tolist()

    coords_raw = row.get('coordinates', [])
    if not isinstance(coords_raw, (list, np.ndarray)):
        return
    coords_list = [
        item.tolist() if isinstance(item, np.ndarray) else item
        for item in coords_raw
    ]

    geom_type = classify_geometry(coords_list)
    headword = row.get('headword', 'Unknown')
    snippet = _shorten(str(row.get('text', '')), 150)

    try:
        if geom_type == "point":
            meridian = get_meridian_safely(meridian_list, 0)
            lat, lon = parse_coordinate(coords_list[0][0], meridian)
            if lat is not None:
                folium.Marker(
                    [lat, lon],
                    popup=_popup_html(headword, meridian, snippet),
                    tooltip=headword,
                ).add_to(fmap)
                bounds.append([lat, lon])

        elif geom_type == "surface":
            meridian = get_meridian_safely(meridian_list, 0)
            corner_a = parse_coordinate(coords_list[0][0], meridian)
            corner_b = parse_coordinate(coords_list[0][1], meridian)
            if corner_a[0] is not None and corner_b[0] is not None:
                corners = [list(corner_a), list(corner_b)]
                folium.Rectangle(
                    bounds=corners,
                    color="orange",
                    fill=True,
                    popup=_popup_html(f"{headword} (Area)", meridian),
                ).add_to(fmap)
                bounds.extend(corners)

        elif geom_type in ("subart", "multsrc", "pchain"):
            # The first entry is the geometry tag; the rest are coordinates.
            # NOTE(review): the meridian for the i-th coordinate is read from
            # meridian_list[i], i.e. counted without the tag entry - confirm
            # this matches how the dataset aligns the two columns.
            points = []
            for i, item in enumerate(coords_list[1:]):
                c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                meridian = get_meridian_safely(meridian_list, i)
                lat, lon = parse_coordinate(c_str, meridian)
                if lat is not None:
                    points.append((lat, lon, meridian))

            if points:
                latlons = [[lat, lon] for lat, lon, _ in points]
                if geom_type == "pchain":
                    folium.PolyLine(
                        latlons,
                        color="blue",
                        weight=3,
                        popup=_popup_html(f"{headword} (Path)", points[0][2]),
                    ).add_to(fmap)
                else:
                    for lat, lon, meridian in points:
                        folium.Marker(
                            [lat, lon],
                            icon=folium.Icon(color='green'),
                            popup=_popup_html(headword, meridian, snippet),
                            tooltip=headword,
                        ).add_to(fmap)
                bounds.extend(latlons)
    except Exception as e:
        # Deliberately best-effort: log the row and keep rendering the others.
        print(f"EDDA rendering error for {headword}: {e}")


def search_and_map(query, search_mode, dataset_choice):
    """Search one dataset for ``query`` and plot the matching coordinates.

    Args:
        query: Term to look for; matched case-insensitively as a whole word.
        search_mode: "text" searches the article text; any other value
            searches the headword.
        dataset_choice: Key into the module-level ``dfs`` dict.

    Returns:
        A ``(dataframe, map_html)`` tuple: the first 50 matching rows with the
        text column shortened to 280 characters, and the folium map as an HTML
        string. An empty query or an unavailable dataset yields an empty
        DataFrame and the default map.
    """
    df = dfs.get(dataset_choice)
    query = query.strip() if isinstance(query, str) else ""
    if not query or df is None:
        return pd.DataFrame(), _new_base_map()._repr_html_()

    # EB7 and EB9 share the same column names and rendering logic.
    is_eb = dataset_choice in (
        "Encyclopædia Britannica 7th edition",
        "Encyclopædia Britannica 9th edition",
    )
    if is_eb:
        search_col = "texte" if search_mode == "text" else "vedette"
    else:
        search_col = "text" if search_mode == "text" else "headword"

    # Whole-word match. Lookarounds behave like \b for queries that start and
    # end with a word character, and still match when the query starts or
    # ends with punctuation (e.g. "St."), where \b would never match.
    pattern = r'(?<!\w)' + re.escape(query) + r'(?!\w)'
    mask = df[search_col].str.contains(pattern, case=False, na=False)
    results = df[mask].copy()

    fmap = _new_base_map()
    bounds = []
    render_row = _render_eb_row if is_eb else _render_edda_row
    for _, row in results.iterrows():
        render_row(row, fmap, bounds)

    # Zoom: frame every plotted point; a single location has no extent, so
    # fit on it with the zoom capped at 5 instead.
    if bounds:
        distinct = np.unique(bounds, axis=0)
        if len(distinct) <= 1:
            only_point = distinct[0].tolist()
            fmap.fit_bounds([only_point, only_point], max_zoom=5)
        else:
            fmap.fit_bounds(bounds)

    # Result table: first 50 rows, long article text shortened for display.
    if is_eb:
        columns, text_col = ['vedette', 'coords', 'texte'], 'texte'
    else:
        columns, text_col = ['headword', 'coordinates', 'meridian', 'text'], 'text'
    final_df = results[columns].head(50).copy()
    final_df[text_col] = final_df[text_col].astype(str).map(lambda t: _shorten(t, 280))
    return final_df, fmap._repr_html_()
# --- 4. Gradio Interface ---
# Markdown shown at the top of the page. Blank lines separate the blocks so
# that the rules render as horizontal rules (a "---" directly under a text
# line would turn that line into a heading) and paragraphs stay separate.
description = """
# 🌍 Historical Encyclopedias Coordinates Explorer

---

**Disclaimer:** This application is a demonstration prototype currently under development.

This application allows you to explore and compare manually annotated geographical coordinates from several major 18th and 19th-century encyclopedias:

* The **Encyclopédie** by Diderot and d'Alembert (FR, ~1751): https://huggingface.co/datasets/GEODE/edda-coordinata
* The **Encyclopædia Britannica** 7th edition (EN, ~1842): https://huggingface.co/datasets/pnugues/EB7
* The **Encyclopædia Britannica** 9th edition (EN, ~1875): https://huggingface.co/datasets/pnugues/EB9

Select the dataset, then search for any term within the article's text or its title (headword). The corresponding coordinates will be automatically projected onto the interactive map.

---
"""
# Page layout: search controls in a narrow left column, the map and the
# results table in a wider right column.
with gr.Blocks(title="Historical Encyclopedias Coordinates Explorer") as demo:
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=1):
            dataset_dropdown = gr.Dropdown(
                label="Choose Dataset",
                value="Encyclopédie de Diderot et d'Alembert",
                choices=[
                    "Encyclopédie de Diderot et d'Alembert",
                    "Encyclopædia Britannica 7th edition",
                    "Encyclopædia Britannica 9th edition",
                ],
            )
            search_input = gr.Textbox(
                label="Search term",
                placeholder="E.g., Acapulco, Brest, Berlin...",
            )
            search_mode = gr.Radio(
                label="Search in:",
                choices=["headword", "text"],
                value="headword",
            )
            btn = gr.Button("Search on map", variant="primary")

        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")
            table_output = gr.Dataframe(
                label="Results (max 50)",
                interactive=False,
                wrap=True,
            )

    # Clicking the button and pressing Enter in the search box run the same
    # search with the same inputs and outputs.
    inputs = [search_input, search_mode, dataset_dropdown]
    outputs = [table_output, map_output]
    for register in (btn.click, search_input.submit):
        register(search_and_map, inputs, outputs)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()