| import gradio as gr |
| from datasets import load_dataset |
| import pandas as pd |
| import re |
| import folium |
| import numpy as np |
|
|
| |
# Load every encyclopedia dataset once at import time so that searches are
# served from in-memory DataFrames. Each dataset is loaded independently: a
# failure on one of them is reported and skipped instead of preventing the
# remaining datasets from being loaded.
print("Loading datasets...")
dfs = {}

# (short name used in log output, key used in `dfs`, Hugging Face repository id)
_DATASET_SOURCES = [
    ("EDDA", "Encyclopédie de Diderot et d'Alembert", "GEODE/edda-coordinata"),
    ("EB7", "Encyclopædia Britannica 7th edition", "pnugues/EB7"),
    ("EB9", "Encyclopædia Britannica 9th edition", "pnugues/EB9"),
]

for _short_name, _display_name, _repo_id in _DATASET_SOURCES:
    try:
        print(f"- Loading {_short_name}...")
        dfs[_display_name] = load_dataset(_repo_id, split="train").to_pandas()
    except Exception as e:
        # Best-effort start-up: keep going so the other datasets stay usable.
        print(f"Error loading datasets ({_short_name}): {e}")

# Only announce completion when nothing was skipped.
if len(dfs) == len(_DATASET_SOURCES):
    print("Loading complete!")
|
|
| |
# Degrees, optional minutes (') and optional seconds (" or ''), followed by a
# hemisphere letter. A degree sign after the degrees is tolerated; strings
# without one are matched exactly as before.
_COORD_PATTERN = re.compile(
    r"(\d+)\s*°?\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])"
)

# Value added to a signed (east-positive) longitude for each named meridian.
_MERIDIAN_OFFSETS = {
    "Paris": 2.35,
    "Lunden": 13.19,
    "Londres": 0.0,
    "London": 0.0,
    "Sydon": 35.37,
}
# Offset used for "île de Fer" and for any meridian name not listed above.
_DEFAULT_MERIDIAN_OFFSET = -17.66
_PEKING_LONGITUDE = 116.39


def parse_coordinate(coord_str, meridian_name=None):
    """Parse a latitude/longitude string and shift the longitude by a meridian offset.

    Args:
        coord_str: Text containing components such as ``48 51' N 2 21' E``.
            Each component is degrees, optional minutes and seconds, and one
            of the letters N, S, E, W. If a hemisphere appears several times,
            the last occurrence wins.
        meridian_name: Name of the reference meridian of the longitude. A
            missing, non-string or blank value is treated as "île de Fer".

    Returns:
        ``(lat, lon)`` as floats in decimal degrees (south negative), or
        ``(None, None)`` when ``coord_str`` is not a string or does not
        contain both a latitude and a longitude.
    """
    if not isinstance(coord_str, str):
        return None, None

    lat_val = None
    lon_val = None
    is_east = False

    for deg, minute, sec, direction in _COORD_PATTERN.findall(coord_str):
        # Unmatched optional groups come back as "", which counts as zero.
        val = float(deg) + (float(minute or 0) / 60) + (float(sec or 0) / 3600)
        if direction in ("N", "S"):
            lat_val = val if direction == "N" else -val
        else:
            lon_val = val
            is_east = direction == "E"

    if lat_val is None or lon_val is None:
        return None, None

    if isinstance(meridian_name, str) and meridian_name.strip():
        m_name = meridian_name.strip()
    else:
        m_name = "île de Fer"

    if m_name == "Pékin":
        # NOTE(review): here a W longitude is added and an E longitude is
        # subtracted, the opposite of every other meridian below. Behaviour
        # is kept as-is; confirm against the data that this is intended.
        if is_east:
            lon_val = _PEKING_LONGITUDE - lon_val
        else:
            lon_val = _PEKING_LONGITUDE + lon_val
    else:
        signed_lon = lon_val if is_east else -lon_val
        lon_val = signed_lon + _MERIDIAN_OFFSETS.get(m_name, _DEFAULT_MERIDIAN_OFFSET)

    return lat_val, lon_val
|
|
# Tags that may appear alone in the first element of a multi-part entry.
_MULTI_GEOMETRY_TAGS = ('subart', 'multsrc', 'pchain', 'misc')


def classify_geometry(x):
    """Classify the shape of a coordinates entry.

    Args:
        x: A list or NumPy array whose elements are themselves lists/arrays.

    Returns:
        - ``"none"``: ``x`` is not a list/array, is empty, or is a single
          empty inner list (nothing to draw).
        - ``"point"``: one inner list holding exactly one item.
        - ``"surface"``: one inner list holding two or more items.
        - one of ``'subart'``, ``'multsrc'``, ``'pchain'``, ``'misc'``: several
          inner lists, the first being a one-item list holding that tag.
        - ``"unknown"``: anything else.
    """
    sequence_types = (list, np.ndarray)
    if not isinstance(x, sequence_types) or len(x) == 0:
        return "none"

    first = x[0]
    if not isinstance(first, sequence_types):
        return "unknown"

    if len(x) == 1:
        if len(first) == 0:
            # An empty inner list used to be reported as "surface", which made
            # the caller index into it and fail; treat it as no geometry.
            return "none"
        return "point" if len(first) == 1 else "surface"

    if len(first) == 1 and first[0] in _MULTI_GEOMETRY_TAGS:
        return first[0]
    return "unknown"
|
|
def get_meridian_safely(meridian_list, index):
    """Return the stripped meridian name at ``index``, or "île de Fer".

    The fallback "île de Fer" is returned when ``meridian_list`` is not a list
    or NumPy array, when ``index`` is not below its length, or when the entry
    at ``index`` is not a non-blank string.
    """
    fallback = "île de Fer"
    if isinstance(meridian_list, (list, np.ndarray)) and index < len(meridian_list):
        candidate = meridian_list[index]
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned != "":
                return cleaned
    return fallback
|
|
| |
def _snippet(text, limit=150):
    """Return ``text`` cut to ``limit`` characters, adding '...' only when cut."""
    return (text[:limit] + '...') if len(text) > limit else text


def search_and_map(query, search_mode, dataset_choice):
    """Search one dataset and plot the coordinates of the matching articles.

    Args:
        query: Term to look for (whole-term, case-insensitive match).
            Surrounding whitespace is ignored.
        search_mode: ``"text"`` searches the article text; any other value
            searches the headword column.
        dataset_choice: Key of the dataset in the module-level ``dfs`` dict.

    Returns:
        A ``(table, map_html)`` tuple: a DataFrame with at most 50 matching
        rows (text shortened to 280 characters plus '...') and the HTML of a
        folium map. An empty DataFrame and a default map are returned when
        the query is empty or the dataset is not loaded.
    """
    # Local import: only needed to escape dataset text placed in popup HTML.
    from html import escape

    df = dfs.get(dataset_choice)
    if isinstance(query, str):
        query = query.strip()
    if not query or df is None:
        return pd.DataFrame(), folium.Map(location=[46.2, 2.2], zoom_start=4)._repr_html_()

    # The two Britannica datasets use 'vedette'/'texte'/'coords' columns; the
    # other dataset uses 'headword'/'text'/'coordinates'/'meridian'.
    is_eb = dataset_choice in ["Encyclopædia Britannica 7th edition", "Encyclopædia Britannica 9th edition"]

    if is_eb:
        search_col = "texte" if search_mode == "text" else "vedette"
    else:
        search_col = "text" if search_mode == "text" else "headword"

    # Lookarounds behave like \b for queries that start and end with a word
    # character, and still match queries with punctuation at an edge ("St.").
    pattern = r'(?<!\w)' + re.escape(query) + r'(?!\w)'
    mask = df[search_col].str.contains(pattern, case=False, na=False)
    results = df[mask].copy()

    m = folium.Map(location=[46.2, 2.2], zoom_start=4)
    bounds = []

    for _, row in results.iterrows():

        if is_eb:
            coords_str = row.get('coords', '')
            texte_val = str(row.get('texte', ''))

            headword = row.get('vedette', 'Unknown article')
            safe_headword = escape(str(headword))
            snippet = escape(_snippet(texte_val))

            if isinstance(coords_str, str) and coords_str.strip():
                # These rows are always parsed against the "Londres" meridian.
                lat, lon = parse_coordinate(coords_str, "Londres")
                if lat is not None:
                    popup_html = f"<b>{safe_headword}</b><br><i>Meridian: London (Greenwich)</i><br><br>{snippet}"
                    folium.Marker([lat, lon], popup=popup_html, tooltip=safe_headword).add_to(m)
                    bounds.append([lat, lon])

        else:
            meridian_list = row.get('meridian', [])
            if isinstance(meridian_list, np.ndarray):
                meridian_list = meridian_list.tolist()

            coords_raw = row.get('coordinates', [])
            if not isinstance(coords_raw, (list, np.ndarray)):
                continue
            coords_list = [item.tolist() if isinstance(item, np.ndarray) else item for item in coords_raw]

            geom_type = classify_geometry(coords_list)
            headword = row.get('headword', 'Unknown')
            safe_headword = escape(str(headword))

            texte_val = str(row.get('text', ''))
            snippet = escape(_snippet(texte_val))

            try:
                if geom_type == "point":
                    current_meridian = get_meridian_safely(meridian_list, 0)
                    lat, lon = parse_coordinate(coords_list[0][0], current_meridian)
                    if lat is not None:
                        popup_html = f"<b>{safe_headword}</b><br><i>Meridian: {escape(current_meridian)}</i><br><br>{snippet}"
                        folium.Marker([lat, lon], popup=popup_html, tooltip=safe_headword).add_to(m)
                        bounds.append([lat, lon])

                elif geom_type == "surface":
                    # The first two coordinates are used as opposite corners.
                    current_meridian = get_meridian_safely(meridian_list, 0)
                    p1 = parse_coordinate(coords_list[0][0], current_meridian)
                    p2 = parse_coordinate(coords_list[0][1], current_meridian)
                    if p1[0] is not None and p2[0] is not None:
                        popup_html = f"<b>{safe_headword}</b> (Area)<br><i>Meridian: {escape(current_meridian)}</i>"
                        folium.Rectangle(bounds=[p1, p2], color="orange", fill=True, popup=popup_html).add_to(m)
                        bounds.extend([p1, p2])

                elif geom_type in ["subart", "multsrc", "pchain"]:
                    # The first element is the tag; the rest are coordinates.
                    points = []
                    for i, item in enumerate(coords_list[1:]):
                        c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                        current_meridian = get_meridian_safely(meridian_list, i)
                        p = parse_coordinate(c_str, current_meridian)
                        if p[0] is not None:
                            points.append((p[0], p[1], current_meridian))

                    if points:
                        coords_only = [[pt[0], pt[1]] for pt in points]
                        if geom_type == "pchain":
                            popup_html = f"<b>{safe_headword}</b> (Path)<br><i>Meridian: {escape(points[0][2])}</i>"
                            folium.PolyLine(coords_only, color="blue", weight=3, popup=popup_html).add_to(m)
                        else:
                            for pt in points:
                                popup_html = f"<b>{safe_headword}</b><br><i>Meridian: {escape(pt[2])}</i><br><br>{snippet}"
                                folium.Marker([pt[0], pt[1]], icon=folium.Icon(color='green'), popup=popup_html, tooltip=safe_headword).add_to(m)
                        bounds.extend(coords_only)
            except Exception as e:
                # Best-effort rendering: one malformed row must not abort the search.
                print(f"EDDA rendering error for {headword}: {e}")

    if bounds:
        distinct = {(float(pt[0]), float(pt[1])) for pt in bounds}
        if len(distinct) == 1:
            only = list(next(iter(distinct)))
            m.location = only
            # Assigning m.zoom_start after construction does not change the
            # rendered zoom; fitting to the single point with a zoom cap does.
            m.fit_bounds([only, only], max_zoom=5)
        else:
            m.fit_bounds(bounds)

    if is_eb:
        final_df = results[['vedette', 'coords', 'texte']].head(50).copy()
        final_df['texte'] = final_df['texte'].astype(str).str.slice(0, 280) + '...'
    else:
        final_df = results[['headword', 'coordinates', 'meridian', 'text']].head(50).copy()
        final_df['text'] = final_df['text'].astype(str).str.slice(0, 280) + '...'

    return final_df, m._repr_html_()
|
|
| |
# Markdown introduction rendered at the top of the page via gr.Markdown.
description = """
# 🌍 Historical Encyclopedias Coordinates Explorer
---

**Disclaimer:** This application is a demonstration prototype currently under development.

This application allows you to explore and compare manually annotated geographical coordinates from several major 18th and 19th-century encyclopedias:
* The **Encyclopédie** by Diderot and d'Alembert (FR, ~1751): https://huggingface.co/datasets/GEODE/edda-coordinata
* The **Encyclopædia Britannica** 7th edition (EN, ~1842): https://huggingface.co/datasets/pnugues/EB7
* The **Encyclopædia Britannica** 9th edition (EN, ~1875): https://huggingface.co/datasets/pnugues/EB9

Select the dataset, then search for any term within the article's text or its title (headword). The corresponding coordinates will be automatically projected onto the interactive map.

---
"""
|
|
# Dataset labels offered in the dropdown; the first one is preselected.
_dataset_labels = [
    "Encyclopédie de Diderot et d'Alembert",
    "Encyclopædia Britannica 7th edition",
    "Encyclopædia Britannica 9th edition",
]

# Page layout: controls on the left, map on the right, results table below.
with gr.Blocks(title="Historical Encyclopedias Coordinates Explorer") as demo:
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=1):
            dataset_dropdown = gr.Dropdown(
                choices=_dataset_labels,
                value=_dataset_labels[0],
                label="Choose Dataset",
            )
            search_input = gr.Textbox(
                label="Search term",
                placeholder="E.g., Acapulco, Brest, Berlin...",
            )
            search_mode = gr.Radio(
                choices=["headword", "text"],
                value="headword",
                label="Search in:",
            )
            btn = gr.Button("Search on map", variant="primary")

        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")

    table_output = gr.Dataframe(label="Results (max 50)", interactive=False, wrap=True)

    inputs = [search_input, search_mode, dataset_dropdown]
    outputs = [table_output, map_output]

    # Clicking the button and pressing Enter in the textbox run the same search.
    btn.click(fn=search_and_map, inputs=inputs, outputs=outputs)
    search_input.submit(fn=search_and_map, inputs=inputs, outputs=outputs)


# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()