"""Gradio app that maps coordinates annotated in historical encyclopedias.

Three Hugging Face datasets are loaded at import time (the Encyclopédie and
the 7th and 9th editions of the Encyclopædia Britannica).  A search term is
matched against article headwords or full text, and every matching article
that carries parseable coordinates is drawn on a folium map.
"""
import html
import re

import folium
import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset

# Dataset display names; they double as keys of ``dfs`` and dropdown choices.
_EDDA_NAME = "Encyclopédie de Diderot et d'Alembert"
_EB7_NAME = "Encyclopædia Britannica 7th edition"
_EB9_NAME = "Encyclopædia Britannica 9th edition"
_EB_NAMES = (_EB7_NAME, _EB9_NAME)

# (log label, display name, Hugging Face repository id)
_DATASET_SOURCES = (
    ("EDDA", _EDDA_NAME, "GEODE/edda-coordinata"),
    ("EB7", _EB7_NAME, "pnugues/EB7"),
    ("EB9", _EB9_NAME, "pnugues/EB9"),
)

# Meridian used when an entry does not name one.
_DEFAULT_MERIDIAN = "île de Fer"

# Degrees added to a signed (east-positive) longitude for each named meridian.
_MERIDIAN_OFFSETS = {
    "Paris": 2.35,
    "Lunden": 13.19,
    "Londres": 0.0,
    "London": 0.0,
    "Sydon": 35.37,
}
# Offset applied for any meridian name not listed above (including the default).
_FALLBACK_OFFSET = -17.66
_PEKIN_LONGITUDE = 116.39

_COORD_PATTERN = re.compile(
    r"(\d+)\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])"
)

_MULTI_POINT_TAGS = ('subart', 'multsrc', 'pchain', 'misc')

_DEFAULT_CENTER = [46.2, 2.2]
_DEFAULT_ZOOM = 4
_SINGLE_POINT_ZOOM = 5
_SNIPPET_LENGTH = 150
_TABLE_ROWS = 50
_TABLE_TEXT_LENGTH = 280


# --- 1. Initial Data Loading ---
print("Loading datasets...")
dfs = {}
for _label, _name, _repo in _DATASET_SOURCES:
    print(f"- Loading {_label}...")
    # Each dataset is loaded independently so one failure (network, renamed
    # repository, ...) does not prevent the remaining ones from loading.
    try:
        dfs[_name] = load_dataset(_repo, split="train").to_pandas()
    except Exception as e:  # top-level boundary: report and keep going
        print(f"Error loading datasets: {e}")
if len(dfs) == len(_DATASET_SOURCES):
    print("Loading complete!")


# --- 2. Utility Functions ---
def parse_coordinate(coord_str, meridian_name=None):
    """Parse a textual coordinate into decimal ``(lat, lon)``.

    Every ``<deg> [<min>'] [<sec>"] <N|S|E|W>`` group found in *coord_str* is
    read; when several groups share an axis the last one wins.  The longitude
    is shifted from the named reference meridian, then wrapped into
    [-180, 180].

    Args:
        coord_str: Raw coordinate text.  Non-string input yields
            ``(None, None)``.
        meridian_name: Name of the reference meridian.  Blank or non-string
            values fall back to "île de Fer".

    Returns:
        ``(lat, lon)`` in decimal degrees, or ``(None, None)`` unless both a
        latitude and a longitude were found.
    """
    if not isinstance(coord_str, str):
        return None, None

    lat_val, lon_val = None, None
    is_west, is_east = False, False

    for deg_s, min_s, sec_s, direction in _COORD_PATTERN.findall(coord_str):
        deg = float(deg_s) if deg_s else 0
        minute = float(min_s) if min_s else 0
        sec = float(sec_s) if sec_s else 0
        val = deg + (minute / 60) + (sec / 3600)

        if direction in ('N', 'S'):
            lat_val = val if direction == 'N' else -val
        else:
            lon_val = val
            is_west = direction == 'W'
            is_east = direction == 'E'

    if lat_val is None or lon_val is None:
        return None, None

    if isinstance(meridian_name, str) and meridian_name.strip():
        m_name = meridian_name.strip()
    else:
        m_name = _DEFAULT_MERIDIAN

    if m_name == "Pékin":
        # NOTE(review): the sign here is the opposite of every other meridian
        # (west is added, east is subtracted).  Kept as in the original
        # because it may mirror an annotation convention -- confirm against
        # the dataset.
        if is_west:
            lon_val = _PEKIN_LONGITUDE + lon_val
        else:
            lon_val = _PEKIN_LONGITUDE - lon_val
    else:
        signed_lon = lon_val if is_east else -lon_val
        lon_val = signed_lon + _MERIDIAN_OFFSETS.get(m_name, _FALLBACK_OFFSET)

    # Source longitudes may exceed 180 degrees (e.g. counted 0-360 eastward);
    # wrap only when needed so in-range values are returned unchanged.
    if not -180.0 <= lon_val <= 180.0:
        lon_val = (lon_val + 180.0) % 360.0 - 180.0

    return lat_val, lon_val


def classify_geometry(x):
    """Classify the shape of a ``coordinates`` entry.

    Returns:
        * ``"none"``    -- *x* is not a list/array or is empty;
        * ``"point"``   -- a single inner sequence holding one coordinate;
        * ``"surface"`` -- a single inner sequence holding two or more;
        * ``"subart"``, ``"multsrc"``, ``"pchain"`` or ``"misc"`` -- several
          items whose first one is a one-element sequence holding that tag;
        * ``"unknown"`` -- anything else, including an empty inner sequence.
    """
    if not isinstance(x, (list, np.ndarray)) or len(x) == 0:
        return "none"

    first = x[0]
    if not isinstance(first, (list, np.ndarray)):
        return "unknown"

    if len(x) == 1:
        if len(first) == 0:
            # An empty inner sequence has nothing to draw; reporting it as a
            # surface would only trigger an IndexError in the renderer.
            return "unknown"
        return "point" if len(first) == 1 else "surface"

    if len(first) == 1 and first[0] in _MULTI_POINT_TAGS:
        return first[0]
    return "unknown"


def get_meridian_safely(meridian_list, index):
    """Return the meridian name at *index*, or "île de Fer" when unavailable.

    The fallback is used when *meridian_list* is not a list/array, when
    *index* is out of range, or when the stored value is not a non-blank
    string.
    """
    if not isinstance(meridian_list, (list, np.ndarray)):
        return _DEFAULT_MERIDIAN
    if 0 <= index < len(meridian_list):
        val = meridian_list[index]
        if isinstance(val, str) and val.strip() != "":
            return val.strip()
    return _DEFAULT_MERIDIAN


def _make_snippet(text):
    """Return *text* truncated to the popup snippet length, with an ellipsis."""
    if len(text) > _SNIPPET_LENGTH:
        return text[:_SNIPPET_LENGTH] + '...'
    return text


def _popup_html(headword, meridian, snippet=None, kind=None):
    """Build popup HTML; all dataset-provided text is HTML-escaped."""
    title = html.escape(str(headword))
    if kind:
        title += f" ({kind})"
    popup = f"{title}<br>Meridian: {html.escape(str(meridian))}"
    if snippet is not None:
        popup += f"<br><br>{html.escape(snippet)}"
    return popup


def _render_eb_row(row, fmap):
    """Draw one Britannica article on *fmap*; return the points it added."""
    coords_str = row.get('coords', '')
    headword = row.get('vedette', 'Unknown article')
    snippet = _make_snippet(str(row.get('texte', '')))

    if not (isinstance(coords_str, str) and coords_str.strip()):
        return []

    # Force London meridian for British editions
    lat, lon = parse_coordinate(coords_str, "Londres")
    if lat is None:
        return []

    folium.Marker(
        [lat, lon],
        popup=_popup_html(headword, "London (Greenwich)", snippet),
        tooltip=html.escape(str(headword)),
    ).add_to(fmap)
    return [[lat, lon]]


def _render_edda_row(row, fmap):
    """Draw one Encyclopédie article on *fmap*; return the points it added.

    Rendering is best effort: any error while drawing an entry is printed and
    whatever was collected before the error is still returned.
    """
    meridian_list = row.get('meridian', [])
    if isinstance(meridian_list, np.ndarray):
        meridian_list = meridian_list.tolist()

    coords_raw = row.get('coordinates', [])
    if not isinstance(coords_raw, (list, np.ndarray)):
        return []
    coords_list = [
        item.tolist() if isinstance(item, np.ndarray) else item
        for item in coords_raw
    ]

    geom_type = classify_geometry(coords_list)
    headword = row.get('headword', 'Unknown')
    tooltip = html.escape(str(headword))
    snippet = _make_snippet(str(row.get('text', '')))

    added = []
    try:
        if geom_type == "point":
            meridian = get_meridian_safely(meridian_list, 0)
            lat, lon = parse_coordinate(coords_list[0][0], meridian)
            if lat is not None:
                folium.Marker(
                    [lat, lon],
                    popup=_popup_html(headword, meridian, snippet),
                    tooltip=tooltip,
                ).add_to(fmap)
                added.append([lat, lon])

        elif geom_type == "surface":
            meridian = get_meridian_safely(meridian_list, 0)
            p1 = parse_coordinate(coords_list[0][0], meridian)
            p2 = parse_coordinate(coords_list[0][1], meridian)
            if p1[0] is not None and p2[0] is not None:
                folium.Rectangle(
                    bounds=[p1, p2],
                    color="orange",
                    fill=True,
                    popup=_popup_html(headword, meridian, kind="Area"),
                ).add_to(fmap)
                added.extend([p1, p2])

        elif geom_type in ("subart", "multsrc", "pchain"):
            points = []
            # coords_list[0] is the tag; the remaining items are coordinates.
            # NOTE(review): the meridian index restarts at 0 for the first
            # coordinate, i.e. the meridian list is assumed not to carry a
            # slot for the tag -- confirm against the dataset.
            for i, item in enumerate(coords_list[1:]):
                c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                meridian = get_meridian_safely(meridian_list, i)
                lat, lon = parse_coordinate(c_str, meridian)
                if lat is not None:
                    points.append((lat, lon, meridian))

            if points:
                coords_only = [[lat, lon] for lat, lon, _ in points]
                if geom_type == "pchain":
                    folium.PolyLine(
                        coords_only,
                        color="blue",
                        weight=3,
                        popup=_popup_html(headword, points[0][2], kind="Path"),
                    ).add_to(fmap)
                else:
                    for lat, lon, meridian in points:
                        folium.Marker(
                            [lat, lon],
                            icon=folium.Icon(color='green'),
                            popup=_popup_html(headword, meridian, snippet),
                            tooltip=tooltip,
                        ).add_to(fmap)
                added.extend(coords_only)
    except Exception as e:  # deliberate best effort: skip the broken entry
        print(f"EDDA rendering error for {headword}: {e}")
    return added


# --- 3. Search and Mapping Engine ---
def search_and_map(query, search_mode, dataset_choice):
    """Search a dataset and plot the coordinates of the matching articles.

    Args:
        query: Term to look for; matched case-insensitively between word
            boundaries.  Surrounding whitespace is ignored.
        search_mode: ``"text"`` searches the article body; any other value
            searches the headword.
        dataset_choice: Key of ``dfs`` naming the dataset to search.

    Returns:
        ``(table, map_html)``: a DataFrame with at most 50 matching rows
        (article text cut to 280 characters) and the folium map rendered as
        HTML.  An empty query or an unavailable dataset yields an empty
        DataFrame and the default map.
    """
    df = dfs.get(dataset_choice)
    if isinstance(query, str):
        query = query.strip()
    if not query or df is None:
        empty_map = folium.Map(location=_DEFAULT_CENTER, zoom_start=_DEFAULT_ZOOM)
        return pd.DataFrame(), empty_map._repr_html_()

    # Group EB7 and EB9 under the same logic
    is_eb = dataset_choice in _EB_NAMES

    # 1. Column management for search
    if is_eb:
        search_col = "texte" if search_mode == "text" else "vedette"
    else:
        search_col = "text" if search_mode == "text" else "headword"

    # Filtering
    pattern = r'\b' + re.escape(query) + r'\b'
    mask = df[search_col].str.contains(pattern, case=False, na=False)
    results = df[mask].copy()

    m = folium.Map(location=_DEFAULT_CENTER, zoom_start=_DEFAULT_ZOOM)
    render_row = _render_eb_row if is_eb else _render_edda_row

    # 2. Map rendering loop
    bounds = []
    for _, row in results.iterrows():
        bounds.extend(render_row(row, m))

    # --- 3. Zoom Logic ---
    if bounds:
        unique_pts = {tuple(pt) for pt in bounds}
        if len(unique_pts) <= 1:
            # Assigning ``zoom_start`` after construction has no effect on
            # the rendered map, so the zoom level is capped through
            # fit_bounds instead.
            only_pt = list(next(iter(unique_pts)))
            m.location = only_pt
            m.fit_bounds([only_pt, only_pt], max_zoom=_SINGLE_POINT_ZOOM)
        else:
            m.fit_bounds(bounds)

    # --- 4. Final Dataframe Formatting ---
    if is_eb:
        final_df = results[['vedette', 'coords', 'texte']].head(_TABLE_ROWS).copy()
        final_df['texte'] = (
            final_df['texte'].astype(str).str.slice(0, _TABLE_TEXT_LENGTH) + '...'
        )
    else:
        columns = ['headword', 'coordinates', 'meridian', 'text']
        final_df = results[columns].head(_TABLE_ROWS).copy()
        # NOTE(review): the original carried a disabled line stringifying
        # 'coordinates'; confirm the table renders the nested values as
        # intended before enabling such a conversion.
        final_df['text'] = (
            final_df['text'].astype(str).str.slice(0, _TABLE_TEXT_LENGTH) + '...'
        )

    return final_df, m._repr_html_()


# --- 4. Gradio Interface ---
description = """
# 🌍 Historical Encyclopedias Coordinates Explorer

---

**Disclaimer:** This application is a demonstration prototype currently under development.

This application allows you to explore and compare manually annotated geographical coordinates from several major 18th and 19th-century encyclopedias:

* The **Encyclopédie** by Diderot and d'Alembert (FR, ~1751): https://huggingface.co/datasets/GEODE/edda-coordinata
* The **Encyclopædia Britannica** 7th edition (EN, ~1842): https://huggingface.co/datasets/pnugues/EB7
* The **Encyclopædia Britannica** 9th edition (EN, ~1875): https://huggingface.co/datasets/pnugues/EB9

Select the dataset, then search for any term within the article's text or its title (headword). The corresponding coordinates will be automatically projected onto the interactive map.

---
"""

with gr.Blocks(title="Historical Encyclopedias Coordinates Explorer") as demo:
    gr.Markdown(description)

    with gr.Row():
        with gr.Column(scale=1):
            dataset_dropdown = gr.Dropdown(
                choices=[_EDDA_NAME, _EB7_NAME, _EB9_NAME],
                value=_EDDA_NAME,
                label="Choose Dataset",
            )
            search_input = gr.Textbox(
                label="Search term",
                placeholder="E.g., Acapulco, Brest, Berlin...",
            )
            search_mode = gr.Radio(
                choices=["headword", "text"],
                value="headword",
                label="Search in:",
            )
            btn = gr.Button("Search on map", variant="primary")

        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")

    table_output = gr.Dataframe(label="Results (max 50)", interactive=False, wrap=True)

    inputs = [search_input, search_mode, dataset_dropdown]
    outputs = [table_output, map_output]

    # The button and the Enter key in the search box trigger the same search.
    btn.click(search_and_map, inputs, outputs)
    search_input.submit(search_and_map, inputs, outputs)

if __name__ == "__main__":
    demo.launch()