File size: 11,053 Bytes
30f0acf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3902302
30f0acf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3902302
30f0acf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3902302
30f0acf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import html
import re

import folium
import gradio as gr
import numpy as np
import pandas as pd
from datasets import load_dataset

# --- 1. Initial Data Loading ---
# UI label of each encyclopedia -> (short name used in log lines, Hugging Face dataset id).
DATASET_SOURCES = {
    "Encyclopédie de Diderot et d'Alembert": ("EDDA", "GEODE/edda-coordinata"),
    "Encyclopædia Britannica 7th edition": ("EB7", "pnugues/EB7"),
    "Encyclopædia Britannica 9th edition": ("EB9", "pnugues/EB9"),
}

print("Loading datasets...")
# Maps the UI label to the "train" split of the dataset as a pandas DataFrame.
# A dataset that failed to load is simply absent from this mapping.
dfs = {}

for _label, (_short_name, _repo_id) in DATASET_SOURCES.items():
    print(f"- Loading {_short_name}...")
    # Each dataset gets its own try block so that one failing download does
    # not prevent the remaining datasets from being loaded.
    try:
        dfs[_label] = load_dataset(_repo_id, split="train").to_pandas()
    except Exception as e:
        # Best effort at start-up: report the problem and keep going.
        print(f"Error loading datasets: {e}")

if len(dfs) == len(DATASET_SOURCES):
    print("Loading complete!")

# --- 2. Utility Functions ---
def parse_coordinate(coord_str, meridian_name=None):
    """Convert a textual coordinate into a ``(latitude, longitude)`` pair.

    The string is scanned for components of the form
    ``<deg> [<min>'] [<sec>"] <N|S|E|W>``. Each component is turned into
    decimal degrees; N/S components set the latitude (south is negative) and
    E/W components set the longitude. When a direction occurs several times,
    the last occurrence wins.

    The longitude is then shifted by a fixed offset that depends on
    ``meridian_name`` (presumably to re-express it relative to Greenwich).
    A missing, non-string or blank meridian name is treated as
    ``"île de Fer"``, which is also the offset used for any unrecognised name.

    Args:
        coord_str: Raw coordinate text.
        meridian_name: Name of the reference meridian, or ``None``.

    Returns:
        ``(lat, lon)`` as floats, or ``(None, None)`` when ``coord_str`` is
        not a string or does not contain both a latitude and a longitude.
    """
    if not isinstance(coord_str, str):
        return None, None

    component_re = r"(\d+)\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])"

    latitude = None
    raw_longitude = None
    hemisphere = None  # 'E' or 'W' of the last longitude component seen

    for degrees, minutes, seconds, direction in re.findall(component_re, coord_str):
        # Optional groups come back as empty strings when absent.
        magnitude = (
            (float(degrees) if degrees else 0)
            + (float(minutes) if minutes else 0) / 60
            + (float(seconds) if seconds else 0) / 3600
        )
        if direction == 'N':
            latitude = magnitude
        elif direction == 'S':
            latitude = -magnitude
        else:
            raw_longitude = magnitude
            hemisphere = direction

    if latitude is None or raw_longitude is None:
        return None, None

    if isinstance(meridian_name, str) and meridian_name.strip():
        reference = meridian_name.strip()
    else:
        reference = "île de Fer"

    if reference == "Pékin":
        # NOTE(review): here a western longitude is *added* to the offset and
        # an eastern one subtracted, the opposite of the other meridians --
        # confirm this matches how the source annotates Pékin-based longitudes.
        if hemisphere == 'W':
            return latitude, 116.39 + raw_longitude
        return latitude, 116.39 - raw_longitude

    signed_longitude = raw_longitude if hemisphere == 'E' else -raw_longitude

    # Offset added to the signed longitude for each recognised meridian.
    offsets = {
        "Paris": 2.35,
        "Lunden": 13.19,
        "Londres": 0.0,
        "London": 0.0,
        "Sydon": 35.37,
    }
    if reference in offsets:
        return latitude, signed_longitude + offsets[reference]

    # Any other name (including "île de Fer" itself) uses the default offset.
    return latitude, signed_longitude - 17.66

def classify_geometry(x):
    """Label the shape of a nested coordinate structure.

    Returns:
        * ``"none"``    -- ``x`` is not a list/ndarray, or is empty.
        * ``"point"``   -- ``x`` holds one inner sequence of length 1.
        * ``"surface"`` -- ``x`` holds one inner sequence of any other length.
        * one of ``'subart'``, ``'multsrc'``, ``'pchain'``, ``'misc'`` --
          ``x`` has several items and its first item is a one-element
          sequence containing that tag.
        * ``"unknown"`` -- anything else.
    """
    sequence_types = (list, np.ndarray)

    if not isinstance(x, sequence_types) or len(x) == 0:
        return "none"

    head = x[0]
    if not isinstance(head, sequence_types):
        return "unknown"

    if len(x) == 1:
        if len(head) == 1:
            return "point"
        return "surface"

    # Several items: the first one may be a one-element tag marker.
    if len(head) == 1 and head[0] in ('subart', 'multsrc', 'pchain', 'misc'):
        return head[0]
    return "unknown"

def get_meridian_safely(meridian_list, index):
    """Return the stripped meridian name at ``index``, or ``"île de Fer"``.

    The fallback is used when ``meridian_list`` is not a list/ndarray, when
    ``index`` is past its end, or when the entry at ``index`` is not a
    non-blank string.
    """
    fallback = "île de Fer"

    if isinstance(meridian_list, (list, np.ndarray)) and index < len(meridian_list):
        candidate = meridian_list[index]
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned != "":
                return cleaned

    return fallback

# --- 3. Search and Mapping Engine ---
def _truncate(text, limit):
    """Return ``text`` cut to ``limit`` characters, adding '...' only if it was cut."""
    return text[:limit] + '...' if len(text) > limit else text


def _popup_html(headword, meridian_label, snippet=None, kind=None):
    """Build the popup markup for a map feature, HTML-escaping all dataset text."""
    markup = f"<b>{html.escape(str(headword))}</b>"
    if kind:
        markup += f" ({kind})"
    markup += f"<br><i>Meridian: {html.escape(str(meridian_label))}</i>"
    if snippet is not None:
        markup += f"<br><br>{html.escape(snippet)}"
    return markup


def _tooltip_text(headword):
    """Return the escaped tooltip text for ``headword`` (``None`` stays ``None``)."""
    return None if headword is None else html.escape(str(headword))


def search_and_map(query, search_mode, dataset_choice):
    """Search one encyclopedia and plot the coordinates of the matching articles.

    Args:
        query: Term to look for. It is stripped, regex-escaped and matched
            case-insensitively as a whole word.
        search_mode: ``"text"`` searches the article body; any other value
            searches the headword column.
        dataset_choice: Key into the module-level ``dfs`` mapping.

    Returns:
        A ``(table, map_html)`` tuple: a DataFrame with at most 50 matching
        rows (article text shortened to 280 characters) and the folium map
        rendered as an HTML string. An empty DataFrame and a default map are
        returned when the query is blank or the dataset is not loaded.
    """
    # Surrounding whitespace would otherwise end up inside the regex and
    # prevent obvious matches.
    query = query.strip() if isinstance(query, str) else ""

    df = dfs.get(dataset_choice)
    if not query or df is None:
        return pd.DataFrame(), folium.Map(location=[46.2, 2.2], zoom_start=4)._repr_html_()

    # EB7 and EB9 share the same column names and are handled by the same logic.
    is_eb = dataset_choice in ["Encyclopædia Britannica 7th edition", "Encyclopædia Britannica 9th edition"]

    # 1. Column names differ between the Britannica datasets and EDDA.
    if is_eb:
        headword_col, text_col = "vedette", "texte"
    else:
        headword_col, text_col = "headword", "text"
    search_col = text_col if search_mode == "text" else headword_col

    # Filtering
    mask = df[search_col].str.contains(r'\b' + re.escape(query) + r'\b', case=False, na=False)
    results = df[mask].copy()

    m = folium.Map(location=[46.2, 2.2], zoom_start=4)
    bounds = []  # every plotted [lat, lon], used afterwards to frame the map

    # 2. Map rendering loop
    for _, row in results.iterrows():
        snippet = _truncate(str(row.get(text_col, '')), 150)

        # --- BRITANNICA BRANCH (EB7 & EB9) ---
        if is_eb:
            headword = row.get('vedette', 'Unknown article')
            coords_str = row.get('coords', '')

            if isinstance(coords_str, str) and coords_str.strip():
                # Force London meridian for British editions
                lat, lon = parse_coordinate(coords_str, "Londres")
                if lat is not None:
                    folium.Marker(
                        [lat, lon],
                        popup=_popup_html(headword, "London (Greenwich)", snippet),
                        tooltip=_tooltip_text(headword),
                    ).add_to(m)
                    bounds.append([lat, lon])
            continue

        # --- ENCYCLOPÉDIE BRANCH (EDDA) ---
        meridian_list = row.get('meridian', [])
        if isinstance(meridian_list, np.ndarray):
            meridian_list = meridian_list.tolist()

        coords_raw = row.get('coordinates', [])
        if not isinstance(coords_raw, (list, np.ndarray)):
            continue
        # Normalise nested ndarrays to plain lists before classification.
        coords_list = [item.tolist() if isinstance(item, np.ndarray) else item for item in coords_raw]

        geom_type = classify_geometry(coords_list)
        headword = row.get('headword', 'Unknown')

        # Best effort: a malformed row is reported and skipped rather than
        # aborting the whole search.
        try:
            if geom_type == "point":
                current_meridian = get_meridian_safely(meridian_list, 0)
                lat, lon = parse_coordinate(coords_list[0][0], current_meridian)
                if lat is not None:
                    folium.Marker(
                        [lat, lon],
                        popup=_popup_html(headword, current_meridian, snippet),
                        tooltip=_tooltip_text(headword),
                    ).add_to(m)
                    bounds.append([lat, lon])

            elif geom_type == "surface":
                # The first two coordinates are used as opposite rectangle corners.
                current_meridian = get_meridian_safely(meridian_list, 0)
                p1 = parse_coordinate(coords_list[0][0], current_meridian)
                p2 = parse_coordinate(coords_list[0][1], current_meridian)
                if p1[0] is not None and p2[0] is not None:
                    folium.Rectangle(
                        bounds=[p1, p2],
                        color="orange",
                        fill=True,
                        popup=_popup_html(headword, current_meridian, kind="Area"),
                    ).add_to(m)
                    bounds.extend([p1, p2])

            elif geom_type in ["subart", "multsrc", "pchain"]:
                # coords_list[0] is the tag; the actual coordinates follow it.
                # NOTE(review): meridian index i is counted from the first
                # coordinate after the tag -- confirm the meridian column is
                # aligned this way in the dataset.
                points = []
                for i, item in enumerate(coords_list[1:]):
                    c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                    current_meridian = get_meridian_safely(meridian_list, i)
                    p = parse_coordinate(c_str, current_meridian)
                    if p[0] is not None:
                        points.append((p[0], p[1], current_meridian))

                if points:
                    coords_only = [[pt[0], pt[1]] for pt in points]
                    if geom_type == "pchain":
                        folium.PolyLine(
                            coords_only,
                            color="blue",
                            weight=3,
                            popup=_popup_html(headword, points[0][2], kind="Path"),
                        ).add_to(m)
                    else:
                        for pt in points:
                            folium.Marker(
                                [pt[0], pt[1]],
                                icon=folium.Icon(color='green'),
                                popup=_popup_html(headword, pt[2], snippet),
                                tooltip=_tooltip_text(headword),
                            ).add_to(m)
                    bounds.extend(coords_only)
        except Exception as e:
            print(f"EDDA rendering error for {headword}: {e}")

    # --- 3. Zoom Logic ---
    if bounds:
        unique_pts = np.unique(bounds, axis=0)
        if len(unique_pts) <= 1:
            # A single location has no extent, so cap the zoom explicitly.
            # NOTE(review): assigning m.zoom_start after construction did not
            # appear to affect the rendered map (folium seems to read it only
            # in the Map constructor), hence fit_bounds with max_zoom.
            only_point = unique_pts[0].tolist()
            m.fit_bounds([only_point, only_point], max_zoom=5)
        else:
            m.fit_bounds(bounds)

    # --- 4. Final Dataframe Formatting ---
    if is_eb:
        table_columns = ['vedette', 'coords', 'texte']
    else:
        table_columns = ['headword', 'coordinates', 'meridian', 'text']
    final_df = results[table_columns].head(50).copy()
    # Ellipsis only when the text was actually shortened, like the popup snippet.
    final_df[text_col] = final_df[text_col].astype(str).map(lambda t: _truncate(t, 280))

    return final_df, m._repr_html_()

# --- 4. Gradio Interface ---
# Markdown header for the page; it is passed to gr.Markdown when the
# interface is built.
description = """
# 🌍 Historical Encyclopedias Coordinates Explorer
---

**Disclaimer:** This application is a demonstration prototype currently under development.

This application allows you to explore and compare manually annotated geographical coordinates from several major 18th and 19th-century encyclopedias: 
* The **Encyclopédie** by Diderot and d'Alembert (FR, ~1751): https://huggingface.co/datasets/GEODE/edda-coordinata
* The **Encyclopædia Britannica** 7th edition (EN, ~1842): https://huggingface.co/datasets/pnugues/EB7
* The **Encyclopædia Britannica** 9th edition (EN, ~1875): https://huggingface.co/datasets/pnugues/EB9

Select the dataset, then search for any term within the article's text or its title (headword). The corresponding coordinates will be automatically projected onto the interactive map.

---
"""

# Build the UI: a header, a row with the search controls (left) and the map
# (right), and a results table underneath.
with gr.Blocks(title="Historical Encyclopedias Coordinates Explorer") as demo:
    gr.Markdown(description)

    with gr.Row():
        # Left column: dataset selection and search controls.
        with gr.Column(scale=1):
            # The choice strings are the keys search_and_map uses to look up
            # the loaded dataset in `dfs`.
            dataset_dropdown = gr.Dropdown(
                choices=[
                    "Encyclopédie de Diderot et d'Alembert", 
                    "Encyclopædia Britannica 7th edition",
                    "Encyclopædia Britannica 9th edition"
                ],
                value="Encyclopédie de Diderot et d'Alembert",
                label="Choose Dataset"
            )
            search_input = gr.Textbox(label="Search term", placeholder="E.g., Acapulco, Brest, Berlin...")
            # search_and_map treats "text" as a body search and any other
            # value as a headword search.
            search_mode = gr.Radio(choices=["headword", "text"], value="headword", label="Search in:")
            btn = gr.Button("Search on map", variant="primary")
        
        # Right column: the folium map, embedded as raw HTML.
        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")

    table_output = gr.Dataframe(label="Results (max 50)", interactive=False, wrap=True)

    # Order must match search_and_map's parameters and its returned tuple.
    inputs = [search_input, search_mode, dataset_dropdown]
    outputs = [table_output, map_output]
    
    # Run the search on button click and when the search box is submitted.
    btn.click(search_and_map, inputs, outputs)
    search_input.submit(search_and_map, inputs, outputs)

# Start the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()