Ludovic Moncla committed on
Commit
30f0acf
·
1 Parent(s): 950bde0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +257 -0
app.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ import pandas as pd
4
+ import re
5
+ import folium
6
+ import numpy as np
7
+
8
+ # --- 1. Initial Data Loading ---
9
# Datasets offered by the app: display label -> (short tag used in the log
# lines, Hugging Face Hub repository id).
_DATASET_SOURCES = {
    "Encyclopédie de Diderot et d'Alembert": ("EDDA", "GEODE/edda-coordinata"),
    "Encyclopædia Britannica 7th edition": ("EB7", "pnugues/EB7"),
    "Encyclopædia Britannica 9th edition": ("EB9", "pnugues/EB9"),
}

print("Loading datasets...")
# Display label -> pandas DataFrame of the dataset's "train" split.
# A label is simply absent when its dataset could not be loaded.
dfs = {}

for _label, (_tag, _repo_id) in _DATASET_SOURCES.items():
    print(f"- Loading {_tag}...")
    try:
        dfs[_label] = load_dataset(_repo_id, split="train").to_pandas()
    except Exception as e:
        # Best effort: the datasets are independent of each other, so a
        # failure on one must not prevent the remaining ones from loading.
        print(f"Error loading datasets: {e}")

# Only announce completion when every dataset was actually loaded.
if len(dfs) == len(_DATASET_SOURCES):
    print("Loading complete!")
25
+
26
+ # --- 2. Utility Functions ---
27
def parse_coordinate(coord_str, meridian_name=None):
    """Parse a textual coordinate pair into decimal (latitude, longitude).

    The string is scanned for components made of a degree number, an optional
    minute number followed by ``'``, an optional second number followed by one
    or two ``"``/``'`` characters, and a hemisphere letter (N, S, E or W).
    When several components share an axis, the last one wins.

    Args:
        coord_str: Text holding at least one N/S and one E/W component.
        meridian_name: Name of the reference meridian of the longitude.
            Blank or non-string values are treated as "île de Fer".

    Returns:
        A ``(latitude, longitude)`` tuple of floats, or ``(None, None)`` when
        ``coord_str`` is not a string or lacks either axis.
    """
    if not isinstance(coord_str, str):
        return None, None

    component_re = r"(\d+)\s*(?:(\d+)')?\s*(?:(\d+)[\"']{1,2})?\s*([NSEW])"

    latitude = None
    raw_longitude = None
    hemisphere = None  # 'E' or 'W' of the last longitude component seen

    for deg_txt, min_txt, sec_txt, direction in re.findall(component_re, coord_str):
        degrees = float(deg_txt) if deg_txt else 0
        minutes = float(min_txt) if min_txt else 0
        seconds = float(sec_txt) if sec_txt else 0
        magnitude = degrees + (minutes / 60) + (seconds / 3600)

        if direction == 'N':
            latitude = magnitude
        elif direction == 'S':
            latitude = -magnitude
        else:
            # The pattern only admits N/S/E/W, so this is a longitude.
            raw_longitude = magnitude
            hemisphere = direction

    if latitude is None or raw_longitude is None:
        return None, None

    if isinstance(meridian_name, str) and meridian_name.strip():
        reference = meridian_name.strip()
    else:
        reference = "île de Fer"

    if reference == "Pékin":
        # NOTE(review): unlike every other meridian below, a W longitude is
        # added to the offset and an E longitude subtracted - confirm that
        # this inversion is intended for the annotated data.
        if hemisphere == 'W':
            return latitude, 116.39 + raw_longitude
        return latitude, 116.39 - raw_longitude

    # East is positive, west is negative, then shift by the meridian's offset
    # (presumably its own longitude relative to Greenwich - verify values).
    signed_longitude = raw_longitude if hemisphere == 'E' else -raw_longitude
    offsets = {
        "Paris": 2.35,
        "Lunden": 13.19,
        "Londres": 0.0,
        "London": 0.0,
        "Sydon": 35.37,
    }
    # Any unrecognised meridian name falls back to the île de Fer offset.
    return latitude, signed_longitude + offsets.get(reference, -17.66)
71
+
72
def classify_geometry(x):
    """Classify the shape of a nested coordinates value.

    Args:
        x: Expected to be a list or NumPy array whose items are themselves
            lists/arrays; anything else is accepted and classified.

    Returns:
        - "none": ``x`` is not a list/array, or is empty.
        - "point": ``x`` has one inner sequence holding exactly one item.
        - "surface": ``x`` has one inner sequence of any other length.
        - "subart", "multsrc", "pchain" or "misc": ``x`` has several items
          and its first is a one-item sequence holding that tag.
        - "unknown": every other layout.
    """
    sequence_types = (list, np.ndarray)

    if not isinstance(x, sequence_types) or len(x) == 0:
        return "none"

    head = x[0]
    if not isinstance(head, sequence_types):
        return "unknown"

    if len(x) == 1:
        if len(head) == 1:
            return "point"
        return "surface"

    # Several items: the first one may be a single-element tag.
    if len(head) == 1 and head[0] in ('subart', 'multsrc', 'pchain', 'misc'):
        return head[0]
    return "unknown"
79
+
80
def get_meridian_safely(meridian_list, index):
    """Return the meridian name stored at ``index``, or the default.

    Args:
        meridian_list: List or NumPy array of meridian names; any other type
            yields the default.
        index: Position to read.

    Returns:
        The stripped string found at ``index`` when it is a non-blank string,
        otherwise "île de Fer" (also when ``index`` is past the end).
    """
    fallback = "île de Fer"

    if isinstance(meridian_list, (list, np.ndarray)) and index < len(meridian_list):
        candidate = meridian_list[index]
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned != "":
                return cleaned

    return fallback
88
+
89
+ # --- 3. Search and Mapping Engine ---
90
def search_and_map(query, search_mode, dataset_choice):
    """Search one dataset and plot the coordinates of the matching articles.

    Args:
        query: Search term. It is used as a case-insensitive regular
            expression when it compiles as one, otherwise as a literal
            substring (so terms such as "(" no longer raise).
        search_mode: "text" searches the article body; any other value
            searches the headword column.
        dataset_choice: Key into the module-level ``dfs`` mapping.

    Returns:
        A ``(dataframe, html)`` tuple: the first 50 matching rows with the
        text column truncated to 280 characters, and the folium map rendered
        as an HTML string. With an empty query or an unavailable dataset, an
        empty DataFrame and a default map are returned.
    """
    df = dfs.get(dataset_choice)
    if not query or df is None:
        return pd.DataFrame(), folium.Map(location=[46.2, 2.2], zoom_start=4)._repr_html_()

    # Group EB7 and EB9 under the same logic
    is_eb = dataset_choice in ["Encyclopædia Britannica 7th edition", "Encyclopædia Britannica 9th edition"]

    # 1. Column management for search
    if is_eb:
        search_col = "texte" if search_mode == "text" else "vedette"
    else:
        search_col = "text" if search_mode == "text" else "headword"

    # Filtering. The query comes straight from the text box: validate it as a
    # regular expression first and fall back to a literal match when it is
    # not one, instead of letting the pattern error abort the request.
    try:
        re.compile(query)
        use_regex = True
    except re.error:
        use_regex = False
    mask = df[search_col].str.contains(query, case=False, na=False, regex=use_regex)
    results = df[mask].copy()

    m = folium.Map(location=[46.2, 2.2], zoom_start=4)
    bounds = []  # every plotted [lat, lon], used to frame the map afterwards

    # 2. Map rendering loop
    for _, row in results.iterrows():

        # --- BRITANNICA BRANCH (EB7 & EB9) ---
        if is_eb:
            coords_str = row.get('coords', '')
            texte_val = str(row.get('texte', ''))

            headword = row.get('vedette', 'Unknown article')
            snippet = (texte_val[:150] + '...') if len(texte_val) > 150 else texte_val

            if isinstance(coords_str, str) and coords_str.strip():
                # Force London meridian for British editions
                lat, lon = parse_coordinate(coords_str, "Londres")
                if lat is not None:
                    popup_html = f"<b>{headword}</b><br><i>Meridian: London (Greenwich)</i><br><br>{snippet}"
                    folium.Marker([lat, lon], popup=popup_html, tooltip=headword).add_to(m)
                    bounds.append([lat, lon])

        # --- ENCYCLOPÉDIE BRANCH (EDDA) ---
        else:
            meridian_list = row.get('meridian', [])
            if isinstance(meridian_list, np.ndarray):
                meridian_list = meridian_list.tolist()

            coords_raw = row.get('coordinates', [])
            if isinstance(coords_raw, (list, np.ndarray)):
                # Convert inner arrays to plain lists for classify_geometry.
                coords_list = [item.tolist() if isinstance(item, np.ndarray) else item for item in coords_raw]
            else:
                continue

            geom_type = classify_geometry(coords_list)
            headword = row.get('headword', 'Unknown')

            texte_val = str(row.get('text', ''))
            snippet = (texte_val[:150] + '...') if len(texte_val) > 150 else texte_val

            try:
                if geom_type == "point":
                    current_meridian = get_meridian_safely(meridian_list, 0)
                    lat, lon = parse_coordinate(coords_list[0][0], current_meridian)
                    if lat is not None:
                        popup_html = f"<b>{headword}</b><br><i>Meridian: {current_meridian}</i><br><br>{snippet}"
                        folium.Marker([lat, lon], popup=popup_html, tooltip=headword).add_to(m)
                        bounds.append([lat, lon])

                elif geom_type == "surface":
                    # The first two entries are used as opposite corners.
                    current_meridian = get_meridian_safely(meridian_list, 0)
                    p1 = parse_coordinate(coords_list[0][0], current_meridian)
                    p2 = parse_coordinate(coords_list[0][1], current_meridian)
                    if p1[0] is not None and p2[0] is not None:
                        popup_html = f"<b>{headword}</b> (Area)<br><i>Meridian: {current_meridian}</i>"
                        folium.Rectangle(bounds=[p1, p2], color="orange", fill=True, popup=popup_html).add_to(m)
                        bounds.extend([p1, p2])

                elif geom_type in ["subart", "multsrc", "pchain"]:
                    points = []
                    # coords_list[0] is the tag; the coordinates follow it.
                    for i, item in enumerate(coords_list[1:]):
                        c_str = item[0] if isinstance(item, (list, np.ndarray)) else item
                        current_meridian = get_meridian_safely(meridian_list, i)
                        p = parse_coordinate(c_str, current_meridian)
                        if p[0] is not None:
                            points.append((p[0], p[1], current_meridian))

                    if points:
                        if geom_type == "pchain":
                            coords_only = [[pt[0], pt[1]] for pt in points]
                            popup_html = f"<b>{headword}</b> (Path)<br><i>Meridian: {points[0][2]}</i>"
                            folium.PolyLine(coords_only, color="blue", weight=3, popup=popup_html).add_to(m)
                            bounds.extend(coords_only)
                        else:
                            for pt in points:
                                popup_html = f"<b>{headword}</b><br><i>Meridian: {pt[2]}</i><br><br>{snippet}"
                                folium.Marker([pt[0], pt[1]], icon=folium.Icon(color='green'), popup=popup_html, tooltip=headword).add_to(m)
                            bounds.extend([[pt[0], pt[1]] for pt in points])
            except Exception as e:
                # Best effort: one malformed row must not abort the whole map.
                print(f"EDDA rendering error for {headword}: {e}")

    # --- 3. Zoom Logic ---
    if bounds:
        unique_pts = np.unique(bounds, axis=0)
        if len(unique_pts) <= 1:
            # Assigning m.location / m.zoom_start after construction is not
            # reliably picked up when the map is rendered, so centre on the
            # single point through fit_bounds and cap the zoom level at 5.
            only_pt = unique_pts[0].tolist()
            m.fit_bounds([only_pt, only_pt], max_zoom=5)
        else:
            m.fit_bounds(bounds)

    # --- 4. Final Dataframe Formatting ---
    if is_eb:
        final_df = results[['vedette', 'coords', 'texte']].head(50).copy()
        final_df['texte'] = final_df['texte'].astype(str).str.slice(0, 280) + '...'
    else:
        final_df = results[['headword', 'coordinates', 'meridian', 'text']].head(50).copy()
        # List/array cells are stringified so the table can display them.
        final_df['coordinates'] = final_df['coordinates'].astype(str)
        final_df['meridian'] = final_df['meridian'].astype(str)
        final_df['text'] = final_df['text'].astype(str).str.slice(0, 280) + '...'

    return final_df, m._repr_html_()
209
+
210
+ # --- 4. Gradio Interface ---
211
# Markdown shown at the top of the page.
description = """
# 🌍 Historical Encyclopedias Coordinates Explorer
---

**Disclaimer:** This application is a demonstration prototype currently under development.

This application allows you to explore and compare manually annotated geographical coordinates from several major 18th and 19th-century encyclopedias:
* The **Encyclopédie** by Diderot and d'Alembert (FR, ~1751): https://huggingface.co/datasets/GEODE/edda-coordinata
* The **Encyclopædia Britannica** 7th edition (EN, ~1842): https://huggingface.co/datasets/pnugues/EB7
* The **Encyclopædia Britannica** 9th edition (EN, ~1875): https://huggingface.co/datasets/pnugues/EB9

Select the dataset, then search for any term within the article's text or its title (headword). The corresponding coordinates will be automatically projected onto the interactive map.

---
"""

# Dropdown entries; the first one is preselected.
_DATASET_LABELS = [
    "Encyclopédie de Diderot et d'Alembert",
    "Encyclopædia Britannica 7th edition",
    "Encyclopædia Britannica 9th edition",
]

with gr.Blocks(title="Historical Encyclopedias Coordinates Explorer") as demo:
    gr.Markdown(description)

    with gr.Row():
        # Left column: search controls.
        with gr.Column(scale=1):
            dataset_dropdown = gr.Dropdown(
                choices=_DATASET_LABELS,
                value=_DATASET_LABELS[0],
                label="Choose Dataset",
            )
            search_input = gr.Textbox(
                label="Search term",
                placeholder="E.g., Paris, river, castle...",
            )
            search_mode = gr.Radio(
                choices=["headword", "text"],
                value="headword",
                label="Search in:",
            )
            btn = gr.Button("Search on map", variant="primary")

        # Right column: rendered folium map.
        with gr.Column(scale=2):
            map_output = gr.HTML(label="Map Visualization")

    table_output = gr.Dataframe(label="Results (max 50)", interactive=False, wrap=True)

    # Order matches the search_and_map signature and its return tuple.
    inputs = [search_input, search_mode, dataset_dropdown]
    outputs = [table_output, map_output]

    # Clicking the button and pressing Enter in the text box run the same search.
    for register in (btn.click, search_input.submit):
        register(search_and_map, inputs, outputs)

if __name__ == "__main__":
    demo.launch()