DavMelchi committed on
Commit
31bc0df
·
1 Parent(s): 5065564

Add points KML export to FN4B parser

Browse files
Files changed (1) hide show
  1. apps/fnb_parser.py +81 -8
apps/fnb_parser.py CHANGED
@@ -7,10 +7,12 @@ upload one or many Word documents and instantly visualise / download the results
7
  import io
8
  import os
9
  import re
 
10
  from typing import List
11
 
12
  import pandas as pd
13
  import plotly.express as px
 
14
  import streamlit as st
15
  from docx import Document
16
  from streamlit.commands.execution_control import rerun
@@ -237,6 +239,61 @@ def process_files_to_dataframe(uploaded_files) -> pd.DataFrame:
237
  return df
238
 
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  ###############################################################################
241
  # ----------------------------- Streamlit UI ------------------------------ #
242
  ###############################################################################
@@ -287,14 +344,30 @@ def main() -> None:
287
  buffer = io.BytesIO()
288
  with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
289
  df.to_excel(writer, index=False, sheet_name="Extract")
290
- st.download_button(
291
- label="Download Excel",
292
- data=buffer.getvalue(),
293
- file_name="extracted_fnb.xlsx",
294
- mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
295
- on_click="ignore",
296
- type="primary",
297
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
  st.markdown("---")
300
 
 
7
  import io
8
  import os
9
  import re
10
+ from datetime import datetime
11
  from typing import List
12
 
13
  import pandas as pd
14
  import plotly.express as px
15
+ import simplekml
16
  import streamlit as st
17
  from docx import Document
18
  from streamlit.commands.execution_control import rerun
 
239
  return df
240
 
241
 
242
+ def generate_points_kml(df: pd.DataFrame) -> io.BytesIO | None:
243
+ """Build a point-only KML file from extracted site coordinates."""
244
+ required_columns = {"X_decimal", "Y_decimal"}
245
+ if not required_columns.issubset(df.columns):
246
+ return None
247
+
248
+ metadata_columns = [
249
+ "Code",
250
+ "Site Name",
251
+ "Localité",
252
+ "Adresse",
253
+ "File",
254
+ "X_decimal",
255
+ "Y_decimal",
256
+ ]
257
+ available_columns = [col for col in metadata_columns if col in df.columns]
258
+ points_df = df[available_columns].dropna(subset=["X_decimal", "Y_decimal"])
259
+ if points_df.empty:
260
+ return None
261
+
262
+ dedupe_keys = [col for col in ["Code", "X_decimal", "Y_decimal"] if col in points_df.columns]
263
+ if dedupe_keys:
264
+ points_df = points_df.drop_duplicates(subset=dedupe_keys)
265
+
266
+ kml = simplekml.Kml()
267
+ for _, row in points_df.iterrows():
268
+ point_name = row.get("Code") or row.get("Site Name") or "Site"
269
+ point = kml.newpoint(
270
+ name=str(point_name),
271
+ coords=[(float(row["Y_decimal"]), float(row["X_decimal"]))],
272
+ )
273
+ point.style.iconstyle.icon.href = (
274
+ "http://maps.google.com/mapfiles/kml/pushpin/ylw-pushpin.png"
275
+ )
276
+ point.style.iconstyle.scale = 1.1
277
+
278
+ description_parts = []
279
+ for label, column in [
280
+ ("Code", "Code"),
281
+ ("Site Name", "Site Name"),
282
+ ("Localité", "Localité"),
283
+ ("Adresse", "Adresse"),
284
+ ("Source File", "File"),
285
+ ]:
286
+ value = row.get(column)
287
+ if pd.notna(value) and str(value).strip():
288
+ description_parts.append(f"{label}: {value}")
289
+ point.description = "<br>".join(description_parts)
290
+
291
+ kml_data = io.BytesIO()
292
+ kml_data.write(kml.kml().encode("utf-8"))
293
+ kml_data.seek(0)
294
+ return kml_data
295
+
296
+
297
  ###############################################################################
298
  # ----------------------------- Streamlit UI ------------------------------ #
299
  ###############################################################################
 
344
  buffer = io.BytesIO()
345
  with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
346
  df.to_excel(writer, index=False, sheet_name="Extract")
347
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
348
+ excel_col, kml_col = st.columns(2)
349
+ with excel_col:
350
+ st.download_button(
351
+ label="Download Excel",
352
+ data=buffer.getvalue(),
353
+ file_name=f"extracted_fnb_{timestamp}.xlsx",
354
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
355
+ on_click="ignore",
356
+ type="primary",
357
+ use_container_width=True,
358
+ )
359
+
360
+ points_kml_data = generate_points_kml(df)
361
+ if points_kml_data is not None:
362
+ with kml_col:
363
+ st.download_button(
364
+ label="Download Points KML",
365
+ data=points_kml_data.getvalue(),
366
+ file_name=f"fnb_points_{timestamp}.kml",
367
+ mime="application/vnd.google-earth.kml+xml",
368
+ on_click="ignore",
369
+ use_container_width=True,
370
+ )
371
 
372
  st.markdown("---")
373