Add points KML export to FN4B parser
Browse files- apps/fnb_parser.py +81 -8
apps/fnb_parser.py
CHANGED
|
@@ -7,10 +7,12 @@ upload one or many Word documents and instantly visualise / download the results
|
|
| 7 |
import io
|
| 8 |
import os
|
| 9 |
import re
|
|
|
|
| 10 |
from typing import List
|
| 11 |
|
| 12 |
import pandas as pd
|
| 13 |
import plotly.express as px
|
|
|
|
| 14 |
import streamlit as st
|
| 15 |
from docx import Document
|
| 16 |
from streamlit.commands.execution_control import rerun
|
|
@@ -237,6 +239,61 @@ def process_files_to_dataframe(uploaded_files) -> pd.DataFrame:
|
|
| 237 |
return df
|
| 238 |
|
| 239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
###############################################################################
|
| 241 |
# ----------------------------- Streamlit UI ------------------------------ #
|
| 242 |
###############################################################################
|
|
@@ -287,14 +344,30 @@ def main() -> None:
|
|
| 287 |
buffer = io.BytesIO()
|
| 288 |
with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
|
| 289 |
df.to_excel(writer, index=False, sheet_name="Extract")
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
st.markdown("---")
|
| 300 |
|
|
|
|
| 7 |
import io
|
| 8 |
import os
|
| 9 |
import re
|
| 10 |
+
from datetime import datetime
|
| 11 |
from typing import List
|
| 12 |
|
| 13 |
import pandas as pd
|
| 14 |
import plotly.express as px
|
| 15 |
+
import simplekml
|
| 16 |
import streamlit as st
|
| 17 |
from docx import Document
|
| 18 |
from streamlit.commands.execution_control import rerun
|
|
|
|
| 239 |
return df
|
| 240 |
|
| 241 |
|
| 242 |
+
def generate_points_kml(df: pd.DataFrame) -> io.BytesIO | None:
|
| 243 |
+
"""Build a point-only KML file from extracted site coordinates."""
|
| 244 |
+
required_columns = {"X_decimal", "Y_decimal"}
|
| 245 |
+
if not required_columns.issubset(df.columns):
|
| 246 |
+
return None
|
| 247 |
+
|
| 248 |
+
metadata_columns = [
|
| 249 |
+
"Code",
|
| 250 |
+
"Site Name",
|
| 251 |
+
"Localité",
|
| 252 |
+
"Adresse",
|
| 253 |
+
"File",
|
| 254 |
+
"X_decimal",
|
| 255 |
+
"Y_decimal",
|
| 256 |
+
]
|
| 257 |
+
available_columns = [col for col in metadata_columns if col in df.columns]
|
| 258 |
+
points_df = df[available_columns].dropna(subset=["X_decimal", "Y_decimal"])
|
| 259 |
+
if points_df.empty:
|
| 260 |
+
return None
|
| 261 |
+
|
| 262 |
+
dedupe_keys = [col for col in ["Code", "X_decimal", "Y_decimal"] if col in points_df.columns]
|
| 263 |
+
if dedupe_keys:
|
| 264 |
+
points_df = points_df.drop_duplicates(subset=dedupe_keys)
|
| 265 |
+
|
| 266 |
+
kml = simplekml.Kml()
|
| 267 |
+
for _, row in points_df.iterrows():
|
| 268 |
+
point_name = row.get("Code") or row.get("Site Name") or "Site"
|
| 269 |
+
point = kml.newpoint(
|
| 270 |
+
name=str(point_name),
|
| 271 |
+
coords=[(float(row["Y_decimal"]), float(row["X_decimal"]))],
|
| 272 |
+
)
|
| 273 |
+
point.style.iconstyle.icon.href = (
|
| 274 |
+
"http://maps.google.com/mapfiles/kml/pushpin/ylw-pushpin.png"
|
| 275 |
+
)
|
| 276 |
+
point.style.iconstyle.scale = 1.1
|
| 277 |
+
|
| 278 |
+
description_parts = []
|
| 279 |
+
for label, column in [
|
| 280 |
+
("Code", "Code"),
|
| 281 |
+
("Site Name", "Site Name"),
|
| 282 |
+
("Localité", "Localité"),
|
| 283 |
+
("Adresse", "Adresse"),
|
| 284 |
+
("Source File", "File"),
|
| 285 |
+
]:
|
| 286 |
+
value = row.get(column)
|
| 287 |
+
if pd.notna(value) and str(value).strip():
|
| 288 |
+
description_parts.append(f"{label}: {value}")
|
| 289 |
+
point.description = "<br>".join(description_parts)
|
| 290 |
+
|
| 291 |
+
kml_data = io.BytesIO()
|
| 292 |
+
kml_data.write(kml.kml().encode("utf-8"))
|
| 293 |
+
kml_data.seek(0)
|
| 294 |
+
return kml_data
|
| 295 |
+
|
| 296 |
+
|
| 297 |
###############################################################################
|
| 298 |
# ----------------------------- Streamlit UI ------------------------------ #
|
| 299 |
###############################################################################
|
|
|
|
| 344 |
buffer = io.BytesIO()
|
| 345 |
with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
|
| 346 |
df.to_excel(writer, index=False, sheet_name="Extract")
|
| 347 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 348 |
+
excel_col, kml_col = st.columns(2)
|
| 349 |
+
with excel_col:
|
| 350 |
+
st.download_button(
|
| 351 |
+
label="Download Excel",
|
| 352 |
+
data=buffer.getvalue(),
|
| 353 |
+
file_name=f"extracted_fnb_{timestamp}.xlsx",
|
| 354 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 355 |
+
on_click="ignore",
|
| 356 |
+
type="primary",
|
| 357 |
+
use_container_width=True,
|
| 358 |
+
)
|
| 359 |
+
|
| 360 |
+
points_kml_data = generate_points_kml(df)
|
| 361 |
+
if points_kml_data is not None:
|
| 362 |
+
with kml_col:
|
| 363 |
+
st.download_button(
|
| 364 |
+
label="Download Points KML",
|
| 365 |
+
data=points_kml_data.getvalue(),
|
| 366 |
+
file_name=f"fnb_points_{timestamp}.kml",
|
| 367 |
+
mime="application/vnd.google-earth.kml+xml",
|
| 368 |
+
on_click="ignore",
|
| 369 |
+
use_container_width=True,
|
| 370 |
+
)
|
| 371 |
|
| 372 |
st.markdown("---")
|
| 373 |
|