Geoeasy committed on
Commit
71f7204
·
verified ·
1 Parent(s): d144c25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -43
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import os
2
  import re
3
  import zipfile
@@ -7,7 +8,7 @@ import osmnx as ox
7
  import folium
8
  import branca
9
  import pandas as pd
10
- import geopandas as gpd # <- útil para alguns ambientes
11
 
12
  # --- Initial settings ---
13
  geolocator = Nominatim(user_agent="gradio_osm_app")
@@ -16,78 +17,101 @@ ox.settings.log_console = False
16
  DOWNLOAD_DIR = "/tmp/download"
17
  os.makedirs(DOWNLOAD_DIR, exist_ok=True)
18
 
19
- def slugify(name):
20
  return re.sub(r"[^0-9A-Za-z]+", "_", name).strip("_")
21
 
22
- # --------- Utilidades para salvar GPKG sem erros ----------
 
 
 
 
 
 
 
 
 
 
 
23
  def _is_complex_obj(v):
24
- # Tipos que o OGR não aceita direto como campo (list, dict, set, tuple, bytes)
25
  return isinstance(v, (list, dict, tuple, set, bytes))
26
 
27
- def clean_for_gpkg(gdf):
28
- """Sanitiza colunas e dtypes para gravação segura em GPKG."""
29
  gdf = gdf.copy()
30
 
31
- # CRS
32
  if getattr(gdf, "crs", None) is None:
33
  try:
34
  gdf.set_crs(4326, inplace=True)
35
  except Exception:
36
  pass
37
 
38
- # Nomes de colunas seguros e não muito longos
39
- gdf.columns = (
40
- pd.Index(gdf.columns)
41
- .map(str)
42
- .str.replace(r"[^0-9a-zA-Z_]", "_", regex=True)
43
- .str.replace(r"_{2,}", "_", regex=True)
44
- .str.strip("_")
45
- .str.slice(0, 60)
46
- )
47
-
48
- # Converter dtypes problemáticos
 
 
 
 
49
  for col in gdf.columns:
50
- if col == gdf.geometry.name:
51
  continue
52
 
53
- # >>> correção: use a tabela de dtypes do DataFrame
54
  dt = gdf.dtypes[col]
 
 
 
 
 
 
 
 
55
 
56
- # Pandas tipo string nativo ajuda a evitar NaN/None
57
- if dt == "object":
58
- # Se houver qualquer valor complexo, vira string integral
59
  if gdf[col].apply(_is_complex_obj).any():
60
  gdf[col] = gdf[col].astype(str)
61
  else:
62
- # Objetos simples (str/num/None) -> string estável
63
  gdf[col] = gdf[col].astype("string")
64
 
65
- # Inteiros nulos (Int64) -> float64 (OGR lida melhor) ou string
 
 
 
 
66
  if pd.api.types.is_integer_dtype(dt) and str(dt).startswith("Int"):
67
  gdf[col] = gdf[col].astype("float64")
68
 
69
- # Booleans com nulos -> string "true/false" para evitar falhas
70
  if pd.api.types.is_bool_dtype(dt):
71
  if gdf[col].isna().any():
72
  gdf[col] = gdf[col].astype("string").fillna("")
73
- else:
74
- pass # bool puro costuma funcionar
75
 
76
- # Preencher NaN em strings
77
  if gdf[col].dtype == "string":
78
  gdf[col] = gdf[col].fillna("")
79
 
 
 
 
80
  return gdf
81
 
82
- def try_to_file(gdf, path, driver="GPKG"):
83
- """Tenta salvar; em caso de erro por campo, remove apenas o campo problemático e tenta novamente."""
84
  try:
85
  gdf.to_file(path, driver=driver)
86
  return
87
  except Exception as e:
88
  msg = str(e)
89
- # Detecta qual campo quebrou (mensagem típica do pyogrio/OGR)
90
- # Ex.: "Error adding field 'fixme' to layer"
91
  m = re.search(r"field '([^']+)'", msg, flags=re.IGNORECASE)
92
  if m:
93
  bad = m.group(1)
@@ -95,12 +119,16 @@ def try_to_file(gdf, path, driver="GPKG"):
95
  gdf2 = gdf.drop(columns=[bad])
96
  gdf2.to_file(path, driver=driver)
97
  return
98
- # Se não identificar, relança
99
  raise
100
 
101
- def ensure_saved(gdf, slug, layer):
102
  if gdf is None or gdf.empty:
103
  return
 
 
 
 
 
104
  filename = f"{slug}_{layer}.gpkg"
105
  path = os.path.join(DOWNLOAD_DIR, filename)
106
  if os.path.exists(path):
@@ -108,24 +136,23 @@ def ensure_saved(gdf, slug, layer):
108
 
109
  gdf_clean = clean_for_gpkg(gdf)
110
 
111
- # Fallback final: se ainda der erro, tenta remover colunas 'problemáticas' comuns em OSM
112
  try:
113
  try_to_file(gdf_clean, path, driver="GPKG")
114
  except Exception:
115
- # Remove colunas muito propensas a erro
116
  drop_candidates = [c for c in gdf_clean.columns if c.lower() in {"fixme", "note", "source_ref"}]
117
  if drop_candidates:
118
  gdf_clean2 = gdf_clean.drop(columns=drop_candidates, errors="ignore")
119
  try_to_file(gdf_clean2, path, driver="GPKG")
120
  else:
121
- # Sem pistas: tenta converter tudo não-geom para string como último recurso
122
  gdf_last = gdf_clean.copy()
123
  for c in gdf_last.columns:
124
  if c != gdf_last.geometry.name:
125
  gdf_last[c] = gdf_last[c].astype(str)
126
  try_to_file(gdf_last, path, driver="GPKG")
127
 
128
- # --------- UI / Lógica principal ----------
129
  def make_legend(selected_layers):
130
  color_map = {
131
  "Highways": "yellow",
@@ -207,7 +234,7 @@ def map_with_layers(place_name, cb_highways, cb_buildings, cb_school,
207
  if flag:
208
  yield None, f"🔄 Downloading {name}..."
209
  gdf2 = ox.features_from_polygon(poly, tags)
210
- # Apenas polígonos para essas camadas
211
  gdf2 = gdf2[gdf2.geometry.type.isin(['Polygon', 'MultiPolygon'])]
212
  if not gdf2.empty:
213
  layers[name] = gdf2
@@ -275,7 +302,7 @@ def download_data(place_name, cb_highways, cb_buildings, cb_school,
275
  print("Erro ao gerar o ZIP:", str(e))
276
  raise gr.Error(f"❌ Download failed: {e}")
277
 
278
- # --- Layout com abas ---
279
  with gr.Blocks(title="Geoeasy View") as demo:
280
  gr.HTML("""
281
  <style>
@@ -335,5 +362,3 @@ with gr.Blocks(title="Geoeasy View") as demo:
335
 
336
  if __name__ == '__main__':
337
  demo.launch()
338
-
339
-
 
1
+ # app.py
2
  import os
3
  import re
4
  import zipfile
 
8
  import folium
9
  import branca
10
  import pandas as pd
11
+ import geopandas as gpd
12
 
13
  # --- Initial settings ---
14
  geolocator = Nominatim(user_agent="gradio_osm_app")
 
17
  DOWNLOAD_DIR = "/tmp/download"
18
  os.makedirs(DOWNLOAD_DIR, exist_ok=True)
19
 
20
def slugify(name: str) -> str:
    """Return *name* reduced to ASCII letters and digits joined by underscores.

    Every run of characters other than ``0-9``, ``A-Z`` or ``a-z`` is
    collapsed into a single ``_``; leading and trailing underscores are
    then removed.
    """
    collapsed = re.sub(r"[^0-9A-Za-z]+", "_", name)
    return collapsed.strip("_")
22
 
23
+ # --------- Utils for safe GPKG writing ----------
24
+ def _make_unique(names):
25
+ """Make column names unique by suffixing __1, __2 on duplicates."""
26
+ seen = {}
27
+ out = []
28
+ for n in names:
29
+ n0 = str(n)
30
+ cnt = seen.get(n0, 0)
31
+ out.append(n0 if cnt == 0 else f"{n0}__{cnt}")
32
+ seen[n0] = cnt + 1
33
+ return out
34
+
35
  def _is_complex_obj(v):
36
+ # Types OGR doesn't accept as field values
37
  return isinstance(v, (list, dict, tuple, set, bytes))
38
 
39
def clean_for_gpkg(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """Return a copy of *gdf* with column names and dtypes sanitized for GPKG.

    Steps, in order:

    1. Set the CRS to EPSG:4326 when the frame has none (best effort).
    2. Normalize column names: non ``[0-9a-zA-Z_]`` characters become ``_``,
       repeated underscores are collapsed, edge underscores are stripped and
       names are cut to 60 characters. Duplicates are then disambiguated
       with ``_make_unique``.
    3. Convert non-geometry columns whose dtype tends to break the write:
       tz-aware datetimes, object columns, categoricals, nullable integers
       and booleans containing missing values.

    Args:
        gdf: The frame to sanitize. It is not modified.

    Returns:
        A sanitized copy of *gdf*.
    """
    gdf = gdf.copy()

    # Ensure CRS
    if getattr(gdf, "crs", None) is None:
        try:
            gdf.set_crs(4326, inplace=True)
        except Exception:
            # Deliberate best effort: a missing CRS must not block the export.
            pass

    # Normalize column names
    cols = pd.Index(gdf.columns).map(str)
    cols = (
        cols.str.replace(r"[^0-9a-zA-Z_]", "_", regex=True)
        .str.replace(r"_{2,}", "_", regex=True)
        .str.strip("_")
        .str.slice(0, 60)
    )

    # Guarantee uniqueness (avoids gdf.dtypes[col] returning a Series)
    if cols.duplicated().any() or isinstance(cols, pd.MultiIndex):
        cols = pd.Index(_make_unique(cols))
    gdf.columns = cols

    has_geometry = hasattr(gdf, "geometry") and gdf.geometry is not None
    geom_name = gdf.geometry.name if has_geometry else None

    # Convert problematic dtypes. ``dt`` is the dtype *before* any conversion
    # in this iteration, so at most one of the dtype branches below applies.
    for col in gdf.columns:
        if col == geom_name:
            continue

        dt = gdf.dtypes[col]
        # Defensive: if dt ever comes as Series, collapse to string
        if isinstance(dt, pd.Series):
            gdf[col] = gdf[col].astype("string").fillna("")
            continue

        # tz-aware datetimes -> naive (isinstance check replaces the
        # deprecated pd.api.types.is_datetime64tz_dtype)
        if isinstance(dt, pd.DatetimeTZDtype):
            gdf[col] = gdf[col].dt.tz_convert(None)

        # objects -> string (if complex) or pandas string dtype
        if pd.api.types.is_object_dtype(dt):
            if gdf[col].apply(_is_complex_obj).any():
                gdf[col] = gdf[col].astype(str)
            else:
                gdf[col] = gdf[col].astype("string")

        # categorical -> string (isinstance check replaces the deprecated
        # pd.api.types.is_categorical_dtype)
        if isinstance(dt, pd.CategoricalDtype):
            gdf[col] = gdf[col].astype(str)

        # nullable integer -> float64
        if pd.api.types.is_integer_dtype(dt) and str(dt).startswith("Int"):
            gdf[col] = gdf[col].astype("float64")

        # booleans with NaN -> string; pure bool stays
        if pd.api.types.is_bool_dtype(dt):
            if gdf[col].isna().any():
                gdf[col] = gdf[col].astype("string").fillna("")

        # fill NaN in pandas string dtype
        if gdf[col].dtype == "string":
            gdf[col] = gdf[col].fillna("")

    # Final friendly names. Trimming to 60 characters can cut off the "__N"
    # suffixes added for uniqueness and bring duplicates back, so the rename
    # is applied only when the trimmed names are still unique.
    trimmed = [str(c).strip("_")[:60] for c in gdf.columns]
    if len(set(trimmed)) == len(trimmed):
        gdf.rename(columns=dict(zip(gdf.columns, trimmed)), inplace=True)

    return gdf
107
 
108
+ def try_to_file(gdf: gpd.GeoDataFrame, path: str, driver: str = "GPKG"):
109
+ """Try to save; on field error, drop only the offending field and retry once."""
110
  try:
111
  gdf.to_file(path, driver=driver)
112
  return
113
  except Exception as e:
114
  msg = str(e)
 
 
115
  m = re.search(r"field '([^']+)'", msg, flags=re.IGNORECASE)
116
  if m:
117
  bad = m.group(1)
 
119
  gdf2 = gdf.drop(columns=[bad])
120
  gdf2.to_file(path, driver=driver)
121
  return
 
122
  raise
123
 
124
+ def ensure_saved(gdf: gpd.GeoDataFrame, slug: str, layer: str):
125
  if gdf is None or gdf.empty:
126
  return
127
+ # Extra guard: unique columns before cleaning/writing
128
+ if pd.Index(gdf.columns).duplicated().any():
129
+ gdf = gdf.copy()
130
+ gdf.columns = pd.Index(_make_unique(pd.Index(gdf.columns).map(str)))
131
+
132
  filename = f"{slug}_{layer}.gpkg"
133
  path = os.path.join(DOWNLOAD_DIR, filename)
134
  if os.path.exists(path):
 
136
 
137
  gdf_clean = clean_for_gpkg(gdf)
138
 
 
139
  try:
140
  try_to_file(gdf_clean, path, driver="GPKG")
141
  except Exception:
142
+ # Drop common problematic OSM fields as a second attempt
143
  drop_candidates = [c for c in gdf_clean.columns if c.lower() in {"fixme", "note", "source_ref"}]
144
  if drop_candidates:
145
  gdf_clean2 = gdf_clean.drop(columns=drop_candidates, errors="ignore")
146
  try_to_file(gdf_clean2, path, driver="GPKG")
147
  else:
148
+ # Last resort: stringify all non-geometry columns
149
  gdf_last = gdf_clean.copy()
150
  for c in gdf_last.columns:
151
  if c != gdf_last.geometry.name:
152
  gdf_last[c] = gdf_last[c].astype(str)
153
  try_to_file(gdf_last, path, driver="GPKG")
154
 
155
+ # --------- UI / main logic ----------
156
  def make_legend(selected_layers):
157
  color_map = {
158
  "Highways": "yellow",
 
234
  if flag:
235
  yield None, f"🔄 Downloading {name}..."
236
  gdf2 = ox.features_from_polygon(poly, tags)
237
+ # Only polygons for these layers
238
  gdf2 = gdf2[gdf2.geometry.type.isin(['Polygon', 'MultiPolygon'])]
239
  if not gdf2.empty:
240
  layers[name] = gdf2
 
302
  print("Erro ao gerar o ZIP:", str(e))
303
  raise gr.Error(f"❌ Download failed: {e}")
304
 
305
+ # --- Layout with tabs ---
306
  with gr.Blocks(title="Geoeasy View") as demo:
307
  gr.HTML("""
308
  <style>
 
362
 
363
  if __name__ == '__main__':
364
  demo.launch()