hiroki0008 committed on
Commit
5db2c30
·
verified ·
1 Parent(s): ceee7a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -268
app.py CHANGED
@@ -1,17 +1,6 @@
1
  # app.py
2
- # HF Spacesで:
3
- # 1) japan_ver85.shp (ZIP) を読み込み
4
- # 2) Excelの住所列をNominatimで緯度経度化(キャッシュあり/オフライン可)
5
- # 3) 日本地図(境界線)にポイントを重ねて描画(静的PNG & folium HTML)
6
- #
7
- # 使い方:
8
- # - Shapefile ZIP と Excel をアップロード
9
- # - シート名/ヘッダー行/列指定を入力して「描画」
10
- # - 注意: Nominatimは利用規約順守。user_agentに連絡先の設定推奨(スペースのシークレット等)。
11
-
12
  import os
13
  import io
14
- import time
15
  import pandas as pd
16
  import numpy as np
17
  import geopandas as gpd
@@ -23,23 +12,16 @@ import folium
23
  import gradio as gr
24
  from PIL import Image
25
 
26
- # ----------------------------
27
- # 設定
28
- # ----------------------------
29
  USER_AGENT = os.environ.get(
30
  "NOMINATIM_USER_AGENT",
31
- "jp-geocoding-demo (contact: your_email@example.com)" # ← 必ず連絡先付きに変更推奨
32
  )
33
- GEOCODE_DELAY_SEC = 1.0 # Nominatimへの配慮: 1秒間隔
34
  CACHE_DIR = "data/cache"
35
  os.makedirs(CACHE_DIR, exist_ok=True)
36
  CACHE_PATH = os.path.join(CACHE_DIR, "geocode_cache.csv")
 
37
 
38
- DEFAULT_ZIP = "data/japan_ver85.zip" # リポジトリに置いた場合に使う
39
-
40
- # ----------------------------
41
- # キャッシュ読み書き
42
- # ----------------------------
43
  def load_cache():
44
  if os.path.exists(CACHE_PATH):
45
  try:
@@ -54,13 +36,9 @@ def save_cache(df_cache):
54
  try:
55
  df_cache.to_csv(CACHE_PATH, index=False)
56
  except Exception:
57
- pass # 読み取り専用環境などではスキップ
58
 
59
- # ----------------------------
60
- # Shapefile ZIP → GeoDataFrame
61
- # ----------------------------
62
  def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
63
- # pyogrioが入っていれば engine="pyogrio" を付けると高速
64
  gdf = gpd.read_file(f"zip://{zip_path}") # , engine="pyogrio"
65
  try:
66
  if gdf.crs:
@@ -69,247 +47,6 @@ def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
69
  pass
70
  return gdf
71
 
72
- # ----------------------------
73
- # ジオコーダ
74
- # ----------------------------
75
  def make_geocoder():
76
  geolocator = Nominatim(user_agent=USER_AGENT, timeout=10)
77
- geocode = RateLimiter(geolocator.geocode, min_delay_seconds=GEOCODE_DELAY_SEC)
78
- return geocode
79
-
80
- def geocode_with_cache(addresses, CFs, use_internet=True):
81
- cache = load_cache()
82
- cache_map = {row["address_input"]: (row["lat"], row["lon"], row["CF"]) for _, row in cache.iterrows()}
83
- results = []
84
-
85
- geocode = make_geocoder() if use_internet else None
86
-
87
- for a, cf in zip(addresses, CFs):
88
- a = str(a)
89
- cf = str(cf) if (cf is not None and not (isinstance(cf, float) and np.isnan(cf))) else ""
90
- # キャッシュヒット
91
- if a in cache_map:
92
- lat, lon, _cached_cf = cache_map[a]
93
- if pd.notna(lat) and pd.notna(lon):
94
- results.append({"address_input": a, "CF": cf, "lat": lat, "lon": lon})
95
- continue
96
-
97
- if not use_internet:
98
- results.append({"address_input": a, "CF": cf, "lat": np.nan, "lon": np.nan})
99
- continue
100
-
101
- # API呼び出し(RateLimiterで待機)
102
- try:
103
- loc = geocode(a, country_codes="jp", addressdetails=True)
104
- if loc:
105
- lat, lon = loc.latitude, loc.longitude
106
- else:
107
- lat, lon = np.nan, np.nan
108
- except Exception:
109
- lat, lon = np.nan, np.nan
110
-
111
- # キャッシュ更新
112
- cache = cache[cache["address_input"] != a]
113
- cache = pd.concat(
114
- [cache, pd.DataFrame([{"address_input": a, "lat": lat, "lon": lon, "CF": cf}])],
115
- ignore_index=True
116
- )
117
- save_cache(cache)
118
-
119
- results.append({"address_input": a, "CF": cf, "lat": lat, "lon": lon})
120
-
121
- return pd.DataFrame(results)
122
-
123
- # ----------------------------
124
- # 可視化(matplotlib)
125
- # ----------------------------
126
- def plot_map_png(gdf_pref: gpd.GeoDataFrame, gdf_pts: gpd.GeoDataFrame,
127
- line_width: float = 0.6, figsize=(7, 7)) -> Image.Image:
128
- fig, ax = plt.subplots(figsize=figsize)
129
- gdf_pref.boundary.plot(ax=ax, linewidth=line_width, color="black")
130
- # 有効な点のみ
131
- gdf_pts_valid = gdf_pts[gdf_pts.geometry.notnull()]
132
- if not gdf_pts_valid.empty:
133
- # CF列(任意)を数値化してカラーマップに使用。無ければ等色。
134
- cf_num = pd.to_numeric(gdf_pts_valid.get("CF", pd.Series([np.nan]*len(gdf_pts_valid))), errors="coerce")
135
- # markersizeは見やすいよう固定(必要ならスライダで可変化も可)
136
- gdf_pts_valid.assign(CF_num=cf_num).plot(
137
- ax=ax, column="CF_num", cmap="OrRd", markersize=12, alpha=0.85, legend=True
138
- )
139
- ax.set_axis_off()
140
- plt.tight_layout()
141
- buf = io.BytesIO()
142
- fig.savefig(buf, format="png", dpi=200)
143
- plt.close(fig)
144
- buf.seek(0)
145
- return Image.open(buf)
146
-
147
- # ----------------------------
148
- # 可視化(folium)
149
- # ----------------------------
150
- def make_folium_html(gdf_pref: gpd.GeoDataFrame, gdf_pts: gpd.GeoDataFrame):
151
- # 中心位置(ポイントがあれば中央値、なければ東京駅付近)
152
- gdf_pts_valid = gdf_pts[gdf_pts.geometry.notnull()]
153
- if not gdf_pts_valid.empty:
154
- center_lat = gdf_pts_valid.geometry.y.median()
155
- center_lon = gdf_pts_valid.geometry.x.median()
156
- zoom = 6
157
- else:
158
- center_lat, center_lon, zoom = 35.6812, 139.7671, 5
159
-
160
- m = folium.Map(location=[center_lat, center_lon], zoom_start=zoom)
161
-
162
- # 境界線(軽量化のため boundary のみ簡易表示)
163
- try:
164
- folium.GeoJson(gdf_pref.to_json(), name="prefecture").add_to(m)
165
- except Exception:
166
- pass
167
-
168
- for _, r in gdf_pts_valid.iterrows():
169
- lat, lon = r.geometry.y, r.geometry.x
170
- popup = f"{r.get('address_input','(no addr)')}<br>CF:{r.get('CF','')}"
171
- folium.CircleMarker(
172
- location=(float(lat), float(lon)),
173
- radius=4,
174
- fill=True,
175
- fill_opacity=0.9,
176
- popup=popup,
177
- ).add_to(m)
178
-
179
- return m._repr_html_()
180
-
181
- # ----------------------------
182
- # パイプライン
183
- # ----------------------------
184
- def _coerce_indexer(x):
185
- # 列名/数字の両対応
186
- try:
187
- return int(x)
188
- except Exception:
189
- return x
190
-
191
- def run(zip_file, excel_file, sheet_name, header_row, address_col, power_col,
192
- use_inet, line_width):
193
- # 1) Shapefile ZIP の決定
194
- if zip_file is not None and hasattr(zip_file, "name") and os.path.exists(zip_file.name):
195
- zip_path = zip_file.name
196
- elif os.path.exists(DEFAULT_ZIP):
197
- zip_path = DEFAULT_ZIP
198
- else:
199
- return None, None, "Shapefile の ZIP をアップロードするか、data/japan_ver85.zip を配置してください。"
200
-
201
- # 2) 行政界読み込み
202
- try:
203
- gdf_pref = load_gdf_from_zip(zip_path)
204
- except Exception as e:
205
- return None, None, f"行政界の読み込みに失敗しました: {e}"
206
-
207
- # 3) Excel 読み込み & 列抽出
208
- if excel_file is None or not hasattr(excel_file, "name"):
209
- # 住所点なしでも地図だけ返せるようにする(要件に合わせてここでエラーにしてもOK)
210
- gdf_pts = gpd.GeoDataFrame(columns=["address_input", "CF", "lat", "lon"], geometry=[], crs="EPSG:4326")
211
- else:
212
- try:
213
- df = pd.read_excel(excel_file.name, sheet_name=sheet_name, header=int(header_row))
214
- except Exception as e:
215
- return None, None, f"Excel の読み込みに失敗しました: {e}"
216
-
217
- addr_series = df.iloc[:, address_col] if isinstance(address_col, int) else df[address_col]
218
- cf_series = df.iloc[:, power_col] if isinstance(power_col, int) else df[power_col]
219
-
220
- addresses = addr_series.astype(str).tolist()
221
- cfs = cf_series.tolist()
222
-
223
- # 4) ジオコーディング(キャッシュ活用)
224
- geo_df = geocode_with_cache(addresses, cfs, use_internet=bool(use_inet))
225
-
226
- # 5) GeoDataFrame 化
227
- geometry = [
228
- Point(lon, lat) if (pd.notna(lat) and pd.notna(lon)) else None
229
- for lat, lon in zip(geo_df["lat"], geo_df["lon"])
230
- ]
231
- gdf_pts = gpd.GeoDataFrame(geo_df, geometry=geometry, crs="EPSG:4326")
232
-
233
- # 6) 可視化(matplotlib)
234
- try:
235
- img = plot_map_png(gdf_pref, gdf_pts, line_width=line_width)
236
- except Exception as e:
237
- return None, None, f"静的描画に失敗しました: {e}"
238
-
239
- # 7) 可視化(folium)
240
- try:
241
- html = make_folium_html(gdf_pref, gdf_pts)
242
- except Exception as e:
243
- html = f"<p>folium描画に失敗しました: {e}</p>"
244
-
245
- # 8) 情報メモ
246
- info = []
247
- info.append(f"都道府県レコード数: {len(gdf_pref)}")
248
- if gdf_pref.crs:
249
- info.append(f"PREF CRS: {gdf_pref.crs}")
250
- info.append(f"ポイント数(有効座標): {int(gdf_pts.geometry.notnull().sum())} / {len(gdf_pts)}")
251
- if not gdf_pts.empty and gdf_pts.crs:
252
- info.append(f"PTS CRS: {gdf_pts.crs}")
253
-
254
- return img, html, "\n".join(info)
255
-
256
- # ----------------------------
257
- # Gradio UI
258
- # ----------------------------
259
- with gr.Blocks(title="Japan Shapefile + Excel Geocoding Plotter") as demo:
260
- gr.Markdown("## japan_ver85.shp(ZIP) + Excel住所 → 日本地図にプロット")
261
- gr.Markdown(
262
- "- **Shapefile ZIP**(`.shp/.shx/.dbf/.prj` など同梱)をアップロードしてください。\n"
263
- "- **Excel** は住所列と数値列(任意: CF/出力など)を指定してください。\n"
264
- "- Nominatim への問い合わせはレート制限済み。通信が難しい環境では「通信オフ(キャッシュのみ)」で実行できます。"
265
- )
266
-
267
- with gr.Row():
268
- zip_in = gr.File(label="Shapefile (ZIP)", file_count="single", file_types=[".zip"])
269
- xlsx_in = gr.File(label="Excelファイル(住所付き)", file_count="single", file_types=[".xlsx", ".xls"])
270
-
271
- with gr.Row():
272
- sheet = gr.Textbox(label="シート名", value="認定設備")
273
- header_row = gr.Number(label="ヘッダー行番号(0始まり)", value=2, precision=0)
274
-
275
- with gr.Row():
276
- address_col = gr.Textbox(label="住所列(列名 or 0始まり列番号)", value="発電設備の所在地")
277
- power_col = gr.Textbox(label="数値列(任意:列名 or 0始まり列番号)", value="発電出力(kW)")
278
-
279
- with gr.Row():
280
- use_inet = gr.Checkbox(label="Nominatimに問い合わせ(オフでキャッシュのみ使用)", value=True)
281
- line_width = gr.Slider(0.2, 2.0, value=0.6, step=0.1, label="境界線の太さ")
282
-
283
- run_btn = gr.Button("描画")
284
-
285
- out_img = gr.Image(label="静的地図(matplotlib)", type="pil")
286
- out_html = gr.HTML(label="インタラクティブ地図(folium)")
287
- out_txt = gr.Textbox(label="メタ情報", lines=4)
288
-
289
- def _parse_indexer(x):
290
- # 前端UIの入力を列名/番号に解釈
291
- try:
292
- return int(x)
293
- except Exception:
294
- return x
295
-
296
- def app_run(zipf, xls, s, h, a, p, inet, lw):
297
- return run(
298
- zipf,
299
- xls,
300
- s,
301
- int(h),
302
- _parse_indexer(a),
303
- _parse_indexer(p),
304
- inet,
305
- lw
306
- )
307
-
308
- run_btn.click(
309
- fn=app_run,
310
- inputs=[zip_in, xlsx_in, sheet, header_row, address_col, power_col, use_inet, line_width],
311
- outputs=[out_img, out_html, out_txt],
312
- )
313
-
314
- if __name__ == "__main__":
315
- demo.launch()
 
1
  # app.py
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  import io
 
4
  import pandas as pd
5
  import numpy as np
6
  import geopandas as gpd
 
12
  import gradio as gr
13
  from PIL import Image
14
 
 
 
 
15
  USER_AGENT = os.environ.get(
16
  "NOMINATIM_USER_AGENT",
17
+ "jp-geocoding-demo (contact: your_email@example.com)"
18
  )
19
+ GEOCODE_DELAY_SEC = 1.0
20
  CACHE_DIR = "data/cache"
21
  os.makedirs(CACHE_DIR, exist_ok=True)
22
  CACHE_PATH = os.path.join(CACHE_DIR, "geocode_cache.csv")
23
+ DEFAULT_ZIP = "data/japan_ver85.zip"
24
 
 
 
 
 
 
25
  def load_cache():
26
  if os.path.exists(CACHE_PATH):
27
  try:
 
36
  try:
37
  df_cache.to_csv(CACHE_PATH, index=False)
38
  except Exception:
39
+ pass
40
 
 
 
 
41
  def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
 
42
  gdf = gpd.read_file(f"zip://{zip_path}") # , engine="pyogrio"
43
  try:
44
  if gdf.crs:
 
47
  pass
48
  return gdf
49
 
 
 
 
50
  def make_geocoder():
51
  geolocator = Nominatim(user_agent=USER_AGENT, timeout=10)
52
+ geocode = RateL