hiroki0008 committed on
Commit
ceee7a4
·
verified ·
1 Parent(s): 99d5f70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -29
app.py CHANGED
@@ -1,14 +1,66 @@
1
  # app.py
 
 
 
 
 
 
 
 
 
 
2
  import os
3
  import io
 
 
 
4
  import geopandas as gpd
5
  import matplotlib.pyplot as plt
 
 
 
 
6
  import gradio as gr
7
- from PIL import Image # ★ 追加
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- DEFAULT_ZIP = "data/japan_ver85.zip"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
 
11
  def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
 
12
  gdf = gpd.read_file(f"zip://{zip_path}") # , engine="pyogrio"
13
  try:
14
  if gdf.crs:
@@ -17,63 +69,247 @@ def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
17
  pass
18
  return gdf
19
 
20
- def plot_boundary(gdf: gpd.GeoDataFrame, linewidth: float = 0.6, figsize=(7,7)):
21
- """
22
- GeoDataFrame の境界線のみを静的にプロットし、PIL.Image を返す
23
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  fig, ax = plt.subplots(figsize=figsize)
25
- gdf.boundary.plot(ax=ax, linewidth=linewidth)
 
 
 
 
 
 
 
 
 
26
  ax.set_axis_off()
27
  plt.tight_layout()
28
-
29
  buf = io.BytesIO()
30
  fig.savefig(buf, format="png", dpi=200)
31
  plt.close(fig)
32
  buf.seek(0)
33
- img = Image.open(buf) # ★ BytesIO → PIL.Image に変換
34
- return img
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
- def run(zip_file, line_width, width_px):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  if zip_file is not None and hasattr(zip_file, "name") and os.path.exists(zip_file.name):
38
  zip_path = zip_file.name
39
  elif os.path.exists(DEFAULT_ZIP):
40
  zip_path = DEFAULT_ZIP
41
  else:
42
- return None, "Shapefile の ZIP をアップロードするか、data/japan_ver85.zip を配置してください。"
43
 
 
44
  try:
45
- gdf = load_gdf_from_zip(zip_path)
46
  except Exception as e:
47
- return None, f"読み込みに失敗しました: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
 
 
 
 
 
 
 
 
 
 
 
49
  try:
50
- # width_px PNG の実寸に直接効かないので、必要なら figsize/dpi を調整してください
51
- img = plot_boundary(gdf, linewidth=line_width)
52
  except Exception as e:
53
- return None, f"描画に失敗しました: {e}"
54
 
 
 
 
 
 
 
 
55
  info = []
56
- info.append(f"レコード数: {len(gdf)}")
57
- if gdf.crs:
58
- info.append(f"CRS: {gdf.crs}")
59
- cols = list(map(str, gdf.columns))
60
- info.append("列名(先頭10): " + ", ".join(cols[:10]))
61
- return img, "\n".join(info)
 
 
62
 
63
- with gr.Blocks(title="Japan Shapefile Plotter") as demo:
64
- gr.Markdown("## japan_ver85.shp(ZIP)を単純にプロット")
 
 
 
 
 
 
 
 
65
 
66
  with gr.Row():
67
  zip_in = gr.File(label="Shapefile (ZIP)", file_count="single", file_types=[".zip"])
 
 
 
 
 
 
68
  with gr.Row():
69
- line_width = gr.Slider(0.2, 2.0, value=0.6, step=0.1, label="線の太さ")
70
- width_px = gr.Slider(400, 1600, value=900, step=50, label="画像幅(px)(注: PNGは実質dpi依存)")
 
 
 
 
71
 
72
  run_btn = gr.Button("描画")
73
- out_img = gr.Image(label="静的プロット(境界線)", type="pil") # ★ PIL を受け取る
 
 
74
  out_txt = gr.Textbox(label="メタ情報", lines=4)
75
 
76
- run_btn.click(fn=run, inputs=[zip_in, line_width, width_px], outputs=[out_img, out_txt])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  if __name__ == "__main__":
79
  demo.launch()
 
1
  # app.py
2
+ # HF Spacesで:
3
+ # 1) japan_ver85.shp (ZIP) を読み込み
4
+ # 2) Excelの住所列をNominatimで緯度経度化(キャッシュあり/オフライン可)
5
+ # 3) 日本地図(境界線)にポイントを重ねて描画(静的PNG & folium HTML)
6
+ #
7
+ # 使い方:
8
+ # - Shapefile ZIP と Excel をアップロード
9
+ # - シート名/ヘッダー行/列指定を入力して「描画」
10
+ # - 注意: Nominatimは利用規約順守。user_agentに連絡先の設定推奨(スペースのシークレット等)。
11
+
12
  import os
13
  import io
14
+ import time
15
+ import pandas as pd
16
+ import numpy as np
17
  import geopandas as gpd
18
  import matplotlib.pyplot as plt
19
+ from shapely.geometry import Point
20
+ from geopy.geocoders import Nominatim
21
+ from geopy.extra.rate_limiter import RateLimiter
22
+ import folium
23
  import gradio as gr
24
+ from PIL import Image
25
+
26
+ # ----------------------------
27
+ # 設定
28
+ # ----------------------------
29
# User-Agent sent to Nominatim. Override the placeholder through the
# NOMINATIM_USER_AGENT environment variable (e.g. a Space secret) so that the
# string carries a real contact address.
USER_AGENT = os.environ.get(
    "NOMINATIM_USER_AGENT",
    "jp-geocoding-demo (contact: your_email@example.com)",  # replace with a real contact
)
GEOCODE_DELAY_SEC = 1.0  # minimum spacing between Nominatim requests, in seconds
CACHE_DIR = "data/cache"
try:
    os.makedirs(CACHE_DIR, exist_ok=True)
except OSError:
    # The cache is optional: save_cache already skips failed writes, so a
    # read-only filesystem must not stop the module from importing.
    pass
CACHE_PATH = os.path.join(CACHE_DIR, "geocode_cache.csv")

DEFAULT_ZIP = "data/japan_ver85.zip"  # used when the ZIP is committed to the repository
39
 
40
+ # ----------------------------
41
+ # キャッシュ読み書き
42
+ # ----------------------------
43
def load_cache():
    """Return the geocoding cache as a DataFrame.

    The CSV at ``CACHE_PATH`` is used only when it exists, can be read, and
    carries all of the columns ``address_input``, ``lat``, ``lon`` and ``CF``.
    In every other case an empty frame with exactly those columns is returned.
    """
    required = ["address_input", "lat", "lon", "CF"]
    empty = pd.DataFrame(columns=required)
    if not os.path.exists(CACHE_PATH):
        return empty
    try:
        cached = pd.read_csv(CACHE_PATH)
        has_schema = set(required).issubset(cached.columns)
    except Exception:
        # Best-effort: an unreadable cache is treated as if it were absent.
        return empty
    return cached if has_schema else empty
52
+
53
def save_cache(df_cache):
    """Write *df_cache* to ``CACHE_PATH`` as CSV without the index.

    Persisting the cache is best-effort: any failure while writing (for
    example in a read-only environment) is ignored and nothing is raised.
    """
    try:
        df_cache.to_csv(path_or_buf=CACHE_PATH, index=False)
    except Exception:
        # Skip silently; the caller keeps working with the in-memory data.
        return
58
 
59
+ # ----------------------------
60
+ # Shapefile ZIP → GeoDataFrame
61
+ # ----------------------------
62
  def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
63
+ # pyogrioが入っていれば engine="pyogrio" を付けると高速
64
  gdf = gpd.read_file(f"zip://{zip_path}") # , engine="pyogrio"
65
  try:
66
  if gdf.crs:
 
69
  pass
70
  return gdf
71
 
72
+ # ----------------------------
73
+ # ジオコーダ
74
+ # ----------------------------
75
def make_geocoder():
    """Build the rate-limited geocoding callable.

    Returns:
        A ``RateLimiter`` around ``Nominatim.geocode`` that is configured with
        ``USER_AGENT``, a 10 second timeout and at least ``GEOCODE_DELAY_SEC``
        seconds between calls.
    """
    nominatim = Nominatim(user_agent=USER_AGENT, timeout=10)
    return RateLimiter(nominatim.geocode, min_delay_seconds=GEOCODE_DELAY_SEC)
79
+
80
def geocode_with_cache(addresses, CFs, use_internet=True):
    """Geocode addresses, consulting the on-disk cache before calling Nominatim.

    Args:
        addresses: Iterable of address values; each one is converted with ``str``.
        CFs: Iterable of per-address values carried through to the ``CF``
            column; ``None`` and float NaN become ``""``. It is iterated in
            lockstep with ``addresses`` (``zip`` stops at the shorter one).
        use_internet: When false, no geocoder is created and every address
            without cached coordinates gets NaN for ``lat``/``lon``.

    Returns:
        A DataFrame with one row per input pair and the columns
        ``address_input``, ``CF``, ``lat`` and ``lon``.
    """
    flush_every = 20  # persist progress periodically instead of after every request

    cache = load_cache()
    # Keys are normalised to str because the lookup below always uses str(a).
    cache_map = {
        str(addr): (lat, lon)
        for addr, lat, lon in zip(cache["address_input"], cache["lat"], cache["lon"])
    }
    geocode = make_geocoder() if use_internet else None

    fresh = {}   # address -> cache row produced during this call
    unsaved = 0  # rows added to `fresh` since the last write
    results = []

    def flush():
        # Drop stale rows of re-geocoded addresses, then append the new rows.
        kept = cache[~cache["address_input"].astype(str).isin(list(fresh))]
        save_cache(
            pd.concat([kept, pd.DataFrame(list(fresh.values()))], ignore_index=True)
        )

    try:
        for a, cf in zip(addresses, CFs):
            a = str(a)
            cf = "" if (cf is None or (isinstance(cf, float) and np.isnan(cf))) else str(cf)

            lat, lon = cache_map.get(a, (np.nan, np.nan))
            if pd.isna(lat) or pd.isna(lon):
                # An address already attempted in this call is not requested
                # again, so duplicates do not cost another rate-limited request.
                if geocode is not None and a not in fresh:
                    try:
                        loc = geocode(a, country_codes="jp", addressdetails=True)
                    except Exception:
                        # Network boundary: a failed lookup is an unknown location.
                        loc = None
                    if loc:
                        lat, lon = loc.latitude, loc.longitude
                    else:
                        lat, lon = np.nan, np.nan

                    fresh[a] = {"address_input": a, "lat": lat, "lon": lon, "CF": cf}
                    cache_map[a] = (lat, lon)
                    unsaved += 1
                    if unsaved >= flush_every:
                        flush()
                        unsaved = 0
                else:
                    lat, lon = np.nan, np.nan

            results.append({"address_input": a, "CF": cf, "lat": lat, "lon": lon})
    finally:
        # Keep whatever was geocoded even if the loop is interrupted.
        if unsaved:
            flush()

    return pd.DataFrame(results)
122
+
123
+ # ----------------------------
124
+ # 可視化(matplotlib)
125
+ # ----------------------------
126
def plot_map_png(gdf_pref: gpd.GeoDataFrame, gdf_pts: gpd.GeoDataFrame,
                 line_width: float = 0.6, figsize=(7, 7)) -> Image.Image:
    """Render boundaries with the geocoded points on top and return a PNG image.

    Args:
        gdf_pref: Polygons whose ``boundary`` is drawn in black.
        gdf_pts: Points to overlay; rows with a null geometry are ignored. An
            optional ``CF`` column is converted to numbers and used for colour.
        line_width: Line width of the boundaries.
        figsize: Matplotlib figure size in inches.

    Returns:
        A ``PIL.Image.Image`` opened from the PNG rendered at 200 dpi.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        gdf_pref.boundary.plot(ax=ax, linewidth=line_width, color="black")

        pts = gdf_pts[gdf_pts.geometry.notnull()]
        if not pts.empty:
            if "CF" in pts.columns:
                cf_num = pd.to_numeric(pts["CF"], errors="coerce")
            else:
                # Share the points' index so that assign() aligns row by row.
                cf_num = pd.Series(np.nan, index=pts.index)

            if cf_num.notna().any():
                pts.assign(CF_num=cf_num).plot(
                    ax=ax, column="CF_num", cmap="OrRd", markersize=12, alpha=0.85,
                    legend=True,
                    # Keep points without a numeric CF visible instead of dropping them.
                    missing_kwds={"color": "lightgrey"},
                )
            else:
                # No numeric CF at all: there is nothing to build a colour scale
                # from, so draw every point in one colour.
                pts.plot(ax=ax, color="tab:red", markersize=12, alpha=0.85)

        ax.set_axis_off()
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=200)
    finally:
        # Always release the figure, including when plotting raises.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)
146
+
147
+ # ----------------------------
148
+ # 可視化(folium)
149
+ # ----------------------------
150
def make_folium_html(gdf_pref: gpd.GeoDataFrame, gdf_pts: gpd.GeoDataFrame):
    """Build an interactive folium map and return it as an HTML string.

    Args:
        gdf_pref: Geometries added to the map as one GeoJSON layer.
        gdf_pts: Points drawn as circle markers; rows with a null geometry are
            ignored. ``address_input`` and ``CF`` are shown in each popup.

    Returns:
        The HTML produced by the map's ``_repr_html_``.
    """
    from html import escape  # local import keeps this block self-contained

    pts = gdf_pts[gdf_pts.geometry.notnull()]
    if pts.empty:
        # No points: fall back to a fixed centre with a wider zoom.
        center_lat, center_lon, zoom = 35.6812, 139.7671, 5
    else:
        center_lat = pts.geometry.y.median()
        center_lon = pts.geometry.x.median()
        zoom = 6

    m = folium.Map(location=[center_lat, center_lon], zoom_start=zoom)

    # The full geometries are passed as GeoJSON; the layer is best-effort, so
    # a failure here still leaves a usable base map with the markers.
    try:
        folium.GeoJson(gdf_pref.to_json(), name="prefecture").add_to(m)
    except Exception:
        pass

    for _, r in pts.iterrows():
        # Popup text originates from the uploaded spreadsheet, so it is escaped
        # before being embedded in HTML.
        addr = escape(str(r.get("address_input", "(no addr)")))
        cf = escape(str(r.get("CF", "")))
        folium.CircleMarker(
            location=(float(r.geometry.y), float(r.geometry.x)),
            radius=4,
            fill=True,
            fill_opacity=0.9,
            popup=f"{addr}<br>CF:{cf}",
        ).add_to(m)

    return m._repr_html_()
180
+
181
+ # ----------------------------
182
+ # パイプライン
183
+ # ----------------------------
184
+ def _coerce_indexer(x):
185
+ # 列名/数字の両対応
186
+ try:
187
+ return int(x)
188
+ except Exception:
189
+ return x
190
+
191
def run(zip_file, excel_file, sheet_name, header_row, address_col, power_col,
        use_inet, line_width):
    """Run the whole pipeline: boundaries, Excel addresses, geocoding, rendering.

    Args:
        zip_file: Uploaded Shapefile ZIP (an object with ``.name`` or a path
            string). ``DEFAULT_ZIP`` is used when it is missing.
        excel_file: Uploaded Excel file (same forms as ``zip_file``). When it
            is absent only the boundaries are drawn.
        sheet_name: Sheet passed to ``pandas.read_excel``.
        header_row: 0-based header row; converted with ``int``.
        address_col: Column name or 0-based position of the address column.
        power_col: Column name or 0-based position of the numeric column.
            ``None`` or a blank string means no numeric column.
        use_inet: Truthy to allow Nominatim requests; otherwise cache only.
        line_width: Boundary line width for the static map.

    Returns:
        ``(image, html, message)``. When a step fails, ``image`` and ``html``
        are ``None`` and ``message`` describes the failure.
    """

    def _as_path(upload):
        # Uploads may arrive as an object exposing .name or as a plain path
        # string (NOTE(review): depends on the Gradio version/configuration).
        if upload is None:
            return None
        candidate = getattr(upload, "name", upload)
        return candidate if isinstance(candidate, str) else None

    def _pick(frame, indexer):
        # Integers select by position, everything else by column label.
        return frame.iloc[:, indexer] if isinstance(indexer, int) else frame[indexer]

    # 1) Decide which Shapefile ZIP to use.
    uploaded_zip = _as_path(zip_file)
    if uploaded_zip and os.path.exists(uploaded_zip):
        zip_path = uploaded_zip
    elif os.path.exists(DEFAULT_ZIP):
        zip_path = DEFAULT_ZIP
    else:
        return None, None, "Shapefile の ZIP をアップロードするか、data/japan_ver85.zip を配置してください。"

    # 2) Load the administrative boundaries.
    try:
        gdf_pref = load_gdf_from_zip(zip_path)
    except Exception as e:
        return None, None, f"行政界の読み込みに失敗しました: {e}"

    # 3) Read the Excel file and pick the columns.
    skipped = 0
    excel_path = _as_path(excel_file)
    if excel_path is None:
        # Without addresses the map is still returned, just with no points.
        gdf_pts = gpd.GeoDataFrame(
            columns=["address_input", "CF", "lat", "lon"], geometry=[], crs="EPSG:4326"
        )
    else:
        try:
            df = pd.read_excel(excel_path, sheet_name=sheet_name, header=int(header_row))
        except Exception as e:
            return None, None, f"Excel の読み込みに失敗しました: {e}"

        numeric_requested = not (
            power_col is None or (isinstance(power_col, str) and not power_col.strip())
        )
        try:
            addr_series = _pick(df, _coerce_indexer(address_col))
            if numeric_requested:
                cf_series = _pick(df, _coerce_indexer(power_col))
            else:
                cf_series = pd.Series(np.nan, index=df.index)
        except (KeyError, IndexError) as e:
            # A wrong column name/position is reported instead of raised.
            return None, None, f"列の指定が正しくありません: {e}"

        # Rows without an address would otherwise be geocoded as the text "nan".
        has_address = addr_series.notna() & addr_series.astype(str).str.strip().ne("")
        skipped = int((~has_address).sum())
        addresses = addr_series[has_address].astype(str).tolist()
        cfs = cf_series[has_address].tolist()

        # 4) Geocode, using the cache where possible.
        try:
            geo_df = geocode_with_cache(addresses, cfs, use_internet=bool(use_inet))
        except Exception as e:
            return None, None, f"ジオコーディングに失敗しました: {e}"

        # 5) Turn the result into a GeoDataFrame (null geometry when unresolved).
        geometry = [
            Point(lon, lat) if (pd.notna(lat) and pd.notna(lon)) else None
            for lat, lon in zip(geo_df["lat"], geo_df["lon"])
        ]
        gdf_pts = gpd.GeoDataFrame(geo_df, geometry=geometry, crs="EPSG:4326")

    # 6) Static map (matplotlib).
    try:
        img = plot_map_png(gdf_pref, gdf_pts, line_width=line_width)
    except Exception as e:
        return None, None, f"静的描画に失敗しました: {e}"

    # 7) Interactive map (folium); a failure only replaces the HTML output.
    try:
        html = make_folium_html(gdf_pref, gdf_pts)
    except Exception as e:
        html = f"<p>folium描画に失敗しました: {e}</p>"

    # 8) Summary text.
    info = [f"都道府県レコード数: {len(gdf_pref)}"]
    if gdf_pref.crs:
        info.append(f"PREF CRS: {gdf_pref.crs}")
    info.append(f"ポイント数(有効座標): {int(gdf_pts.geometry.notnull().sum())} / {len(gdf_pts)}")
    if not gdf_pts.empty and gdf_pts.crs:
        info.append(f"PTS CRS: {gdf_pts.crs}")
    if skipped:
        info.append(f"住所が空の行(スキップ): {skipped}")

    return img, html, "\n".join(info)
255
 
256
+ # ----------------------------
257
+ # Gradio UI
258
+ # ----------------------------
259
# Gradio UI: collects the inputs, forwards them to run(), and shows the static
# image, the interactive map and the summary text.
with gr.Blocks(title="Japan Shapefile + Excel Geocoding Plotter") as demo:
    gr.Markdown("## japan_ver85.shp(ZIP) + Excel住所 → 日本地図にプロット")
    gr.Markdown(
        "- **Shapefile ZIP**(`.shp/.shx/.dbf/.prj` など同梱)をアップロードしてください。\n"
        "- **Excel** は住所列と数値列(任意: CF/出力など)を指定してください。\n"
        "- Nominatim への問い合わせはレート制限済み。通信が難しい環境では「通信オフ(キャッシュのみ)」で実行できます。"
    )

    with gr.Row():
        zip_in = gr.File(label="Shapefile (ZIP)", file_count="single", file_types=[".zip"])
        xlsx_in = gr.File(label="Excelファイル(住所付き)", file_count="single", file_types=[".xlsx", ".xls"])

    with gr.Row():
        sheet = gr.Textbox(label="シート名", value="認定設備")
        header_row = gr.Number(label="ヘッダー行番号(0始まり)", value=2, precision=0)

    with gr.Row():
        address_col = gr.Textbox(label="住所列(列名 or 0始まり列番号)", value="発電設備の所在地")
        power_col = gr.Textbox(label="数値列(任意:列名 or 0始まり列番号)", value="発電出力(kW)")

    with gr.Row():
        use_inet = gr.Checkbox(label="Nominatimに問い合わせ(オフでキャッシュのみ使用)", value=True)
        line_width = gr.Slider(0.2, 2.0, value=0.6, step=0.1, label="境界線の太さ")

    run_btn = gr.Button("描画")

    out_img = gr.Image(label="静的地図(matplotlib)", type="pil")
    out_html = gr.HTML(label="インタラクティブ地図(folium)")
    out_txt = gr.Textbox(label="メタ情報", lines=4)

    # Same rule as the module-level helper; the old name is kept as an alias
    # so that one implementation serves both names.
    _parse_indexer = _coerce_indexer

    def app_run(zipf, xls, s, h, a, p, inet, lw):
        """Adapt the raw widget values to run()'s parameters."""
        # The header row is passed through unconverted: run() converts it
        # inside its own error handling, so a cleared field yields a message
        # instead of an exception raised here.
        return run(
            zipf,
            xls,
            s,
            h,
            _parse_indexer(a),
            _parse_indexer(p),
            inet,
            lw,
        )

    run_btn.click(
        fn=app_run,
        inputs=[zip_in, xlsx_in, sheet, header_row, address_col, power_col, use_inet, line_width],
        outputs=[out_img, out_html, out_txt],
    )

if __name__ == "__main__":
    demo.launch()