hiroki0008 committed on
Commit
9f2e9b8
·
verified ·
1 Parent(s): e2567d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -2
app.py CHANGED
@@ -1,5 +1,222 @@
 
 
 
 
 
1
  import geopandas as gpd
2
  import matplotlib.pyplot as plt
 
 
 
 
 
3
 
4
- world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
5
- world.head()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import io
4
+ import pandas as pd
5
+ import numpy as np
6
  import geopandas as gpd
7
  import matplotlib.pyplot as plt
8
+ from shapely.geometry import Point
9
+ from geopy.geocoders import Nominatim
10
+ from geopy.extra.rate_limiter import RateLimiter
11
+ import folium
12
+ import gradio as gr
13
 
14
# ----------------------------
# Settings
# ----------------------------
# Nominatim's usage policy asks clients to identify themselves, so the
# User-Agent should carry contact details. Override the placeholder below by
# setting the NOMINATIM_USER_AGENT environment variable.
USER_AGENT = os.getenv(
    "NOMINATIM_USER_AGENT",
    "jp-geocoding-demo (contact: your_email@example.com)",
)
GEOCODE_DELAY_SEC = 1.0  # minimum spacing between geocoder calls (1 request/second)

# Location of the on-disk geocoding cache; the directory is created at import time.
CACHE_DIR = "data/cache"
CACHE_PATH = os.path.join(CACHE_DIR, "geocode_cache.csv")
os.makedirs(CACHE_DIR, exist_ok=True)

# Administrative-boundary (prefecture) shapefile.
PREF_SHP_DIR = "data/N03-20250101_GML"
PREF_SHP_PATH = os.path.join(PREF_SHP_DIR, "N03-20250101_prefecture.shp")
32
+
33
+ # ----------------------------
34
+ # 住所→緯度経度 キャッシュの読み書き
35
+ # ----------------------------
36
def load_cache(path=None):
    """Load the geocoding cache from CSV.

    Args:
        path: CSV file to read. Defaults to the module-level ``CACHE_PATH``.

    Returns:
        pandas.DataFrame with exactly the columns ``address_input``, ``lat``,
        ``lon`` and ``CF``. An empty frame with those columns is returned when
        the file is missing, empty, unparseable, or lacks any of the columns.
    """
    columns = ["address_input", "lat", "lon", "CF"]
    cache_path = CACHE_PATH if path is None else path

    if os.path.exists(cache_path):
        try:
            df = pd.read_csv(cache_path)
        except (pd.errors.EmptyDataError, pd.errors.ParserError):
            # A zero-byte or corrupt cache file is treated as "no cache"
            # rather than aborting the whole run.
            df = None
        # Validate by column *name*: callers index rows by these labels, so a
        # file that merely has four columns is not good enough.
        if df is not None and set(columns) <= set(df.columns):
            return df[columns]

    return pd.DataFrame(columns=columns)
43
+
44
def save_cache(df_cache, path=None):
    """Persist the geocoding cache as CSV (without the index).

    The frame is written to a sibling ``.tmp`` file first and then moved over
    the destination, so an interruption mid-write cannot leave a truncated
    cache behind.

    Args:
        df_cache: pandas.DataFrame holding the cache rows.
        path: Destination CSV file. Defaults to the module-level ``CACHE_PATH``.
    """
    target = CACHE_PATH if path is None else path

    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    tmp_path = f"{target}.tmp"
    df_cache.to_csv(tmp_path, index=False)
    os.replace(tmp_path, target)
46
+
47
+ # ----------------------------
48
+ # ジオコーダ(Nominatim)
49
+ # ----------------------------
50
def make_geocoder():
    """Build a rate-limited Nominatim geocoding callable.

    Returns:
        A ``RateLimiter`` wrapping ``Nominatim.geocode`` that enforces at
        least ``GEOCODE_DELAY_SEC`` seconds between calls. The underlying
        client identifies itself with ``USER_AGENT`` and uses a 10-second
        request timeout.
    """
    geolocator = Nominatim(user_agent=USER_AGENT, timeout=10)
    return RateLimiter(geolocator.geocode, min_delay_seconds=GEOCODE_DELAY_SEC)
54
+
55
def geocode_with_cache(addresses, CFs, use_internet=True):
    """Geocode addresses, consulting the on-disk cache before calling Nominatim.

    Args:
        addresses: Iterable of addresses; each item is coerced with ``str``.
        CFs: Iterable of values paired with ``addresses`` (e.g. generation
            output); each item is coerced with ``str``.
        use_internet: When False, no geocoder is created and any address
            without usable cached coordinates gets NaN for ``lat``/``lon``.

    Returns:
        pandas.DataFrame with columns ``address_input``, ``CF``, ``lat`` and
        ``lon``: one row per (address, CF) pair, in input order. The columns
        are present even when the input is empty.
    """
    cache = load_cache()
    # address -> (lat, lon). If the file holds duplicate rows, the last wins.
    coords_by_address = {
        str(addr): (lat, lon)
        for addr, lat, lon in zip(cache["address_input"], cache["lat"], cache["lon"])
    }

    geocode = make_geocoder() if use_internet else None
    # Addresses that yielded nothing during this call; they are not retried
    # within the same batch so duplicates do not cost extra API requests.
    unresolved = set()
    results = []

    for address, cf in zip(addresses, CFs):
        address = str(address)
        cf = str(cf)

        cached = coords_by_address.get(address)
        if cached is not None and not (pd.isna(cached[0]) or pd.isna(cached[1])):
            lat, lon = cached
        elif not use_internet or address in unresolved:
            # Offline mode, or already known to be unresolvable in this run.
            lat, lon = np.nan, np.nan
        else:
            # Query the API; spacing between calls is handled by the RateLimiter.
            try:
                loc = geocode(address, country_codes="jp", addressdetails=True)
            except Exception:
                # Deliberately best-effort: one failing lookup must not abort
                # the batch. The row simply ends up without coordinates.
                loc = None
            lat, lon = (loc.latitude, loc.longitude) if loc else (np.nan, np.nan)

            # Replace any previous row for this address and persist right
            # away, so progress survives an interruption.
            cache = cache[cache["address_input"].astype(str) != address]
            cache = pd.concat(
                [
                    cache,
                    pd.DataFrame(
                        [{"address_input": address, "lat": lat, "lon": lon, "CF": cf}]
                    ),
                ],
                ignore_index=True,
            )
            save_cache(cache)

            # Keep the in-memory view in step with the file so a repeated
            # address later in this batch is served without another request.
            coords_by_address[address] = (lat, lon)
            if pd.isna(lat) or pd.isna(lon):
                unresolved.add(address)

        results.append({"address_input": address, "CF": cf, "lat": lat, "lon": lon})

    return pd.DataFrame(results, columns=["address_input", "CF", "lat", "lon"])
99
+
100
+ # ----------------------------
101
+ # 主要処理:Excel→ジオコーディング→地図描画
102
+ # ----------------------------
103
def _select_column(df, col):
    """Return one column of ``df``: by position if ``col`` is an int, else by label."""
    if isinstance(col, int):
        return df.iloc[:, col]
    return df[col]


def _render_static_map(gdf_pref, gdf_pts_valid):
    """Plot prefecture boundaries plus the geocoded points; return a uint8 image array."""
    fig, ax = plt.subplots(figsize=(7, 7))
    try:
        gdf_pref.boundary.plot(ax=ax, linewidth=0.5)
        if not gdf_pts_valid.empty:
            # CF is carried as text; convert to numbers to drive the colour map.
            cf_num = pd.to_numeric(gdf_pts_valid["CF"], errors="coerce")
            gdf_pts_valid.assign(CF_num=cf_num).plot(
                ax=ax, column="CF_num", cmap="OrRd", markersize=10, alpha=0.8, legend=True
            )
        ax.set_axis_off()
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Always release the figure, even if plotting raised.
        plt.close(fig)
    buf.seek(0)
    # imread yields floats in [0, 1] for PNG; scale to uint8 so the image can
    # be handed to an image widget as a plain array.
    return (plt.imread(buf, format="png") * 255).round().astype(np.uint8)


def _build_interactive_map(gdf_pts_valid):
    """Build a folium map with one circle marker per point; return its HTML."""
    from html import escape

    if gdf_pts_valid.empty:
        center = [35.6812, 139.7671]  # around Tokyo Station
    else:
        # Centre on the median of the points.
        center = [
            gdf_pts_valid["lat"].astype(float).median(),
            gdf_pts_valid["lon"].astype(float).median(),
        ]

    m = folium.Map(location=center, zoom_start=8)

    # Prefecture boundaries are intentionally not added here: embedding the
    # full GeoJSON is heavy. Use a simplified GeoJSON if they are needed, e.g.
    #   folium.GeoJson(gdf_pref.to_json(), name="prefecture").add_to(m)

    for _, r in gdf_pts_valid.iterrows():
        # Values come from an uploaded spreadsheet, so escape them before
        # placing them into popup HTML.
        popup = f"{escape(str(r['address_input']))}<br>CF:{escape(str(r['CF']))}"
        folium.CircleMarker(
            location=(float(r["lat"]), float(r["lon"])),
            radius=4,
            fill=True,
            fill_opacity=0.8,
            popup=popup,
        ).add_to(m)

    return m._repr_html_()


def run_pipeline(xlsx_file, sheet_name, header_row, address_col, power_col, use_inet):
    """Read an Excel sheet, geocode its addresses, and render two maps.

    Args:
        xlsx_file: Uploaded file: either an object exposing the path as
            ``.name`` or the path itself.
        sheet_name: Sheet to read (passed to ``pandas.read_excel``).
        header_row: Zero-based row index holding the column headers.
        address_col: Address column, as a label or a zero-based position (int).
        power_col: Output/CF column, as a label or a zero-based position (int).
        use_inet: Whether addresses missing from the cache may be looked up
            online.

    Returns:
        Tuple ``(table, image, map_html)``:
        ``table`` is a plain pandas.DataFrame with ``address_input``, ``CF``,
        ``lat`` and ``lon``; ``image`` is the static map as a uint8 array;
        ``map_html`` is the folium map as an HTML string.

    Raises:
        gr.Error: If no file was supplied.
    """
    # 1) Read the Excel sheet.
    if xlsx_file is None:
        raise gr.Error("Excelファイルをアップロードしてください。")
    # The upload may arrive as a temp-file wrapper (path in .name) or as a path.
    xlsx_path = getattr(xlsx_file, "name", xlsx_file)
    df = pd.read_excel(xlsx_path, sheet_name=sheet_name, header=header_row)

    # Pick the requested columns (label or positional index).
    addr_series = _select_column(df, address_col)
    cf_series = _select_column(df, power_col)

    # Rows without an address would otherwise be stringified to "nan" and
    # sent to the geocoder, so leave them out.
    has_address = addr_series.notna()
    addresses = addr_series[has_address].astype(str).tolist()
    cfs = cf_series[has_address].tolist()

    # 2) Geocode, using the cache where possible.
    geo_df = geocode_with_cache(addresses, cfs, use_internet=use_inet)
    # Guarantee the expected columns even for an empty result.
    geo_df = geo_df.reindex(columns=["address_input", "CF", "lat", "lon"])

    # 3) Build a GeoDataFrame; rows without coordinates get no geometry.
    geometry = [
        Point(lon, lat) if (pd.notna(lat) and pd.notna(lon)) else None
        for lat, lon in zip(geo_df["lat"], geo_df["lon"])
    ]
    gdf_pts = gpd.GeoDataFrame(geo_df, geometry=geometry, crs="EPSG:4326")
    gdf_pts_valid = gdf_pts[gdf_pts.geometry.notnull()]

    # 4) Load prefecture boundaries (pyogrio engine).
    gdf_pref = gpd.read_file(PREF_SHP_PATH, engine="pyogrio").to_crs("EPSG:4326")

    # 5) Static matplotlib map and 6) interactive folium map.
    static_img = _render_static_map(gdf_pref, gdf_pts_valid)
    map_html = _build_interactive_map(gdf_pts_valid)

    # Return a plain table: geometry objects are not displayable table cells.
    table = pd.DataFrame(gdf_pts.drop(columns="geometry"))
    return table, static_img, map_html
184
+
185
+ # ----------------------------
186
+ # Gradio UI
187
+ # ----------------------------
188
with gr.Blocks(title="JP Geocoding & Mapping") as demo:
    gr.Markdown("## 住所ジオコーディング+可視化(Nominatim×GeoPandas×Folium)")

    # Inputs: the workbook and where to find the data inside it.
    with gr.Row():
        xlsx = gr.File(label="Excelファイル(例:13.東京都_202507.xlsx)", file_types=[".xlsx"])
        sheet = gr.Textbox(label="シート名", value="認定設備")
        header_row = gr.Number(label="ヘッダー行番号(0始まり)", value=2, precision=0)
    with gr.Row():
        address_col = gr.Textbox(label="住所列(列名 or 0始まり列番号)", value="発電設備の所在地")
        power_col = gr.Textbox(label="出力列(列名 or 0始まり列番号)", value="発電出力(kW)")
        use_inet = gr.Checkbox(label="Nominatimに問い合わせ(オフでキャッシュのみ使用)", value=True)

    run_btn = gr.Button("実行")

    # Outputs: result table, static image, interactive map.
    out_df = gr.Dataframe(label="結果(住所・緯度経度・CF)", interactive=False)
    out_img = gr.Image(label="静的地図(matplotlib)")
    out_map = gr.HTML(label="インタラクティブ地図(folium)")

    def _coerce_indexer(x):
        """Return ``x`` as an int when it parses as one, otherwise unchanged.

        Lets a single textbox accept either a column label or a zero-based
        column position.
        """
        try:
            return int(x)
        except (TypeError, ValueError):
            return x

    def _on_run(xls, s, h, a, c, inet):
        """Button handler: normalise the raw widget values and run the pipeline."""
        if h is None:
            # The number field was cleared; int(None) would raise a bare TypeError.
            raise gr.Error("ヘッダー行番号を入力してください。")
        return run_pipeline(
            xls, s, int(h), _coerce_indexer(a), _coerce_indexer(c), inet
        )

    run_btn.click(
        fn=_on_run,
        inputs=[xlsx, sheet, header_row, address_col, power_col, use_inet],
        outputs=[out_df, out_img, out_map],
    )

if __name__ == "__main__":
    demo.launch()