Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import geopandas as gpd
|
| 2 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import io
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
import geopandas as gpd
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
+
from shapely.geometry import Point
|
| 9 |
+
from geopy.geocoders import Nominatim
|
| 10 |
+
from geopy.extra.rate_limiter import RateLimiter
|
| 11 |
+
import folium
|
| 12 |
+
import gradio as gr
|
| 13 |
|
| 14 |
+
# ----------------------------
|
| 15 |
+
# 設定
|
| 16 |
+
# ----------------------------
|
| 17 |
+
# Out of respect for Nominatim's terms of use, a user_agent that carries
# contact details is recommended; override it via NOMINATIM_USER_AGENT.
_DEFAULT_USER_AGENT = "jp-geocoding-demo (contact: your_email@example.com)"
USER_AGENT = os.environ.get("NOMINATIM_USER_AGENT", _DEFAULT_USER_AGENT)

# Rate limit: at least one second between geocoding requests.
GEOCODE_DELAY_SEC = 1.0

# Cache location (the directory is created as soon as the module is loaded).
CACHE_DIR = "data/cache"
CACHE_PATH = os.path.join(CACHE_DIR, "geocode_cache.csv")
os.makedirs(CACHE_DIR, exist_ok=True)

# Administrative-boundary (prefecture) shapefile.
PREF_SHP_DIR = "data/N03-20250101_GML"
PREF_SHP_PATH = os.path.join(PREF_SHP_DIR, "N03-20250101_prefecture.shp")
|
| 32 |
+
|
| 33 |
+
# ----------------------------
|
| 34 |
+
# 住所→緯度経度 キャッシュの読み書き
|
| 35 |
+
# ----------------------------
|
| 36 |
+
def load_cache():
    """Load the geocoding cache from CACHE_PATH.

    Returns:
        pandas.DataFrame with exactly the columns ``address_input``, ``lat``,
        ``lon`` and ``CF``. An empty frame with those columns is returned
        when the file does not exist, is empty, cannot be parsed, or lacks
        any of the expected columns.
    """
    columns = ["address_input", "lat", "lon", "CF"]
    empty = pd.DataFrame(columns=columns)

    if not os.path.exists(CACHE_PATH):
        return empty

    try:
        # Addresses are looked up as strings, so keep numeric-looking
        # addresses from being parsed into ints/floats.
        df = pd.read_csv(
            CACHE_PATH, dtype={"address_input": str}, encoding="utf-8"
        )
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # A zero-byte or corrupted cache file is treated as "no cache".
        return empty

    # Validate by column name, not just by column count.
    if not set(columns).issubset(df.columns):
        return empty
    return df[columns]
|
| 43 |
+
|
| 44 |
+
def save_cache(df_cache):
    """Write the geocoding cache to CACHE_PATH as CSV, without the index.

    The data is first written to a sibling ``.tmp`` file and then moved into
    place with ``os.replace``, so an interruption in the middle of a write
    cannot leave a truncated cache file behind.

    Args:
        df_cache: pandas.DataFrame holding the cache rows.
    """
    tmp_path = f"{CACHE_PATH}.tmp"
    try:
        df_cache.to_csv(tmp_path, index=False, encoding="utf-8")
        os.replace(tmp_path, CACHE_PATH)
    finally:
        # Only still present when writing or replacing failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
| 46 |
+
|
| 47 |
+
# ----------------------------
|
| 48 |
+
# ジオコーダ(Nominatim)
|
| 49 |
+
# ----------------------------
|
| 50 |
+
def make_geocoder():
    """Return a rate-limited Nominatim ``geocode`` callable.

    The client is created with USER_AGENT and a 10 second timeout; its
    ``geocode`` method is wrapped in a RateLimiter that enforces at least
    GEOCODE_DELAY_SEC seconds between calls.
    """
    return RateLimiter(
        Nominatim(user_agent=USER_AGENT, timeout=10).geocode,
        min_delay_seconds=GEOCODE_DELAY_SEC,
    )
|
| 54 |
+
|
| 55 |
+
def geocode_with_cache(addresses, CFs, use_internet=True):
    """Geocode addresses, reading from and updating the on-disk cache.

    Args:
        addresses: iterable of addresses; each one is converted with ``str``.
        CFs: iterable of per-address values (e.g. power output), paired with
            ``addresses`` by position; each one is converted with ``str``.
        use_internet: when False, Nominatim is never queried and addresses
            without usable cached coordinates get NaN coordinates.

    Returns:
        pandas.DataFrame with columns ``address_input``, ``CF``, ``lat`` and
        ``lon``: one row per input pair, in input order. Addresses that could
        not be resolved have NaN in ``lat`` and ``lon``.
    """
    cache_columns = ["address_input", "lat", "lon", "CF"]
    result_columns = ["address_input", "CF", "lat", "lon"]
    # What missing cells look like once they have been stringified (for
    # example by ``Series.astype(str)``); querying these is pointless.
    unusable = {"", "nan", "none", "<na>"}

    # address -> cache row. Kept up to date during the loop so an address
    # that appears several times in one batch is only looked up once.
    records = {
        str(rec["address_input"]): rec
        for rec in load_cache().to_dict("records")
    }
    attempted = set()
    geocode = make_geocoder() if use_internet else None
    results = []

    for raw_addr, raw_cf in zip(addresses, CFs):
        a = str(raw_addr)
        cf = str(raw_cf)

        entry = records.get(a)
        has_coords = (
            entry is not None
            and pd.notna(entry["lat"])
            and pd.notna(entry["lon"])
        )
        may_query = (
            use_internet
            and a not in attempted
            and a.strip().lower() not in unusable
        )

        if not has_coords and may_query:
            attempted.add(a)
            # Call the API (the delay is enforced by the rate limiter).
            try:
                loc = geocode(a, country_codes="jp", addressdetails=True)
            except Exception:
                # Deliberately best-effort: a failing lookup must not abort
                # the whole batch, it is recorded as "no coordinates".
                loc = None
            if loc:
                lat, lon = loc.latitude, loc.longitude
            else:
                lat, lon = np.nan, np.nan

            # Overwrite the cache row and persist right away, so progress
            # survives an interruption of a long run.
            entry = {"address_input": a, "lat": lat, "lon": lon, "CF": cf}
            records[a] = entry
            save_cache(
                pd.DataFrame(list(records.values()), columns=cache_columns)
            )

        if entry is None:
            lat, lon = np.nan, np.nan
        else:
            lat, lon = entry["lat"], entry["lon"]
        results.append({"address_input": a, "CF": cf, "lat": lat, "lon": lon})

    # Explicit columns keep the frame usable even when there were no inputs.
    return pd.DataFrame(results, columns=result_columns)
|
| 99 |
+
|
| 100 |
+
# ----------------------------
|
| 101 |
+
# 主要処理:Excel→ジオコーディング→地図描画
|
| 102 |
+
# ----------------------------
|
| 103 |
+
def _select_column(df, col):
    """Return one column of ``df``: by position if ``col`` is an int, else by label."""
    if isinstance(col, int):
        return df.iloc[:, col]
    return df[col]


def _render_static_map(gdf_pref, gdf_pts_valid):
    """Draw boundaries plus points with matplotlib and return an RGBA uint8 array."""
    fig, ax = plt.subplots(figsize=(7, 7))
    try:
        gdf_pref.boundary.plot(ax=ax, linewidth=0.5)
        if not gdf_pts_valid.empty:
            # Convert the CF column to numbers so it can drive the colormap.
            cf_num = pd.to_numeric(gdf_pts_valid["CF"], errors="coerce")
            if cf_num.notna().any():
                gdf_pts_valid.assign(CF_num=cf_num).plot(
                    ax=ax, column="CF_num", cmap="OrRd",
                    markersize=10, alpha=0.8, legend=True,
                )
            else:
                # No numeric CF at all: there is no value range to colour by.
                gdf_pts_valid.plot(ax=ax, markersize=10, alpha=0.8)
        ax.set_axis_off()
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150)
    finally:
        # Always release the figure, also when plotting fails.
        plt.close(fig)

    buf.seek(0)
    rgba = plt.imread(buf, format="png")
    if rgba.dtype != np.uint8:
        # PNG data is read back as floats in [0, 1]; scale to 8-bit.
        rgba = np.clip(np.rint(rgba * 255), 0, 255).astype(np.uint8)
    return rgba


def _render_folium_map(gdf_pts_valid):
    """Build the interactive folium map and return it as an HTML string."""
    from html import escape  # local import: only needed for popup text

    # Centre on the median of the points; fall back to around Tokyo Station.
    if gdf_pts_valid.empty:
        center_lat, center_lon = 35.6812, 139.7671
    else:
        center_lat = gdf_pts_valid["lat"].astype(float).median()
        center_lon = gdf_pts_valid["lon"].astype(float).median()

    m = folium.Map(location=[center_lat, center_lon], zoom_start=8)

    # Prefecture boundaries are intentionally not added here: embedding the
    # full GeoJSON is heavy. Use a simplified GeoJSON if they are needed.

    rows = zip(
        gdf_pts_valid["address_input"],
        gdf_pts_valid["CF"],
        gdf_pts_valid["lat"],
        gdf_pts_valid["lon"],
    )
    for addr, cf, lat, lon in rows:
        # The values come from the uploaded spreadsheet, so escape them
        # before they are placed into the popup's HTML.
        popup = f"{escape(str(addr))}<br>CF:{escape(str(cf))}"
        folium.CircleMarker(
            location=(float(lat), float(lon)),
            radius=4,
            fill=True,
            fill_opacity=0.8,
            popup=popup,
        ).add_to(m)

    return m._repr_html_()


def run_pipeline(xlsx_file, sheet_name, header_row, address_col, power_col, use_inet):
    """Main flow: Excel -> geocoding -> static and interactive maps.

    Args:
        xlsx_file: the uploaded Excel file, either an object exposing the
            file path as ``.name`` or the path itself.
        sheet_name: sheet to read.
        header_row: 0-based row index of the header row.
        address_col: address column, as a label or a 0-based int position.
        power_col: output/CF column, as a label or a 0-based int position.
        use_inet: whether Nominatim may be queried for uncached addresses.

    Returns:
        A 3-tuple ``(gdf_pts, image, map_html)``: the GeoDataFrame of all
        rows (EPSG:4326, geometry is None where no coordinates were found),
        the static map as an RGBA uint8 numpy array, and the folium map as
        an HTML string.

    Raises:
        ValueError: if no Excel file was given.
    """
    if xlsx_file is None:
        raise ValueError("No Excel file was provided.")
    xlsx_path = getattr(xlsx_file, "name", xlsx_file)

    # 1) Read the Excel sheet and pick the two columns (label or position).
    df = pd.read_excel(xlsx_path, sheet_name=sheet_name, header=header_row)
    addresses = _select_column(df, address_col).astype(str).tolist()
    cfs = _select_column(df, power_col).tolist()

    # 2) Geocode (using the cache).
    geo_df = geocode_with_cache(addresses, cfs, use_internet=use_inet)

    # 3) Turn the result into a GeoDataFrame.
    geometry = [
        Point(lon, lat) if (pd.notna(lat) and pd.notna(lon)) else None
        for lat, lon in zip(geo_df["lat"], geo_df["lon"])
    ]
    gdf_pts = gpd.GeoDataFrame(geo_df, geometry=geometry, crs="EPSG:4326")

    # 4) Load the administrative boundaries (pyogrio engine).
    gdf_pref = gpd.read_file(PREF_SHP_PATH, engine="pyogrio").to_crs("EPSG:4326")

    # Only rows that actually have a point are drawn.
    gdf_pts_valid = gdf_pts[gdf_pts.geometry.notnull()]

    # 5) Static matplotlib figure and 6) interactive folium map.
    static_img = _render_static_map(gdf_pref, gdf_pts_valid)
    map_html = _render_folium_map(gdf_pts_valid)

    # Outputs: result table, static image, interactive HTML.
    return gdf_pts, static_img, map_html
|
| 184 |
+
|
| 185 |
+
# ----------------------------
|
| 186 |
+
# Gradio UI
|
| 187 |
+
# ----------------------------
|
| 188 |
+
with gr.Blocks(title="JP Geocoding & Mapping") as demo:
    gr.Markdown("## 住所ジオコーディング+可視化(Nominatim×GeoPandas×Folium)")

    with gr.Row():
        xlsx = gr.File(label="Excelファイル(例:13.東京都_202507.xlsx)", file_types=[".xlsx"])
        sheet = gr.Textbox(label="シート名", value="認定設備")
        header_row = gr.Number(label="ヘッダー行番号(0始まり)", value=2, precision=0)
    with gr.Row():
        address_col = gr.Textbox(label="住所列(列名 or 0始まり列番号)", value="発電設備の所在地")
        power_col = gr.Textbox(label="出力列(列名 or 0始まり列番号)", value="発電出力(kW)")
        use_inet = gr.Checkbox(label="Nominatimに問い合わせ(オフでキャッシュのみ使用)", value=True)

    run_btn = gr.Button("実行")

    out_df = gr.Dataframe(label="結果(住所・緯度経度・CF)", interactive=False)
    out_img = gr.Image(label="静的地図(matplotlib)")
    out_map = gr.HTML(label="インタラクティブ地図(folium)")

    def _coerce_indexer(x):
        """Return ``x`` as an int when it parses as one, otherwise unchanged.

        Lets a column field hold either a 0-based position or a column name.
        """
        try:
            return int(x)
        except (TypeError, ValueError):
            return x

    def _on_run(xls, s, h, a, c, inet):
        """Validate the raw widget values, then run the pipeline."""
        # Report missing inputs as readable UI errors instead of letting
        # them surface as an AttributeError/TypeError further down.
        if xls is None:
            raise gr.Error("Excelファイルをアップロードしてください。")
        if h is None:
            raise gr.Error("ヘッダー行番号を入力してください。")
        return run_pipeline(
            xls, s, int(h), _coerce_indexer(a), _coerce_indexer(c), inet
        )

    run_btn.click(
        fn=_on_run,
        inputs=[xlsx, sheet, header_row, address_col, power_col, use_inet],
        outputs=[out_df, out_img, out_map],
    )


if __name__ == "__main__":
    demo.launch()
|