Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,4 +49,213 @@ def load_gdf_from_zip(zip_path: str) -> gpd.GeoDataFrame:
|
|
| 49 |
|
| 50 |
def make_geocoder():
|
| 51 |
geolocator = Nominatim(user_agent=USER_AGENT, timeout=10)
|
| 52 |
-
geocode =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
def make_geocoder():
    """Build the rate-limited geocoding callable used by this app.

    Returns:
        A ``RateLimiter`` wrapping ``Nominatim.geocode``, configured with
        ``min_delay_seconds=GEOCODE_DELAY_SEC``.
    """
    nominatim = Nominatim(user_agent=USER_AGENT, timeout=10)
    return RateLimiter(nominatim.geocode, min_delay_seconds=GEOCODE_DELAY_SEC)
|
| 54 |
+
|
| 55 |
+
def geocode_with_cache(addresses, CFs, use_internet=True):
    """Geocode addresses, consulting and updating the persistent cache.

    Args:
        addresses: Iterable of address values; each one is converted with
            ``str`` before lookup.
        CFs: Iterable of values paired positionally with ``addresses``.
            ``None`` and float NaN become ``""``; anything else is converted
            with ``str``.
        use_internet: When false, no geocoder is created and addresses
            without usable cached coordinates get NaN coordinates.

    Returns:
        A ``pandas.DataFrame`` with the columns ``address_input``, ``CF``,
        ``lat`` and ``lon`` (one row per input pair). The columns are present
        even when there are no input rows.
    """
    columns = ["address_input", "CF", "lat", "lon"]
    cache = load_cache()
    cache_map = {
        row["address_input"]: (row["lat"], row["lon"], row["CF"])
        for _, row in cache.iterrows()
    }
    results = []
    geocode = make_geocoder() if use_internet else None

    for a, cf in zip(addresses, CFs):
        a = str(a)
        cf = "" if (cf is None or (isinstance(cf, float) and np.isnan(cf))) else str(cf)

        # Cache hit: only entries with usable coordinates count, so addresses
        # that previously failed are retried below.
        if a in cache_map:
            lat, lon, _cached_cf = cache_map[a]
            if pd.notna(lat) and pd.notna(lon):
                results.append({"address_input": a, "CF": cf, "lat": lat, "lon": lon})
                continue

        if not use_internet:
            results.append({"address_input": a, "CF": cf, "lat": np.nan, "lon": np.nan})
            continue

        try:
            loc = geocode(a, country_codes="jp", addressdetails=True)
            if loc:
                lat, lon = loc.latitude, loc.longitude
            else:
                lat, lon = np.nan, np.nan
        except Exception:
            # Best effort: a failed lookup is recorded as missing coordinates
            # instead of aborting the whole batch.
            lat, lon = np.nan, np.nan

        # Replace any previous row for this address and persist immediately,
        # so progress is kept if a later lookup fails.
        cache = cache[cache["address_input"] != a]
        cache = pd.concat(
            [cache, pd.DataFrame([{"address_input": a, "lat": lat, "lon": lon, "CF": cf}])],
            ignore_index=True
        )
        save_cache(cache)

        # Keep the in-memory lookup in sync so a repeated address later in the
        # same batch is served from the cache instead of being re-queried.
        cache_map[a] = (lat, lon, cf)
        results.append({"address_input": a, "CF": cf, "lat": lat, "lon": lon})

    # Explicit columns keep the schema stable for callers when `results` is empty.
    return pd.DataFrame(results, columns=columns)
|
| 94 |
+
|
| 95 |
+
def plot_map_png(gdf_pref: gpd.GeoDataFrame, gdf_pts: gpd.GeoDataFrame,
                 line_width: float = 0.6, figsize=(7, 7)) -> Image.Image:
    """Render prefecture boundaries and geocoded points to a PNG image.

    Args:
        gdf_pref: Polygons whose boundaries are drawn in black.
        gdf_pts: Points to overlay; rows with null geometry are skipped. A
            ``CF`` column, if present, is coerced to numbers and used to
            colour the points.
        line_width: Line width of the boundary outlines.
        figsize: Matplotlib figure size.

    Returns:
        A PIL image opened from the in-memory PNG.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        gdf_pref.boundary.plot(ax=ax, linewidth=line_width, color="black")

        gdf_pts_valid = gdf_pts[gdf_pts.geometry.notnull()]
        if not gdf_pts_valid.empty:
            # The fallback Series shares the filtered frame's index so that
            # `assign` aligns it row-for-row.
            missing_cf = pd.Series(np.nan, index=gdf_pts_valid.index)
            cf_num = pd.to_numeric(gdf_pts_valid.get("CF", missing_cf), errors="coerce")
            gdf_pts_valid.assign(CF_num=cf_num).plot(
                ax=ax, column="CF_num", cmap="OrRd", markersize=12, alpha=0.85, legend=True
            )

        ax.set_axis_off()
        # Operate on this figure explicitly rather than pyplot's global
        # "current figure".
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=200)
    finally:
        # Always release the figure, even when plotting or saving fails;
        # otherwise pyplot keeps it registered.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)
|
| 114 |
+
|
| 115 |
+
def make_folium_html(gdf_pref: gpd.GeoDataFrame, gdf_pts: gpd.GeoDataFrame):
    """Build an interactive folium map and return it as an HTML string.

    Args:
        gdf_pref: Polygons added as a GeoJSON overlay named ``prefecture``
            (best effort: the map is still produced if the overlay fails).
        gdf_pts: Points drawn as circle markers; rows with null geometry are
            skipped. ``address_input`` and ``CF`` are shown in each popup.

    Returns:
        The HTML produced by the map's ``_repr_html_()``.
    """
    # Function-scope imports so this block does not depend on the file's
    # import section.
    import logging
    from html import escape

    gdf_pts_valid = gdf_pts[gdf_pts.geometry.notnull()]
    if not gdf_pts_valid.empty:
        # Centre on the median point.
        center_lat = gdf_pts_valid.geometry.y.median()
        center_lon = gdf_pts_valid.geometry.x.median()
        zoom = 6
    else:
        # No points: fixed default centre and a wider zoom.
        center_lat, center_lon, zoom = 35.6812, 139.7671, 5

    m = folium.Map(location=[center_lat, center_lon], zoom_start=zoom)

    try:
        folium.GeoJson(gdf_pref.to_json(), name="prefecture").add_to(m)
    except Exception:
        # Keep the point map usable without the overlay, but leave a trace
        # instead of discarding the failure silently.
        logging.getLogger(__name__).warning(
            "Could not add the prefecture overlay to the folium map", exc_info=True
        )

    for _, r in gdf_pts_valid.iterrows():
        lat, lon = r.geometry.y, r.geometry.x
        # Popup text is inserted as HTML; escape the spreadsheet-derived
        # values so they cannot inject markup.
        addr_text = escape(str(r.get('address_input', '(no addr)')))
        cf_text = escape(str(r.get('CF', '')))
        popup = f"{addr_text}<br>CF:{cf_text}"
        folium.CircleMarker(
            location=(float(lat), float(lon)),
            radius=4,
            fill=True,
            fill_opacity=0.9,
            popup=popup,
        ).add_to(m)

    return m._repr_html_()
|
| 143 |
+
|
| 144 |
+
def _parse_indexer(x):
|
| 145 |
+
try:
|
| 146 |
+
return int(x)
|
| 147 |
+
except Exception:
|
| 148 |
+
return x
|
| 149 |
+
|
| 150 |
+
def run(zip_file, excel_file, sheet_name, header_row, address_col, power_col,
        use_inet, line_width):
    """Load boundaries, geocode the Excel addresses and build both maps.

    Args:
        zip_file: Uploaded shapefile ZIP (an object with a ``name`` path
            attribute, or a plain path string). Falls back to ``DEFAULT_ZIP``
            when absent or not found on disk.
        excel_file: Uploaded Excel workbook (same forms as ``zip_file``), or
            ``None`` to draw the boundaries only.
        sheet_name: Sheet to read, passed to ``pandas.read_excel``.
        header_row: Header row index, converted with ``int``.
        address_col: Address column, as a name or an integer position.
        power_col: Value column, as a name or an integer position. ``None``
            or a blank string means "no value column".
        use_inet: Truthy to allow geocoder queries; falsy for cache only.
        line_width: Boundary line width for the static map.

    Returns:
        A 5-tuple ``(image, html, info_text, table_df, error_message)``. On
        failure the first two items are ``None``, ``info_text`` is ``""`` and
        ``error_message`` describes the problem; on success it is ``""``.
    """
    result_columns = ["address_input", "CF", "lat", "lon"]

    def _failure(message, table=None):
        # Uniform error result so every failure path has the same shape.
        if table is None:
            table = pd.DataFrame(columns=result_columns)
        return None, None, "", table, message

    # 1) Shapefile
    zip_path = _uploaded_path(zip_file)
    if not (zip_path and os.path.exists(zip_path)):
        if os.path.exists(DEFAULT_ZIP):
            zip_path = DEFAULT_ZIP
        else:
            return _failure("Shapefile の ZIP をアップロードするか、data/japan_ver85.zip を配置してください。")

    try:
        gdf_pref = load_gdf_from_zip(zip_path)
    except Exception as e:
        return _failure(f"行政界の読み込みに失敗しました: {e}")

    # 2) Excel -> geocoding
    excel_path = _uploaded_path(excel_file)
    if excel_path is None:
        # Still return an (empty) table when no workbook was supplied.
        gdf_pts = gpd.GeoDataFrame(columns=result_columns, geometry=[], crs="EPSG:4326")
        table_df = pd.DataFrame(columns=result_columns)
    else:
        try:
            df = pd.read_excel(excel_path, sheet_name=sheet_name, header=int(header_row))
        except Exception as e:
            return _failure(f"Excel の読み込みに失敗しました: {e}")

        # A wrong column name or position is a user input error: report it
        # through the error slot instead of raising.
        try:
            addr_series = _select_column(df, address_col)
            if power_col is None or (isinstance(power_col, str) and not power_col.strip()):
                # The value column is optional; without it every CF is empty.
                cfs = [None] * len(df)
            else:
                cfs = _select_column(df, power_col).tolist()
        except (KeyError, IndexError) as e:
            return _failure(f"列の取得に失敗しました: {e}")

        addresses = addr_series.astype(str).tolist()

        try:
            geo_df = geocode_with_cache(addresses, cfs, use_internet=bool(use_inet))
        except Exception as e:
            return _failure(f"ジオコーディングに失敗しました: {e}")

        if geo_df.empty:
            # A sheet with no data rows may yield a frame without columns;
            # normalise it so the selections below cannot fail.
            geo_df = pd.DataFrame(columns=result_columns)
        table_df = geo_df[result_columns].copy()

        geometry = [
            Point(lon, lat) if (pd.notna(lat) and pd.notna(lon)) else None
            for lat, lon in zip(geo_df["lat"], geo_df["lon"])
        ]
        gdf_pts = gpd.GeoDataFrame(geo_df, geometry=geometry, crs="EPSG:4326")

    # 3) Static figure and interactive map
    try:
        img = plot_map_png(gdf_pref, gdf_pts, line_width=line_width)
    except Exception as e:
        return _failure(f"静的描画に失敗しました: {e}", table_df)

    try:
        html = make_folium_html(gdf_pref, gdf_pts)
    except Exception as e:
        # The static image is still useful, so degrade to an inline message.
        html = f"<p>folium描画に失敗しました: {e}</p>"

    # 4) Summary information
    info = []
    info.append(f"都道府県レコード数: {len(gdf_pref)}")
    if gdf_pref.crs:
        info.append(f"PREF CRS: {gdf_pref.crs}")
    info.append(f"ポイント数(有効座標): {int(gdf_pts.geometry.notnull().sum())} / {len(gdf_pts)}")
    if not gdf_pts.empty and gdf_pts.crs:
        info.append(f"PTS CRS: {gdf_pts.crs}")

    return img, html, "\n".join(info), table_df, ""


def _uploaded_path(file_obj):
    """Return the path of an uploaded file, or ``None`` when there is none."""
    if file_obj is None:
        return None
    if isinstance(file_obj, str):
        return file_obj or None
    return getattr(file_obj, "name", None)


def _select_column(df, col):
    """Select a column by integer position or by label."""
    return df.iloc[:, col] if isinstance(col, int) else df[col]
|
| 215 |
+
|
| 216 |
+
# Gradio UI definition: input widgets, output widgets, and the click handler
# that forwards everything to run().
with gr.Blocks(title="Japan Shapefile + Excel Geocoding Plotter") as demo:
    gr.Markdown("## japan_ver85.shp(ZIP) + Excel住所 → 日本地図にプロット & 表で確認")

    # File uploads: boundary shapefile ZIP and the Excel workbook with addresses.
    with gr.Row():
        zip_in = gr.File(label="Shapefile (ZIP)", file_count="single", file_types=[".zip"])
        xlsx_in = gr.File(label="Excelファイル(住所付き)", file_count="single", file_types=[".xlsx", ".xls"])

    # Which sheet to read and which row holds the header.
    with gr.Row():
        sheet = gr.Textbox(label="シート名", value="認定設備")
        header_row = gr.Number(label="ヘッダー行番号(0始まり)", value=2, precision=0)

    # Column selectors: each accepts a column name or a 0-based position
    # (converted by _parse below).
    with gr.Row():
        address_col = gr.Textbox(label="住所列(列名 or 0始まり列番号)", value="発電設備の所在地")
        power_col = gr.Textbox(label="数値列(任意:列名 or 0始まり列番号)", value="発電出力(kW)")

    # Geocoder toggle (forwarded to run() as use_inet) and boundary line width.
    with gr.Row():
        use_inet = gr.Checkbox(label="Nominatimに問い合わせ(オフでキャッシュのみ使用)", value=True)
        line_width = gr.Slider(0.2, 2.0, value=0.6, step=0.1, label="境界線の太さ")

    run_btn = gr.Button("描画")

    # Outputs, in the same order as the 5-tuple returned by run().
    out_img = gr.Image(label="静的地図(matplotlib)", type="pil")
    out_html = gr.HTML(label="インタラクティブ地図(folium)")
    out_info = gr.Textbox(label="メタ情報", lines=4)
    out_table = gr.Dataframe(label="ジオコーディング結果(住所・緯度・経度・CF)", wrap=True)
    out_err = gr.Markdown(label="エラー", visible=True)

    def _parse(x):
        """Return ``int(x)`` when the conversion succeeds, else ``x`` unchanged."""
        try:
            return int(x)
        except Exception:
            return x

    def app_run(zipf, xls, s, h, a, p, inet, lw):
        """Adapt raw widget values and delegate to ``run``.

        The header row is converted with ``int`` and the two column
        selectors are passed through ``_parse``.
        """
        return run(
            zipf, xls, s, int(h), _parse(a), _parse(p), inet, lw
        )

    # The order of `inputs` matches app_run's parameters; the order of
    # `outputs` matches run()'s return tuple.
    run_btn.click(
        fn=app_run,
        inputs=[zip_in, xlsx_in, sheet, header_row, address_col, power_col, use_inet, line_width],
        outputs=[out_img, out_html, out_info, out_table, out_err],
    )

# Launch the app when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|