hiroki0008 commited on
Commit
ee18469
·
verified ·
1 Parent(s): db0cf7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +321 -287
app.py CHANGED
@@ -1,320 +1,354 @@
1
- # app.py (Folium + 無料タイル / data:URL不使用 / File出力)
2
- # pip install folium gradio pandas numpy requests openpyxl
3
-
4
  import os
5
  import re
6
  import time
7
- import tempfile
 
 
 
 
8
  import requests
9
  import pandas as pd
10
- import numpy as np
11
- import gradio as gr
 
 
 
 
12
 
13
# ----------------------------
# Configuration
# ----------------------------
# UA sent with every GSI geocoder request; include a reachable contact.
GSI_USER_AGENT = os.environ.get(
    "GSI_USER_AGENT",
    "jp-gsi-geocoding-demo (contact: your_email@example.com)"
)
# Per-request timeout (seconds) for geocoder calls.
GSI_TIMEOUT_SEC = float(os.environ.get("GSI_TIMEOUT_SEC", "10"))
# Pause between consecutive geocoder calls; 0.0 disables throttling.
GEOCODE_DELAY_SEC = float(os.environ.get("GSI_RATE_LIMIT_SEC", "0.0"))

# GSI address-search endpoint (returns a list of feature-like dicts).
GSI_GEOCODE_URL = "https://msearch.gsi.go.jp/address-search/AddressSearch"

# On-disk geocode cache shared across runs.
CACHE_DIR = "data/cache"
os.makedirs(CACHE_DIR, exist_ok=True)  # NOTE: side effect at import time
CACHE_PATH = os.path.join(CACHE_DIR, "geocode_cache.csv")
28
-
29
# ----------------------------
# Cache
# ----------------------------
def load_cache():
    """Load the geocode cache CSV.

    Returns an empty frame (same columns) when the file is missing,
    unreadable, or lacks the expected columns. CF/lat/lon are coerced
    to numeric; unparsable cells become NaN.
    """
    empty = pd.DataFrame(columns=["address_input", "lat", "lon", "CF"])
    if os.path.exists(CACHE_PATH):
        try:
            cached = pd.read_csv(CACHE_PATH)
            required = {"address_input", "lat", "lon", "CF"}
            if required.issubset(cached.columns):
                for col in ("CF", "lat", "lon"):
                    cached[col] = pd.to_numeric(cached[col], errors="coerce")
                return cached
        except Exception:
            # Corrupt cache: fall through and start fresh.
            pass
    return empty
 
 
 
 
 
 
 
 
 
45
 
46
def save_cache(df_cache):
    """Best-effort persist of the cache frame to CACHE_PATH.

    Persistence is optional: any failure (I/O, permissions, …) is
    swallowed so it can never break a geocoding request.
    """
    try:
        df_cache.to_csv(CACHE_PATH, index=False)
    except Exception:
        pass
51
-
52
# ----------------------------
# GSI (Geospatial Information Authority of Japan) geocoder
# ----------------------------
def make_gsi_session() -> requests.Session:
    """Return a requests session carrying the project User-Agent."""
    session = requests.Session()
    session.headers.update({"User-Agent": GSI_USER_AGENT})
    return session
59
 
60
def gsi_geocode_once(address: str, session: requests.Session) -> tuple[float, float]:
    """Query the GSI address-search API once and return (lat, lon).

    The API payload stores coordinates as [lon, lat]; the pair is
    swapped here. Every failure path — blank/placeholder address,
    HTTP error, malformed JSON — collapses to (nan, nan).
    """
    try:
        if not address:
            return (np.nan, np.nan)
        text = str(address).strip()
        if text == "" or text.lower() in ("nan", "none"):
            return (np.nan, np.nan)

        resp = session.get(GSI_GEOCODE_URL, params={"q": address}, timeout=GSI_TIMEOUT_SEC)
        if resp.ok:
            payload = resp.json()
            if isinstance(payload, list) and payload:
                geometry = (payload[0].get("geometry") or {})
                coords = geometry.get("coordinates") or []
                if isinstance(coords, (list, tuple)) and len(coords) >= 2:
                    # Upstream order is [lon, lat].
                    lon_val, lat_val = float(coords[0]), float(coords[1])
                    return (lat_val, lon_val)
    except Exception:
        pass
    return (np.nan, np.nan)
82
-
83
def geocode_with_cache(addresses, CFs, use_internet=True):
    """Geocode addresses through the GSI API with a persistent CSV cache.

    Args:
        addresses: iterable of address strings (None/NaN tolerated).
        CFs: iterable of values paired 1:1 with addresses; coerced numeric.
        use_internet: when False, only cache hits resolve — misses get NaN.

    Returns:
        DataFrame with columns address_input, CF, lat, lon (all numeric
        except address_input).
    """
    cache = load_cache()
    cache_map = {row["address_input"]: (row["lat"], row["lon"], row["CF"]) for _, row in cache.iterrows()}
    results = []
    session = make_gsi_session() if use_internet else None

    for a, cf in zip(addresses, CFs):
        # Normalize the address to a plain stripped string.
        a = "" if (a is None or (isinstance(a, float) and np.isnan(a))) else str(a).strip()
        cf_num = pd.to_numeric(cf, errors="coerce")

        # Cache hit: reuse coordinates but keep the caller's CF value.
        if a in cache_map:
            lat, lon, _cached_cf = cache_map[a]
            if pd.notna(lat) and pd.notna(lon):
                results.append({"address_input": a, "CF": cf_num, "lat": float(lat), "lon": float(lon)})
                continue

        if not use_internet:
            results.append({"address_input": a, "CF": cf_num, "lat": np.nan, "lon": np.nan})
            continue

        lat, lon = gsi_geocode_once(a, session)

        # Optional politeness delay between API calls.
        if GEOCODE_DELAY_SEC > 0:
            time.sleep(GEOCODE_DELAY_SEC)

        # Update the on-disk cache. Failures (NaN) are stored too, but the
        # notna() check above makes them eligible for retry next run.
        cache = cache[cache["address_input"] != a]
        cache = pd.concat(
            [cache, pd.DataFrame([{"address_input": a, "lat": lat, "lon": lon, "CF": cf_num}])],
            ignore_index=True
        )
        save_cache(cache)
        results.append({"address_input": a, "CF": cf_num, "lat": lat, "lon": lon})

    df = pd.DataFrame(results)
    df["lat"] = pd.to_numeric(df["lat"], errors="coerce")
    df["lon"] = pd.to_numeric(df["lon"], errors="coerce")
    df["CF"] = pd.to_numeric(df["CF"], errors="coerce")
    return df
123
-
124
# ----------------------------
# Folium map rendering (free tile providers, no Mapbox key needed)
# ----------------------------
import folium

# Display name -> XYZ tile URL template for the selectable base layers.
TILE_CATALOG = {
    "GSI 標準地図": "https://cyberjapandata.gsi.go.jp/xyz/std/{z}/{x}/{y}.png",
    "GSI 淡色地図": "https://cyberjapandata.gsi.go.jp/xyz/pale/{z}/{x}/{y}.png",
    "GSI 写真(シームレス)": "https://cyberjapandata.gsi.go.jp/xyz/seamlessphoto/{z}/{x}/{y}.jpg",
    "OpenStreetMap": "https://tile.openstreetmap.org/{z}/{x}/{y}.png",
}
135
-
136
def _build_folium_map_html(df_points: pd.DataFrame, base_name: str) -> str:
    """Render the geocoded points as a Folium map and return its HTML.

    Args:
        df_points: frame with address_input / CF / lat / lon columns;
            rows with NaN lat or lon are dropped.
        base_name: selected base-map key from TILE_CATALOG.
            NOTE(review): currently unused — every catalog layer is added
            and the layer control decides visibility; confirm whether the
            selection should make the chosen layer the default.

    Returns:
        Fully rendered standalone HTML document for the map.
    """
    df_valid = df_points.dropna(subset=["lat", "lon"]).copy()
    if df_valid.empty:
        # No usable coordinates: fall back to a wide view of Japan.
        center_lat, center_lon, zoom = 35.0, 135.0, 4
    else:
        center_lat = float(df_valid["lat"].median())
        center_lon = float(df_valid["lon"].median())
        zoom = 6

    # Base maps (switchable via the layer control below).
    m = folium.Map(location=[center_lat, center_lon], zoom_start=zoom, control_scale=True, tiles=None)
    for name, url in TILE_CATALOG.items():
        folium.TileLayer(
            tiles=url,
            name=name,
            attr=f"© {name}",
            overlay=False,
            control=True,
            max_zoom=20,
        ).add_to(m)

    # Marker radius scales with CF when any CF value is present.
    if "CF" in df_valid.columns and df_valid["CF"].notna().any():
        cf = df_valid["CF"].clip(lower=0)
        # Min-max normalize; epsilon guards against a zero range.
        cf_norm = (cf - cf.min()) / (cf.max() - cf.min() + 1e-9)
        sizes = (cf_norm * 12 + 3).fillna(6).tolist()
    else:
        sizes = [6] * len(df_valid)

    for (_, row), r in zip(df_valid.iterrows(), sizes):
        lat, lon = float(row["lat"]), float(row["lon"])
        addr = str(row.get("address_input", ""))
        cfv = row.get("CF", np.nan)
        popup_html = f"<b>住所:</b> {addr}<br><b>CF:</b> {'' if pd.isna(cfv) else cfv}"

        folium.CircleMarker(
            location=(lat, lon),
            radius=float(r),
            weight=1,
            color="#117a8b",
            fill=True,
            fill_opacity=0.8,
            fill_color="#12939A",
            popup=folium.Popup(popup_html, max_width=260),
        ).add_to(m)

    folium.LayerControl(position="topright").add_to(m)
    return m.get_root().render()
184
-
185
- def _rewrite_leaflet_cdn(html_text: str, host: str) -> str:
186
  """
187
- Folium が出力する Leaflet の CDN(通常 jsDelivr)を、必要に応じて置換。
188
- SRI不整合を避けるため integrity/crossorigin を除去する。
 
 
 
189
  """
190
- # integrity / crossorigin を削除(SRIミスマッチ回避)
191
- html_text = re.sub(r'\sintegrity="[^"]+"', "", html_text)
192
- html_text = re.sub(r'\scrossorigin="[^"]+"', "", html_text)
193
-
194
- if host == "jsdelivr":
195
- return html_text # 置換しない
196
- elif host == "cdnjs":
197
- html_text = html_text.replace(
198
- "https://cdn.jsdelivr.net/npm/leaflet@", "https://cdnjs.cloudflare.com/ajax/libs/leaflet/"
199
- )
200
- html_text = html_text.replace("/dist/leaflet.css", "/leaflet.css")
201
- html_text = html_text.replace("/dist/leaflet.js", "/leaflet.js")
202
- return html_text
203
- elif host == "unpkg":
204
- html_text = html_text.replace(
205
- "https://cdn.jsdelivr.net/npm/", "https://unpkg.com/"
206
- )
207
- return html_text
 
208
  else:
209
- return html_text
210
-
211
- def _save_map_html_file(html_text: str) -> str:
212
- """地図HTMLを実ファイルに保存(Gradio Fileに渡すパスを返す)"""
213
- fd, path = tempfile.mkstemp(suffix=".html")
214
- os.close(fd)
215
- with open(path, "w", encoding="utf-8") as f:
216
- f.write(html_text)
217
- return path
218
-
219
- # ----------------------------
220
- # 実行パイプライン
221
- # ----------------------------
222
- def _parse_indexer(x):
223
- try:
224
- return int(x)
225
- except Exception:
226
- return x
227
-
228
def run(excel_file, sheet_name, header_row, address_col, power_col, use_inet, base_name, leaflet_cdn):
    """End-to-end pipeline: Excel -> geocode -> Folium HTML file.

    Args:
        excel_file: Gradio file object (needs a .name path) or None.
        sheet_name: worksheet name to read.
        header_row: 0-based header row index.
        address_col / power_col: column name (str) or 0-based position (int).
        use_inet: False restricts geocoding to cache hits only.
        base_name: base-map key from TILE_CATALOG.
        leaflet_cdn: "jsdelivr" / "cdnjs" / "unpkg".

    Returns:
        (message, result DataFrame, info text, map file path or None).
    """
    # Load the workbook; bail out with a user-facing message on failure.
    if excel_file is None or not hasattr(excel_file, "name"):
        table_df = pd.DataFrame(columns=["address_input", "CF", "lat", "lon"])
        return ("Excelファイルを指定してください。", table_df, "", None)
    try:
        df = pd.read_excel(excel_file.name, sheet_name=sheet_name, header=int(header_row))
    except Exception as e:
        empty_df = pd.DataFrame(columns=["address_input", "CF", "lat", "lon"])
        return (f"Excel の読み込みに失敗しました: {e}", empty_df, "", None)

    # Column lookup supports both positional (int) and named (str) access.
    addr_series = df.iloc[:, address_col] if isinstance(address_col, int) else df[address_col]
    cf_series = df.iloc[:, power_col] if isinstance(power_col, int) else df[power_col]

    addresses = addr_series.astype(str).tolist()
    cfs = cf_series.tolist()

    # Geocoding (with persistent cache).
    geo_df = geocode_with_cache(addresses, cfs, use_internet=bool(use_inet))
    table_df = geo_df[["address_input", "CF", "lat", "lon"]].copy()

    # Map HTML -> CDN rewrite -> real file handed to the gr.File output.
    try:
        html_text = _build_folium_map_html(table_df, base_name=base_name)
        html_text = _rewrite_leaflet_cdn(html_text, host=leaflet_cdn)
        map_file_path = _save_map_html_file(html_text)

        msg = (
            "✅ 地図HTMLを生成しました。下の **地図HTMLファイル** をクリックして新規タブで開いてください。\n"
            "(埋め込みではなく実ファイル配信なので、CSPが厳しい環境でも表示できるはずです)"
        )
        info = f"ポイント数(有効座標): {int(table_df[['lat','lon']].dropna().shape[0])} / {len(table_df)}"
        return (msg, table_df, info, map_file_path)
    except Exception as e:
        return (f"地図描画に失敗しました: {e}", table_df, "", None)
265
 
266
# ----------------------------
# Gradio UI
# ----------------------------
with gr.Blocks(title="Excel住所 → Folium(無料タイル・File配信)") as demo:
    gr.Markdown(
        "## Excelの住所を国土地理院APIでジオコーディング → Folium(Leaflet)で地図表示(無料タイル・Mapbox不要)\n"
        "- 地図は **実ファイル(.html)** として配信します(CSPが厳しい環境でもOK)。\n"
        "- タイル=地理院/OSM、CDNは必要に応じて切替できます。"
    )

    with gr.Row():
        xlsx_in = gr.File(label="Excelファイル(住所付き)", file_count="single", file_types=[".xlsx", ".xls"])

    with gr.Row():
        sheet = gr.Textbox(label="シート名", value="認定設備")
        header_row = gr.Number(label="ヘッダー行番号(0始まり)", value=2, precision=0)

    with gr.Row():
        address_col = gr.Textbox(label="住所列(列名 or 0始まり列番号)", value="発電設備の所在地")
        power_col = gr.Textbox(label="数値列(任意:列名 or 0始まり列番号)", value="発電出力(kW)")

    with gr.Row():
        use_inet = gr.Checkbox(label="国土地理院APIに問い合わせ(オフでキャッシュのみ使用)", value=True)
        base_name = gr.Dropdown(choices=list(TILE_CATALOG.keys()), value="GSI 標準地図", label="ベースマップ")
        leaflet_cdn = gr.Dropdown(
            choices=["jsdelivr", "cdnjs", "unpkg"], value="jsdelivr",
            label="Leaflet CDN(遮断時に切替)"
        )

    run_btn = gr.Button("描画")

    out_html = gr.HTML(label="案内メッセージ")
    out_table = gr.Dataframe(label="ジオコーディング結果(住所・緯度・経度・CF)", wrap=True)
    out_info = gr.Textbox(label="メタ情報", lines=2)
    out_file = gr.File(label="地図HTMLファイル(クリックで開く/ダウンロード)")

    def _parse(x):
        # Same int-or-name coercion as the module-level _parse_indexer.
        try:
            return int(x)
        except Exception:
            return x

    def app_run(xls, s, h, a, p, inet, base, cdn):
        # Adapt raw widget values to run()'s expected argument types.
        return run(
            xls, s, int(h), _parse(a), _parse(p), inet, base, cdn
        )

    run_btn.click(
        fn=app_run,
        inputs=[xlsx_in, sheet, header_row, address_col, power_col, use_inet, base_name, leaflet_cdn],
        outputs=[out_html, out_table, out_info, out_file],
    )

if __name__ == "__main__":
    demo.launch()
 
 
 
 
1
  import os
2
  import re
3
  import time
4
+ import zipfile
5
+ import unicodedata
6
+ from urllib.parse import urljoin, urlparse, parse_qs, unquote
7
+
8
+ import gradio as gr
9
  import requests
10
  import pandas as pd
11
+ from bs4 import BeautifulSoup
12
+
13
# FIT portal page that lists the per-prefecture Excel download links.
PUBLIC_URL = "https://www.fit-portal.go.jp/PublicInfo"
# Working directory for raw downloads and generated outputs.
OUTDIR = "data_fit"
15
+
16
+ # -------------------- ユーティリティ --------------------
17
 
18
def normalize_filename(name: str) -> str:
    """Return *name* made safe for use as a local file name.

    NFKC-normalizes the text, collapses path/shell metacharacters into
    "_", trims surrounding whitespace, and falls back to "file" when
    nothing remains.
    """
    cleaned = unicodedata.normalize("NFKC", name)
    cleaned = re.sub(r'[\\/:*?"<>|]+', "_", cleaned)
    cleaned = cleaned.strip()
    if not cleaned:
        return "file"
    return cleaned
23
+
24
def guess_filename_from_headers(resp: requests.Response, fallback: str) -> str:
    """Derive a download file name from the Content-Disposition header.

    Handles both filename= and RFC 5987 filename*=UTF-8'' forms; falls
    back to *fallback* when the header is absent or unparsable. The
    result always goes through normalize_filename().
    """
    disposition = resp.headers.get("Content-Disposition", "")
    match = re.search(
        r'filename\*?=(?:UTF-8\'\')?"?([^";]+)"?',
        disposition,
        flags=re.IGNORECASE,
    )
    if match is None:
        return normalize_filename(fallback)
    raw = match.group(1)
    try:
        raw = unquote(raw)
    except Exception:
        pass  # keep the percent-encoded form when decoding fails
    return normalize_filename(raw)
34
+
35
def is_pref_link(a_tag) -> bool:
    """True when the anchor points at a prefecture file-download URL."""
    href = a_tag.get("href") or ""
    if "servlet.FileDownload" not in href:
        return False
    return "file=" in href
38
+
39
def extract_pref_name(a_tag) -> str:
    """Visible link text, stripped; 'pref' when the anchor has none."""
    label = (a_tag.get_text() or "").strip()
    if label:
        return label
    return "pref"
42
 
43
+ def pick_sheet_name(xls_path: str, preferred: str | None) -> str | None:
44
  try:
45
+ xl = pd.ExcelFile(xls_path)
46
+ if preferred and preferred in xl.sheet_names:
47
+ return preferred
48
+ # 一般的に「代表地番」を優先
49
+ for candidate in ["代表地番", "代表地番のみ", "代表地番シート"]:
50
+ if candidate in xl.sheet_names:
51
+ return candidate
52
+ return xl.sheet_names[0] if xl.sheet_names else None
53
  except Exception:
54
+ return None
55
+
56
def collect_pref_links(session: requests.Session) -> list[dict]:
    """Scrape the FIT public page for per-prefecture download links.

    Returns [{"pref": <link text>, "href": <absolute URL>}, ...] with
    exact (pref, href) duplicates removed and page order preserved.
    """
    response = session.get(PUBLIC_URL, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    found = []
    for anchor in soup.find_all("a"):
        if not is_pref_link(anchor):
            continue
        found.append({
            "pref": extract_pref_name(anchor),
            "href": urljoin(PUBLIC_URL, anchor.get("href")),
        })

    # De-duplicate while keeping the first occurrence of each pair.
    seen = set()
    unique = []
    for entry in found:
        key = (entry["pref"], entry["href"])
        if key in seen:
            continue
        seen.add(key)
        unique.append(entry)
    return unique
72
+
73
def download_one(session: requests.Session, url: str, outdir: str, pref: str) -> str:
    """Stream one prefecture Excel into *outdir*; return its local path.

    The file name comes from Content-Disposition when available,
    otherwise "<pref>_<file-id>.xlsx" built from the URL's file= query
    parameter (truncated to 18 chars).
    """
    os.makedirs(outdir, exist_ok=True)
    query = parse_qs(urlparse(url).query)
    file_id = query.get("file", ["unknown"])[0][:18]
    with session.get(url, timeout=180, stream=True) as response:
        response.raise_for_status()
        filename = guess_filename_from_headers(response, f"{pref}_{file_id}.xlsx")
        target = os.path.join(outdir, filename)
        # Stream in 32 KiB chunks to keep memory flat on big workbooks.
        with open(target, "wb") as sink:
            for block in response.iter_content(chunk_size=1 << 15):
                if block:
                    sink.write(block)
    return target
86
+
87
+ # -------------------- 列名選択: 小分類 > 中分類 > 大分類 --------------------
88
+
89
+ def _clean_cell(x) -> str:
90
+ if x is None:
91
+ return ""
92
+ s = str(x).strip()
93
+ if s.lower() == "nan":
94
+ return ""
95
  return s
96
 
97
def choose_names_from_multiindex(mi: pd.MultiIndex) -> list[str]:
    """Flatten a 3-level header MultiIndex into single column names.

    For each tuple (large, middle, small) the most specific non-empty
    level wins: small, else middle, else large, else the literal
    'col'. Duplicates are disambiguated as name.1, name.2, ...
    """
    def _norm(cell) -> str:
        # Same cleaning contract as the module's _clean_cell():
        # trimmed str(), with None and literal 'nan' mapped to ''.
        if cell is None:
            return ""
        text = str(cell).strip()
        return "" if text.lower() == "nan" else text

    picked = []
    for levels in mi:
        if len(levels) >= 3:
            top, middle, leaf = _norm(levels[0]), _norm(levels[1]), _norm(levels[2])
        else:
            # Defensive: tolerate a shallower index than expected.
            top = _norm(levels[0]) if len(levels) >= 1 else ""
            middle = _norm(levels[1]) if len(levels) >= 2 else ""
            leaf = ""
        picked.append(leaf or middle or top or "col")

    # Disambiguate duplicates with .1, .2, ... suffixes.
    counts = {}
    renamed = []
    for name in picked:
        if name in counts:
            counts[name] += 1
            renamed.append(f"{name}.{counts[name]}")
        else:
            counts[name] = 0
            renamed.append(name)
    return renamed
128
+
129
# -------------------- Read rules --------------------
# First workbook: row 0 is discarded and rows 1/2/3 form a 3-level
# header (pandas: header=[1, 2, 3]).
HEADER_ROWS = [1, 2, 3]
# Later workbooks: rows 0-3 are header noise — skip them and read data
# only (pandas: skiprows=4, header=None).
SKIP_ROWS_OTHERS = 4
134
+
135
def load_excel_first(xls_path: str, sheet_pref: str | None) -> tuple[pd.DataFrame, list[str]]:
    """Read the first workbook and fix the canonical column names.

    - Reads with header=HEADER_ROWS (=[1,2,3]; row 0 is skipped
      implicitly), everything as str.
    - Drops the leftmost column.
    - Collapses the 3-level header via choose_names_from_multiindex()
      (small > middle > large priority).

    Returns:
        (df, chosen_names) — chosen_names is reused for every later file.

    Raises:
        RuntimeError: when no readable sheet exists.
    """
    sheet = pick_sheet_name(xls_path, sheet_pref)
    if not sheet:
        raise RuntimeError("シートが見つかりません")
    df = pd.read_excel(
        xls_path,
        sheet_name=sheet,
        engine="openpyxl",
        header=HEADER_ROWS,
        dtype=str
    )
    # Drop the leftmost column.
    df = df.iloc[:, 1:]
    # Trim surrounding whitespace on text columns.
    for c in df.select_dtypes(include=["object"]).columns:
        df[c] = df[c].str.strip()

    # Collapse the header to single-level names.
    if isinstance(df.columns, pd.MultiIndex):
        chosen = choose_names_from_multiindex(df.columns)
    else:
        # Fallback: already single level — clean and de-duplicate here.
        raw = [_clean_cell(c) or "col" for c in df.columns]
        seen = {}
        chosen = []
        for n in raw:
            if n not in seen:
                seen[n] = 0
                chosen.append(n)
            else:
                seen[n] += 1
                chosen.append(f"{n}.{seen[n]}")
    df.columns = chosen
    return df, chosen
176
+
177
def load_excel_other(xls_path: str, sheet_pref: str | None, target_cols: list[str]) -> pd.DataFrame | None:
    """Read a follow-up workbook as data-only rows.

    - skiprows=SKIP_ROWS_OTHERS (=4), header=None, everything as str.
    - Drops the leftmost column.
    - Pads or truncates to len(target_cols), then renames the columns
      to target_cols (the names fixed by the first workbook).

    Returns None when no readable sheet exists.
    """
    sheet = pick_sheet_name(xls_path, sheet_pref)
    if not sheet:
        return None
    df = pd.read_excel(
        xls_path,
        sheet_name=sheet,
        engine="openpyxl",
        header=None,
        skiprows=SKIP_ROWS_OTHERS,
        dtype=str
    )
    # Drop the leftmost column.
    df = df.iloc[:, 1:]
    # Trim surrounding whitespace on text columns.
    for c in df.select_dtypes(include=["object"]).columns:
        df[c] = df[c].str.strip()

    # Reconcile the column count with the first file's layout.
    if df.shape[1] != len(target_cols):
        print(f"[WARN] 列数不一致: file={os.path.basename(xls_path)} "
              f"read={df.shape[1]} vs target={len(target_cols)} -> 自動調整")
        if df.shape[1] > len(target_cols):
            df = df.iloc[:, :len(target_cols)]
        else:
            # Too few columns: pad with empty columns on the right.
            for k in range(len(target_cols) - df.shape[1]):
                df[f"_pad_{k}"] = None
            df = df.iloc[:, :len(target_cols)]

    df.columns = target_cols
    return df
216
 
217
def zip_paths(paths: list[str], out_zip: str) -> str:
    """Deflate-compress the existing files in *paths* into *out_zip*.

    Missing paths are silently skipped; archive members keep only
    their base names. Returns out_zip.
    """
    with zipfile.ZipFile(out_zip, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for candidate in paths:
            if not os.path.exists(candidate):
                continue
            archive.write(candidate, arcname=os.path.basename(candidate))
    return out_zip
223
+
224
# -------------------- Main job (invoked from Gradio) --------------------

def run_job(sheet_name, sleep_sec, limit, re_download, progress=gr.Progress(track_tqdm=False)):
    """Collect, download, and merge every prefecture Excel file.

    Args:
        sheet_name: preferred worksheet name ('' / None = auto-pick).
        sleep_sec: pause between downloads (politeness throttle).
        limit: when > 0, only the first N prefecture links are used.
        re_download: False reuses a matching .xlsx already in OUTDIR.
        progress: Gradio progress hook (the gr.Progress() default is
            Gradio's injection convention, not shared mutable state).

    Returns:
        (message, combined xlsx path, parquet path, raw zip path,
        preview csv path) — the path slots are None on failure.
    """
    progress(0, desc="初期化中…")

    session = requests.Session()
    session.headers.update({
        "User-Agent": "Mozilla/5.0 (compatible; FITCollector/1.3; +https://huggingface.co/spaces)",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    })

    # 1) Collect the prefecture download links.
    links = collect_pref_links(session)
    if not links:
        return ("都道府県ファイルのリンク検出に失敗しました。ページ構成の変更/一時的な制限の可能性があります。",
                None, None, None, None)
    if limit and limit > 0:
        links = links[:int(limit)]
    progress(0.1, desc=f"リンク検出 {len(links)} 件")

    # 2) Download (best-effort; failures are logged and skipped).
    downloaded = []
    for i, item in enumerate(links, start=1):
        progress(0.1 + 0.6 * i / max(1, len(links)),
                 desc=f"ダウンロード {i}/{len(links)}: {item['pref']}")
        try:
            existing = None
            # Reuse a previously downloaded file for this prefecture.
            if not re_download and os.path.isdir(OUTDIR):
                for fn in os.listdir(OUTDIR):
                    if fn.lower().endswith(".xlsx") and item["pref"] in fn:
                        existing = os.path.join(OUTDIR, fn)
                        break
            if existing and os.path.exists(existing):
                path = existing
            else:
                path = download_one(session, item["href"], OUTDIR, item["pref"])
                time.sleep(float(sleep_sec))
            downloaded.append(path)
        except Exception as e:
            print(f"[WARN] ダウンロード失敗: {item['pref']} {e}")

    if not downloaded:
        return ("ダウンロードに失敗しました。", None, None, None, None)

    # 3) First workbook fixes the canonical column names.
    progress(0.75, desc="1枚目を読み込み(列名を確定)")
    first_path = downloaded[0]
    try:
        df0, cols0 = load_excel_first(first_path, sheet_name if sheet_name else None)
    except Exception as e:
        return (f"1枚目の読み込みに失敗しました: {os.path.basename(first_path)} / {e}",
                None, None, None, None)

    frames = [df0]

    # 4) Remaining workbooks reuse those column names.
    for j, p in enumerate(downloaded[1:], start=2):
        progress(0.75 + 0.25 * (j - 1) / max(1, len(downloaded) - 1),
                 desc=f"{j}枚目を読み込み")
        df = load_excel_other(p, sheet_name if sheet_name else None, cols0)
        if df is not None and len(df) > 0:
            frames.append(df)
        else:
            print(f"[WARN] 読み込みスキップ: {os.path.basename(p)}")

    # 5) Vertical concatenation.
    combined = pd.concat(frames, ignore_index=True)

    # 6) Outputs (Excel + Parquet).
    os.makedirs(OUTDIR, exist_ok=True)
    out_xlsx = os.path.join(OUTDIR, "combined_fit.xlsx")
    out_parq = os.path.join(OUTDIR, "combined_fit.parquet")
    with pd.ExcelWriter(out_xlsx, engine="openpyxl") as w:
        combined.to_excel(w, index=False, sheet_name="combined")
    combined.to_parquet(out_parq, index=False)

    # 7) ZIP of all raw downloads.
    raw_zip = os.path.join(OUTDIR, "raw_excels.zip")
    zip_paths(downloaded, raw_zip)

    # 8) CSV preview of the first 1000 rows.
    preview_csv = os.path.join(OUTDIR, "combined_head.csv")
    combined.head(1000).to_csv(preview_csv, index=False)

    progress(1.0, desc=f"完了({len(combined):,} 行)")
    msg = (
        f"✅ 結合完了: 行数 = {len(combined):,}\n"
        f"・Excel: combined_fit.xlsx\n"
        f"・Parquet: combined_fit.parquet\n"
        f"・Raw ZIP: raw_excels.zip\n"
        f"・プレビュー: combined_head.csv\n"
        f"・列名は『小分類>中分類>大分類』の優先で単一行化(結合は不実施)"
    )
    return (msg, out_xlsx, out_parq, raw_zip, preview_csv)
318
 
319
# -------------------- Gradio UI --------------------
# Widget layout for the collection job; run_job returns
# (message, xlsx, parquet, zip, preview) matching the outputs list.

with gr.Blocks(title="FIT 公表(都道府県別Excel)一括取得&結合") as demo:
    gr.Markdown(
        """
        # FIT 公表(都道府県別Excel)一括取得 & 結合
        **列名ポリシー**:
        - 1枚目: 0行目を使わず、1/2/3行目をヘッダとして読み込み(3段)。
        - 列名は **小分類に値があれば小分類、無ければ中分類のみ**(結合しません)。
        - 2枚目以降: 0〜3行目をスキップし、データのみ読み込み。
        - すべてのファイルで **左端の列は削除**。
        - ファイル名/シート名などのメタ列は付与しません。
        """
    )

    with gr.Row():
        sheet = gr.Textbox(label="読み込むシート名(空欄=自動)", placeholder="例)代表地番 / 全地番")
        sleep = gr.Slider(0.0, 5.0, value=1.0, step=0.1, label="ダウンロード間隔(秒)")

    with gr.Row():
        limit = gr.Number(value=None, precision=0, label="先頭N県のみ(テスト用・空欄は全県)")
        reget = gr.Checkbox(label="既存ファイルがあっても再ダウンロードする", value=False)

    run_btn = gr.Button("実行", variant="primary")
    out_msg = gr.Markdown()
    out_xlsx = gr.File(label="結合Excel(combined_fit.xlsx)")
    out_parq = gr.File(label="結合Parquet(combined_fit.parquet)")
    out_zip = gr.File(label="取得した都道府県Excel一式(zip)")
    out_preview = gr.File(label="先頭1000行プレビュー(CSV)")

    run_btn.click(
        fn=run_job,
        inputs=[sheet, sleep, limit, reget],
        outputs=[out_msg, out_xlsx, out_parq, out_zip, out_preview]
    )

if __name__ == "__main__":
    # queue() serializes long-running jobs so progress updates stream.
    demo.queue(max_size=20).launch()