Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -89,12 +89,23 @@ def download_one(session: requests.Session, url: str, outdir: str, pref: str) ->
|
|
| 89 |
f.write(chunk)
|
| 90 |
return path
|
| 91 |
|
| 92 |
-
def load_excel(xls_path: str, sheet_pref: str | None, pref_name: str) -> pd.DataFrame | None:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
sheet = pick_sheet_name(xls_path, sheet_pref)
|
| 94 |
if not sheet:
|
| 95 |
return None
|
| 96 |
try:
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# 前後空白トリム
|
| 99 |
for c in df.select_dtypes(include=["object"]).columns:
|
| 100 |
df[c] = df[c].str.strip()
|
|
@@ -105,6 +116,7 @@ def load_excel(xls_path: str, sheet_pref: str | None, pref_name: str) -> pd.Data
|
|
| 105 |
except Exception:
|
| 106 |
return None
|
| 107 |
|
|
|
|
| 108 |
def zip_paths(paths: list[str], out_zip: str) -> str:
|
| 109 |
with zipfile.ZipFile(out_zip, "w", compression=zipfile.ZIP_DEFLATED) as z:
|
| 110 |
for p in paths:
|
|
@@ -173,10 +185,18 @@ def run_job(sheet_name, sleep_sec, limit, re_download, progress=gr.Progress(trac
|
|
| 173 |
for i, it in enumerate(downloaded, start=1):
|
| 174 |
progress(0.72 + 0.18 * i / max(1, len(downloaded)),
|
| 175 |
desc=f"読み込み {i}/{len(downloaded)}: {os.path.basename(it['path'])}")
|
| 176 |
-
df = load_excel(it["path"],
|
|
|
|
|
|
|
|
|
|
| 177 |
if df is not None and len(df) > 0:
|
| 178 |
frames.append(df)
|
| 179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
if not frames:
|
| 181 |
return ("Excelは取得できましたが、読み込めるデータがありませんでした(シート名の指定を見直してください)。",
|
| 182 |
None, None, None, None)
|
|
|
|
| 89 |
f.write(chunk)
|
| 90 |
return path
|
| 91 |
|
| 92 |
+
def load_excel(xls_path: str, sheet_pref: str | None, pref_name: str, use_header: bool) -> pd.DataFrame | None:
|
| 93 |
+
"""
|
| 94 |
+
use_header=True のときのみ上3行を列名として使用
|
| 95 |
+
use_header=False のときは列名なし(skiprows=3)
|
| 96 |
+
"""
|
| 97 |
sheet = pick_sheet_name(xls_path, sheet_pref)
|
| 98 |
if not sheet:
|
| 99 |
return None
|
| 100 |
try:
|
| 101 |
+
if use_header:
|
| 102 |
+
df = pd.read_excel(xls_path, sheet_name=sheet, engine="openpyxl",
|
| 103 |
+
header=[0,1,2], dtype=str)
|
| 104 |
+
else:
|
| 105 |
+
# 3行スキップしてデータだけ読み込む(列名は後で統一)
|
| 106 |
+
df = pd.read_excel(xls_path, sheet_name=sheet, engine="openpyxl",
|
| 107 |
+
header=None, skiprows=3, dtype=str)
|
| 108 |
+
|
| 109 |
# 前後空白トリム
|
| 110 |
for c in df.select_dtypes(include=["object"]).columns:
|
| 111 |
df[c] = df[c].str.strip()
|
|
|
|
| 116 |
except Exception:
|
| 117 |
return None
|
| 118 |
|
| 119 |
+
|
| 120 |
def zip_paths(paths: list[str], out_zip: str) -> str:
|
| 121 |
with zipfile.ZipFile(out_zip, "w", compression=zipfile.ZIP_DEFLATED) as z:
|
| 122 |
for p in paths:
|
|
|
|
| 185 |
for i, it in enumerate(downloaded, start=1):
|
| 186 |
progress(0.72 + 0.18 * i / max(1, len(downloaded)),
|
| 187 |
desc=f"読み込み {i}/{len(downloaded)}: {os.path.basename(it['path'])}")
|
| 188 |
+
df = load_excel(it["path"],
|
| 189 |
+
sheet_name if sheet_name else None,
|
| 190 |
+
it["pref"],
|
| 191 |
+
use_header=(i==1))
|
| 192 |
if df is not None and len(df) > 0:
|
| 193 |
frames.append(df)
|
| 194 |
|
| 195 |
+
# 2件目以降は列名が無いため、1件目の列名を上書き
|
| 196 |
+
if len(frames) > 1:
|
| 197 |
+
frames[1:] = [f.set_axis(frames[0].columns, axis=1) for f in frames[1:]]
|
| 198 |
+
|
| 199 |
+
|
| 200 |
if not frames:
|
| 201 |
return ("Excelは取得できましたが、読み込めるデータがありませんでした(シート名の指定を見直してください)。",
|
| 202 |
None, None, None, None)
|