hiroki0008 committed on
Commit
9e10dc0
·
verified ·
1 Parent(s): 7c5a3be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -89,12 +89,23 @@ def download_one(session: requests.Session, url: str, outdir: str, pref: str) ->
89
  f.write(chunk)
90
  return path
91
 
92
- def load_excel(xls_path: str, sheet_pref: str | None, pref_name: str) -> pd.DataFrame | None:
 
 
 
 
93
  sheet = pick_sheet_name(xls_path, sheet_pref)
94
  if not sheet:
95
  return None
96
  try:
97
- df = pd.read_excel(xls_path, sheet_name=sheet, engine="openpyxl", dtype=str)
 
 
 
 
 
 
 
98
  # 前後空白トリム
99
  for c in df.select_dtypes(include=["object"]).columns:
100
  df[c] = df[c].str.strip()
@@ -105,6 +116,7 @@ def load_excel(xls_path: str, sheet_pref: str | None, pref_name: str) -> pd.Data
105
  except Exception:
106
  return None
107
 
 
108
  def zip_paths(paths: list[str], out_zip: str) -> str:
109
  with zipfile.ZipFile(out_zip, "w", compression=zipfile.ZIP_DEFLATED) as z:
110
  for p in paths:
@@ -173,10 +185,18 @@ def run_job(sheet_name, sleep_sec, limit, re_download, progress=gr.Progress(trac
173
  for i, it in enumerate(downloaded, start=1):
174
  progress(0.72 + 0.18 * i / max(1, len(downloaded)),
175
  desc=f"読み込み {i}/{len(downloaded)}: {os.path.basename(it['path'])}")
176
- df = load_excel(it["path"], sheet_name if sheet_name else None, it["pref"])
 
 
 
177
  if df is not None and len(df) > 0:
178
  frames.append(df)
179
 
 
 
 
 
 
180
  if not frames:
181
  return ("Excelは取得できましたが、読み込めるデータがありませんでした(シート名の指定を見直してください)。",
182
  None, None, None, None)
 
89
  f.write(chunk)
90
  return path
91
 
92
+ def load_excel(xls_path: str, sheet_pref: str | None, pref_name: str, use_header: bool) -> pd.DataFrame | None:
93
+ """
94
+ use_header=True のときのみ上3行を列名として使用
95
+ use_header=False のときは列名なし(skiprows=3)
96
+ """
97
  sheet = pick_sheet_name(xls_path, sheet_pref)
98
  if not sheet:
99
  return None
100
  try:
101
+ if use_header:
102
+ df = pd.read_excel(xls_path, sheet_name=sheet, engine="openpyxl",
103
+ header=[0,1,2], dtype=str)
104
+ else:
105
+ # 3行スキップしてデータだけ読み込む(列名は後で統一)
106
+ df = pd.read_excel(xls_path, sheet_name=sheet, engine="openpyxl",
107
+ header=None, skiprows=3, dtype=str)
108
+
109
  # 前後空白トリム
110
  for c in df.select_dtypes(include=["object"]).columns:
111
  df[c] = df[c].str.strip()
 
116
  except Exception:
117
  return None
118
 
119
+
120
  def zip_paths(paths: list[str], out_zip: str) -> str:
121
  with zipfile.ZipFile(out_zip, "w", compression=zipfile.ZIP_DEFLATED) as z:
122
  for p in paths:
 
185
  for i, it in enumerate(downloaded, start=1):
186
  progress(0.72 + 0.18 * i / max(1, len(downloaded)),
187
  desc=f"読み込み {i}/{len(downloaded)}: {os.path.basename(it['path'])}")
188
+ df = load_excel(it["path"],
189
+ sheet_name if sheet_name else None,
190
+ it["pref"],
191
+ use_header=(i==1))
192
  if df is not None and len(df) > 0:
193
  frames.append(df)
194
 
195
+ # 2件目以降は列名が無いため、1件目の列名を上書き
196
+ if len(frames) > 1:
197
+ frames[1:] = [f.set_axis(frames[0].columns, axis=1) for f in frames[1:]]
198
+
199
+
200
  if not frames:
201
  return ("Excelは取得できましたが、読み込めるデータがありませんでした(シート名の指定を見直してください)。",
202
  None, None, None, None)