hiroki0008 committed on
Commit
3fa0fec
·
verified ·
1 Parent(s): aa937e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -4
app.py CHANGED
@@ -83,6 +83,39 @@ def download_one(session: requests.Session, url: str, outdir: str, pref: str) ->
83
  if chunk:
84
  f.write(chunk)
85
  return path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  # ---- 3段ヘッダ → 1枚目のみ採用/他はスキップ行数で読込 ----------------------
@@ -108,10 +141,29 @@ def load_excel_first(xls_path: str, sheet_pref: str | None) -> tuple[pd.DataFram
108
  )
109
  # 左端の余計な列を削除
110
  df = df.iloc[:, 1:]
111
- # 前後空白トリム
112
- for c in df.select_dtypes(include=["object"]).columns:
113
- df[c] = df[c].str.strip()
114
- cols = list(df.columns) # MultiIndex のまま保持
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  return df, cols
116
 
117
  def load_excel_other(xls_path: str, sheet_pref: str | None, target_cols: list) -> pd.DataFrame | None:
 
83
  if chunk:
84
  f.write(chunk)
85
  return path
86
+
87
def choose_names_from_multiindex(mi: pd.MultiIndex) -> list[str]:
    """Pick one flat column name per entry of a 3-level header.

    Each entry of *mi* is treated as a tuple ``(top, middle, bottom)``.
    The first non-empty level is chosen in the priority order
    middle (2nd level) -> bottom (3rd level) -> top (1st level).
    ``None``, whitespace-only values and the text ``"nan"`` (any case)
    count as empty. When every level is empty the name ``"col"`` is used.

    Repeated names are disambiguated with ``.1``, ``.2``, ... suffixes.
    A suffix is skipped when the resulting name is already taken by
    another column, so the returned names are always unique.

    Args:
        mi: The header to flatten; iterated as a sequence of tuples.

    Returns:
        A list of unique column names, one per entry of *mi*, in order.
    """
    def clean(x) -> str:
        # Normalise a single header cell to a stripped string ("" if empty).
        if x is None:
            return ""
        s = str(x).strip()
        return "" if s.lower() == "nan" else s

    # Select one name per column by priority.
    picked = []
    for tpl in mi:
        top = clean(tpl[0]) if len(tpl) >= 1 else ""
        middle = clean(tpl[1]) if len(tpl) >= 2 else ""
        bottom = clean(tpl[2]) if len(tpl) >= 3 else ""
        picked.append(middle or bottom or top or "col")  # middle > bottom > top

    # Resolve duplicates. Every raw name is reserved up front so that a
    # generated "name.N" can never collide with a genuine column name
    # (e.g. ["a", "a", "a.1"] must not produce "a.1" twice).
    taken = set(picked)
    seen = set()
    counters = {}
    uniq = []
    for name in picked:
        if name not in seen:
            seen.add(name)
            uniq.append(name)
            continue
        k = counters.get(name, 0) + 1
        candidate = f"{name}.{k}"
        while candidate in taken:
            k += 1
            candidate = f"{name}.{k}"
        counters[name] = k
        taken.add(candidate)
        uniq.append(candidate)
    return uniq
119
 
120
 
121
  # ---- 3段ヘッダ → 1枚目のみ採用/他はスキップ行数で読込 ----------------------
 
141
  )
142
  # 左端の余計な列を削除
143
  df = df.iloc[:, 1:]
144
+ # 列名を選択(中 > 小 > 大)
145
+ if isinstance(df.columns, pd.MultiIndex):
146
+ chosen = choose_names_from_multiindex(df.columns)
147
+ else:
148
+ # 単層ヘッダの場合のフォールバック
149
+ raw = []
150
+ for c in df.columns:
151
+ s = "" if c is None else str(c).strip()
152
+ raw.append("" if s.lower() == "nan" else s)
153
+ raw = [r if r else "col" for r in raw]
154
+ seen = {}
155
+ chosen = []
156
+ for n in raw:
157
+ if n in seen:
158
+ seen[n] += 1
159
+ chosen.append(f"{n}.{seen[n]}")
160
+ else:
161
+ seen[n] = 0
162
+ chosen.append(n)
163
+
164
+ df.columns = chosen
165
+
166
+
167
  return df, cols
168
 
169
  def load_excel_other(xls_path: str, sheet_pref: str | None, target_cols: list) -> pd.DataFrame | None: