Spaces:
Sleeping
Sleeping
File size: 7,216 Bytes
dd63fcd f5fc7e8 dd63fcd f5fc7e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import streamlit as st
from pathlib import Path
import pandas as pd
import re
import json
import warnings
from io import BytesIO
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
# --- helper functions (kept from your script) ---
# Case-insensitive regexes used to recognise a "style id" column by header name.
# Order matters: exact "style_id"-style headers are preferred over loose matches.
STYLE_PATTERNS = [re.compile(p, re.I) for p in [
    r"^style[_\s\-]?id$", r"styleid", r"style[_\s\-]?code", r"^sku$"
]]
def find_styleid_column(columns):
    """Return the first column whose header looks like a style id, else None.

    First pass: match any of the precompiled STYLE_PATTERNS against the
    stringified header. Second pass (fallback): accept any header that
    contains both 'style' and 'id', case-insensitively.
    """
    for candidate in columns:
        header = str(candidate)
        if any(pattern.search(header) for pattern in STYLE_PATTERNS):
            return candidate
    for candidate in columns:
        lowered = str(candidate).lower()
        if "style" in lowered and "id" in lowered:
            return candidate
    return None
def find_brand_size_start(columns):
    """Return the first column mentioning both 'brand' and 'size'; failing
    that, the first column mentioning 'size'; otherwise None.

    Matching is case-insensitive on the stringified header.
    """
    lowered_pairs = [(col, str(col).lower()) for col in columns]
    for original, lowered in lowered_pairs:
        if "brand" in lowered and "size" in lowered:
            return original
    for original, lowered in lowered_pairs:
        if "size" in lowered:
            return original
    return None
def unique_preserve_order(seq):
    """De-duplicate *seq* into a list, keeping first-seen order.

    Items must be hashable (same requirement as the original set-based
    implementation); dict.fromkeys preserves insertion order.
    """
    return list(dict.fromkeys(seq))
# --- Streamlit UI ---
# Page chrome, the two upload widgets side by side, and the run options.
st.set_page_config(page_title="Size Chart Merger", layout="wide")
st.title("Size Chart \u2194 Product Details Merger")
st.write("Upload a Size Chart workbook and a Product Details workbook (matching sheet names). This app merges size columns into product details.")
col1, col2 = st.columns(2)
with col1:
    # Size-chart workbook: source of the per-style size columns.
    size_file = st.file_uploader("Upload Size Chart Excel", type=["xlsx", "xlsm"], key="size")
with col2:
    # Product-details workbook: target the size columns are merged into.
    prod_file = st.file_uploader("Upload Product Details Excel", type=["xlsx", "xlsm"], key="prod")
# Only used as the download button's suggested filename; nothing is written to disk.
output_name = st.text_input("Output filename (optional)", value="input.xlsx")
show_logs = st.checkbox("Show detailed log", value=True)
# Main action: for every sheet in the size-chart workbook that also exists in
# the product-details workbook, detect the style-id and first size column,
# reshape the size data to one JSON-encoded list per (style, column), and
# merge it into the product sheet. Result is offered as an in-memory download.
if st.button("Run merge"):
    if size_file is None or prod_file is None:
        st.error("Please upload both files before running the merge.")
    else:
        try:
            size_xl = pd.ExcelFile(size_file, engine="openpyxl")
            prod_xl = pd.ExcelFile(prod_file, engine="openpyxl")
            size_sheets = size_xl.sheet_names
            prod_sheets = prod_xl.sheet_names
            # Parse every product sheet up front (dtype=str keeps ids verbatim,
            # e.g. leading zeros); merged results overwrite entries in place.
            product_dfs = {name: prod_xl.parse(name, dtype=str) for name in prod_sheets}
            log = []
            progress_bar = st.progress(0)
            total = len(size_sheets)
            for i, sheet_name in enumerate(size_sheets):
                # update progress (max() guards the divide when total == 0)
                progress_bar.progress(int((i / max(total, 1)) * 100))
                st.write(f"Processing sheet: **{sheet_name}**")
                if sheet_name not in prod_sheets:
                    log.append(f"Skipped '{sheet_name}': not present in Product Details.")
                    continue
                size_df = size_xl.parse(sheet_name, dtype=str)
                prod_df = product_dfs[sheet_name]
                # Normalise headers to str so the detection helpers and
                # column-membership checks behave consistently.
                size_df.columns = [str(c) for c in size_df.columns]
                prod_df.columns = [str(c) for c in prod_df.columns]
                style_col_size = find_styleid_column(size_df.columns)
                style_col_prod = find_styleid_column(prod_df.columns)
                if style_col_size is None:
                    log.append(f"Sheet '{sheet_name}': could not detect style id in Size Chart.")
                    continue
                if style_col_prod is None:
                    # Product sheet lacks a style-id column: create an empty one
                    # named after the size-chart's column so the merge key exists.
                    # NOTE(review): merging on an all-NA key means size rows only
                    # arrive via the outer merge below — confirm this is intended.
                    style_col_prod = style_col_size
                    prod_df[style_col_prod] = pd.NA
                    log.append(f"Sheet '{sheet_name}': Product Details missing style id; created '{style_col_prod}'.")
                brand_size_col = find_brand_size_start(size_df.columns)
                if brand_size_col is None:
                    log.append(f"Sheet '{sheet_name}': no size column found. Skipping.")
                    continue
                # Everything from the first size column to the end of the sheet
                # is treated as size data to merge.
                size_cols = list(size_df.columns)
                start_idx = size_cols.index(brand_size_col)
                size_columns_to_merge = size_cols[start_idx:]
                if brand_size_col not in prod_df.columns:
                    prod_df[brand_size_col] = pd.NA
                    log.append(f"Sheet '{sheet_name}': created '{brand_size_col}' in Product Details.")
                for col in size_columns_to_merge:
                    if col not in prod_df.columns:
                        prod_df[col] = pd.NA
                        log.append(f"Sheet '{sheet_name}': inserted missing column '{col}'.")
                # Wide -> long: one row per (style id, size column, cell value).
                long = size_df.melt(id_vars=[style_col_size], value_vars=size_columns_to_merge,
                                    var_name="col_name", value_name="value")
                # Drop blanks and stringified missing markers.
                # NOTE(review): the filter is case-sensitive — literal "None"/"NA"
                # cell text (capitalised) would survive; confirm acceptable.
                long["value"] = long["value"].astype(str).str.strip()
                invalid = long["value"].isin(["", "nan", "none", "na"])
                long = long[~invalid]
                if long.empty:
                    log.append(f"Sheet '{sheet_name}': no valid size entries to merge.")
                    continue
                # Collapse each (style, column) group to a JSON list of its
                # distinct values, preserving first-seen order.
                grouped = (
                    long.groupby([style_col_size, "col_name"])['value']
                    .apply(lambda s: json.dumps(unique_preserve_order(list(s)), ensure_ascii=False))
                    .reset_index()
                )
                # Back to wide: one row per style id, one column per size column.
                pivot = grouped.pivot(index=style_col_size, columns="col_name", values="value")
                pivot.reset_index(inplace=True)
                pivot.rename(columns={style_col_size: style_col_prod}, inplace=True)
                # Outer merge keeps product rows without size data AND adds rows
                # for style ids present only in the size chart.
                # NOTE(review): the added rows will be blank except for the size
                # columns — confirm outer (vs left) is the desired behaviour.
                merged = prod_df.merge(pivot, on=style_col_prod, how="outer", suffixes=("", "_new"))
                # Where both sides had the column, prefer the freshly merged
                # value and fall back to the existing product value.
                for col in size_columns_to_merge:
                    newcol = col + "_new"
                    if newcol in merged.columns:
                        merged[col] = merged[newcol].combine_first(merged[col])
                        merged.drop(columns=newcol, inplace=True)
                product_dfs[sheet_name] = merged
                log.append(f"Sheet '{sheet_name}': merged {pivot.shape[0]} style ids.")
            progress_bar.progress(100)
            # write result to an in-memory Excel file
            output = BytesIO()
            with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
                for name, df in product_dfs.items():
                    # Excel caps sheet names at 31 characters.
                    df.to_excel(writer, sheet_name=name[:31], index=False)
            output.seek(0)
            st.success("Merge complete!")
            st.write(f"Output ready: **{output_name}**")
            if show_logs:
                st.subheader("Merge Log")
                for line in log:
                    st.write("- ", line)
            # show previews and download
            st.subheader("Preview of merged sheets")
            for name, df in product_dfs.items():
                with st.expander(f"Sheet: {name} ({len(df)} rows)"):
                    st.dataframe(df.head(200))
            st.download_button(
                label="Download merged workbook",
                data=output.getvalue(),
                file_name=output_name if output_name.endswith('.xlsx') else output_name + '.xlsx',
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
        except Exception as e:
            # Surface the full traceback in the UI rather than crashing the app.
            st.exception(e)
|