File size: 7,216 Bytes
dd63fcd
f5fc7e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd63fcd
f5fc7e8
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import streamlit as st
from pathlib import Path
import pandas as pd
import re
import json
import warnings
from io import BytesIO

# openpyxl raises UserWarnings for some workbook features it cannot fully
# handle; suppress them so they don't clutter the Streamlit app output.
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

# --- helper functions (kept from your script) ---
# Regexes that identify a "style id" column by name. Matching is
# case-insensitive and uses re.search, so the unanchored patterns also hit
# longer names (e.g. "Product StyleID" matches the bare "styleid" pattern).
STYLE_PATTERNS = [re.compile(p, re.I) for p in [
    r"^style[_\s\-]?id$", r"styleid", r"style[_\s\-]?code", r"^sku$"
]]

def find_styleid_column(columns):
    """Return the first column whose name looks like a style identifier.

    Tries STYLE_PATTERNS first; if none match, falls back to any column
    whose lowercased name contains both 'style' and 'id'. Returns None
    when no candidate is found.
    """
    for candidate in columns:
        name = str(candidate)
        if any(pattern.search(name) for pattern in STYLE_PATTERNS):
            return candidate
    for candidate in columns:
        lowered = str(candidate).lower()
        if "style" in lowered and "id" in lowered:
            return candidate
    return None


def find_brand_size_start(columns):
    """Locate the column where size data begins.

    Prefers a column whose name contains both 'brand' and 'size'
    (case-insensitive); otherwise settles for the first column containing
    'size'. Returns None when neither exists.
    """
    lowered = [(col, str(col).lower()) for col in columns]
    for col, name in lowered:
        if "brand" in name and "size" in name:
            return col
    for col, name in lowered:
        if "size" in name:
            return col
    return None


def unique_preserve_order(seq):
    """Deduplicate *seq*, keeping the first occurrence of each element.

    Elements must be hashable, matching the original set-based version.
    """
    # dict keys are insertion-ordered (Python 3.7+), so fromkeys acts as
    # an ordered set.
    return list(dict.fromkeys(seq))


# --- Streamlit UI ---
# Page chrome and introductory copy.
st.set_page_config(page_title="Size Chart Merger", layout="wide")
st.title("Size Chart \u2194 Product Details Merger")
st.write("Upload a Size Chart workbook and a Product Details workbook (matching sheet names). This app merges size columns into product details.")

# Side-by-side uploaders for the two input workbooks.
col1, col2 = st.columns(2)
with col1:
    size_file = st.file_uploader("Upload Size Chart Excel", type=["xlsx", "xlsm"], key="size")
with col2:
    prod_file = st.file_uploader("Upload Product Details Excel", type=["xlsx", "xlsm"], key="prod")

# Name used for the downloaded file (an .xlsx suffix is appended later if missing).
output_name = st.text_input("Output filename (optional)", value="input.xlsx")
show_logs = st.checkbox("Show detailed log", value=True)

if st.button("Run merge"):
    if size_file is None or prod_file is None:
        st.error("Please upload both files before running the merge.")
    else:
        try:
            # Open both workbooks; individual sheets are parsed on demand below.
            size_xl = pd.ExcelFile(size_file, engine="openpyxl")
            prod_xl = pd.ExcelFile(prod_file, engine="openpyxl")
            size_sheets = size_xl.sheet_names
            prod_sheets = prod_xl.sheet_names

            # Parse every product sheet up front (dtype=str keeps ids/codes
            # from being coerced to numbers). Merged results overwrite these
            # entries in place as sheets are processed.
            product_dfs = {name: prod_xl.parse(name, dtype=str) for name in prod_sheets}
            log = []

            progress_bar = st.progress(0)
            total = len(size_sheets)

            for i, sheet_name in enumerate(size_sheets):
                # update progress
                progress_bar.progress(int((i / max(total, 1)) * 100))
                st.write(f"Processing sheet: **{sheet_name}**")

                # Only merge sheets present in BOTH workbooks.
                if sheet_name not in prod_sheets:
                    log.append(f"Skipped '{sheet_name}': not present in Product Details.")
                    continue

                size_df = size_xl.parse(sheet_name, dtype=str)
                prod_df = product_dfs[sheet_name]

                # Normalize column labels to strings so the name-based column
                # detection below behaves consistently.
                size_df.columns = [str(c) for c in size_df.columns]
                prod_df.columns = [str(c) for c in prod_df.columns]

                style_col_size = find_styleid_column(size_df.columns)
                style_col_prod = find_styleid_column(prod_df.columns)

                if style_col_size is None:
                    log.append(f"Sheet '{sheet_name}': could not detect style id in Size Chart.")
                    continue

                # If the product sheet lacks a style id column, create an empty
                # one named after the size chart's so the merge key exists on
                # both sides.
                if style_col_prod is None:
                    style_col_prod = style_col_size
                    prod_df[style_col_prod] = pd.NA
                    log.append(f"Sheet '{sheet_name}': Product Details missing style id; created '{style_col_prod}'.")

                brand_size_col = find_brand_size_start(size_df.columns)
                if brand_size_col is None:
                    log.append(f"Sheet '{sheet_name}': no size column found. Skipping.")
                    continue

                # Everything from the first size column through the end of the
                # sheet is treated as size data to merge.
                size_cols = list(size_df.columns)
                start_idx = size_cols.index(brand_size_col)
                size_columns_to_merge = size_cols[start_idx:]

                if brand_size_col not in prod_df.columns:
                    prod_df[brand_size_col] = pd.NA
                    log.append(f"Sheet '{sheet_name}': created '{brand_size_col}' in Product Details.")

                # Pre-create any size columns missing from the product sheet so
                # the merge/combine step below finds them.
                for col in size_columns_to_merge:
                    if col not in prod_df.columns:
                        prod_df[col] = pd.NA
                        log.append(f"Sheet '{sheet_name}': inserted missing column '{col}'.")

                # Reshape wide size columns into long form: one row per
                # (style id, size column, value).
                long = size_df.melt(id_vars=[style_col_size], value_vars=size_columns_to_merge,
                                    var_name="col_name", value_name="value")

                # Drop blank/placeholder entries. Real NaNs stringify to "nan"
                # via astype(str), so they are filtered here too.
                # NOTE(review): the check is case-sensitive — literal "NA",
                # "None", or "NaN" cell text would slip through; confirm
                # whether that is intended.
                long["value"] = long["value"].astype(str).str.strip()
                invalid = long["value"].isin(["", "nan", "none", "na"])
                long = long[~invalid]

                if long.empty:
                    log.append(f"Sheet '{sheet_name}': no valid size entries to merge.")
                    continue

                # Collapse duplicate (style id, column) pairs into a JSON array
                # of the distinct values, preserving first-seen order.
                grouped = (
                    long.groupby([style_col_size, "col_name"])['value']
                    .apply(lambda s: json.dumps(unique_preserve_order(list(s)), ensure_ascii=False))
                    .reset_index()
                )

                # Pivot back to wide form (one row per style id) and rename the
                # key column to match the product sheet's style id column.
                pivot = grouped.pivot(index=style_col_size, columns="col_name", values="value")
                pivot.reset_index(inplace=True)
                pivot.rename(columns={style_col_size: style_col_prod}, inplace=True)

                # Outer merge keeps product rows without size data AND adds
                # rows for style ids that exist only in the size chart.
                merged = prod_df.merge(pivot, on=style_col_prod, how="outer", suffixes=("", "_new"))

                # Prefer freshly merged size values; fall back to whatever the
                # product sheet already held for that cell.
                for col in size_columns_to_merge:
                    newcol = col + "_new"
                    if newcol in merged.columns:
                        merged[col] = merged[newcol].combine_first(merged[col])
                        merged.drop(columns=newcol, inplace=True)

                product_dfs[sheet_name] = merged
                log.append(f"Sheet '{sheet_name}': merged {pivot.shape[0]} style ids.")

            progress_bar.progress(100)

            # write result to an in-memory Excel file
            output = BytesIO()
            with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
                for name, df in product_dfs.items():
                    # Excel caps sheet names at 31 characters.
                    df.to_excel(writer, sheet_name=name[:31], index=False)
            output.seek(0)

            st.success("Merge complete!")
            st.write(f"Output ready: **{output_name}**")

            if show_logs:
                st.subheader("Merge Log")
                for line in log:
                    st.write("- ", line)

            # show previews and download
            st.subheader("Preview of merged sheets")
            for name, df in product_dfs.items():
                with st.expander(f"Sheet: {name} ({len(df)} rows)"):
                    st.dataframe(df.head(200))

            # Ensure the downloaded file always carries an .xlsx extension.
            st.download_button(
                label="Download merged workbook",
                data=output.getvalue(),
                file_name=output_name if output_name.endswith('.xlsx') else output_name + '.xlsx',
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )

        except Exception as e:
            # Surface the full traceback in the UI instead of crashing the app.
            st.exception(e)