txhno committed on
Commit
f5fc7e8
·
verified ·
1 Parent(s): 61e82b3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +186 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,188 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ import re
5
+ import json
6
+ import warnings
7
+ from io import BytesIO
8
+
9
+ warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
10
+
11
+ # --- helper functions (kept from your script) ---
12
STYLE_PATTERNS = [re.compile(p, re.I) for p in [
    r"^style[_\s\-]?id$", r"styleid", r"style[_\s\-]?code", r"^sku$"
]]

# Zero-width boundary between a lowercase letter/digit and an uppercase letter,
# used to split camelCase headers ("styleNumberId" -> "style Number Id").
_CAMEL_BOUNDARY = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
# "id" at the start of a word in lowercased text ("style id", "style_identifier"),
# as opposed to "id" buried inside another word ("width", "guide").
_ID_WORD_START = re.compile(r"(?<![a-z])id")


def find_styleid_column(columns):
    """Return the first column that looks like a style-id column, or None.

    Two passes are made over ``columns``:

    1. The first column whose string form matches any regex in
       ``STYLE_PATTERNS`` wins.
    2. Otherwise, the first column whose name contains "style" and also has
       "id" at the start of a word wins. camelCase names are split before the
       check. A plain substring test is deliberately not used here, because
       it would accept headers such as "Style Width" or "Style Guide".

    Args:
        columns: Iterable of column labels (any type; compared via ``str``).

    Returns:
        The matching label exactly as it appeared in ``columns``, else None.
    """
    # Materialise once so one-shot iterables survive both passes.
    labelled = [(col, str(col)) for col in columns]

    for col, name in labelled:
        if any(pattern.search(name) for pattern in STYLE_PATTERNS):
            return col

    for col, name in labelled:
        low = _CAMEL_BOUNDARY.sub(" ", name).lower()
        if "style" in low and _ID_WORD_START.search(low):
            return col

    return None
27
+
28
+
29
def find_brand_size_start(columns):
    """Locate the column where the size data begins.

    Preference order: the first column whose lowercased name contains both
    "brand" and "size"; failing that, the first column whose lowercased name
    contains "size". Returns None when neither is found.
    """
    def _first_containing(words):
        # Returns (found, label) so a falsy label is still reported correctly.
        for candidate in columns:
            lowered = str(candidate).lower()
            if all(word in lowered for word in words):
                return True, candidate
        return False, None

    for required_words in (("brand", "size"), ("size",)):
        found, label = _first_containing(required_words)
        if found:
            return label
    return None
38
+
39
+
40
def unique_preserve_order(seq):
    """Return the items of ``seq`` as a list with later duplicates removed.

    The first occurrence of each item is kept and the original relative
    order is preserved. Items must be hashable.
    """
    # dict keys are unique and remember insertion order, so this keeps
    # exactly the first occurrence of every item.
    return list(dict.fromkeys(seq))
48
+
49
+
50
# Download name used when the (optional) filename field is left blank.
DEFAULT_OUTPUT_NAME = "input.xlsx"

# Stringified cell contents that count as "no value". Compared against the
# lowercased cell text so "NONE", "Na", "NaN" etc. are filtered as well.
_EMPTY_MARKERS = {"", "nan", "none", "na", "<na>"}


def _merge_sheet(size_df, prod_df, sheet_name, log):
    """Merge one Size Chart sheet into the matching Product Details sheet.

    Every size column from the detected size column onwards is collapsed to
    one JSON list of distinct values per style id and outer-merged into
    ``prod_df`` on the style-id column.

    Args:
        size_df: Size Chart sheet as a DataFrame.
        prod_df: Product Details sheet as a DataFrame. Mutated in place:
            column labels are stringified and missing columns are created,
            even when the sheet ends up being skipped.
        sheet_name: Sheet name, used only in log messages.
        log: List that receives human-readable log lines.

    Returns:
        The merged DataFrame, or None when the sheet was skipped (the reason
        is appended to ``log``).
    """
    size_df.columns = [str(c) for c in size_df.columns]
    prod_df.columns = [str(c) for c in prod_df.columns]

    style_col_size = find_styleid_column(size_df.columns)
    style_col_prod = find_styleid_column(prod_df.columns)

    if style_col_size is None:
        log.append(f"Sheet '{sheet_name}': could not detect style id in Size Chart.")
        return None

    if style_col_prod is None:
        # No key on the product side: reuse the size chart's key name so the
        # outer merge below can still attach the size rows.
        style_col_prod = style_col_size
        prod_df[style_col_prod] = pd.NA
        log.append(f"Sheet '{sheet_name}': Product Details missing style id; created '{style_col_prod}'.")

    brand_size_col = find_brand_size_start(size_df.columns)
    if brand_size_col is None:
        log.append(f"Sheet '{sheet_name}': no size column found. Skipping.")
        return None

    # Everything from the size column to the last column is size data.
    size_cols = list(size_df.columns)
    size_columns_to_merge = size_cols[size_cols.index(brand_size_col):]

    if brand_size_col not in prod_df.columns:
        prod_df[brand_size_col] = pd.NA
        log.append(f"Sheet '{sheet_name}': created '{brand_size_col}' in Product Details.")

    for col in size_columns_to_merge:
        if col not in prod_df.columns:
            prod_df[col] = pd.NA
            log.append(f"Sheet '{sheet_name}': inserted missing column '{col}'.")

    long_df = size_df.melt(id_vars=[style_col_size], value_vars=size_columns_to_merge,
                           var_name="col_name", value_name="value")

    # Drop blank/placeholder cells; the comparison is case-insensitive.
    long_df["value"] = long_df["value"].astype(str).str.strip()
    long_df = long_df[~long_df["value"].str.lower().isin(_EMPTY_MARKERS)]

    if long_df.empty:
        log.append(f"Sheet '{sheet_name}': no valid size entries to merge.")
        return None

    # One JSON array of distinct values per (style id, size column).
    grouped = (
        long_df.groupby([style_col_size, "col_name"])['value']
        .apply(lambda s: json.dumps(unique_preserve_order(list(s)), ensure_ascii=False))
        .reset_index()
    )

    pivot = (
        grouped.pivot(index=style_col_size, columns="col_name", values="value")
        .reset_index()
        .rename(columns={style_col_size: style_col_prod})
    )

    merged = prod_df.merge(pivot, on=style_col_prod, how="outer", suffixes=("", "_new"))

    # Size chart values ("_new") take precedence; existing product values
    # only fill the gaps.
    for col in size_columns_to_merge:
        newcol = col + "_new"
        if newcol in merged.columns:
            merged[col] = merged[newcol].combine_first(merged[col])
            merged.drop(columns=newcol, inplace=True)

    log.append(f"Sheet '{sheet_name}': merged {pivot.shape[0]} style ids.")
    return merged


# --- Streamlit UI ---
st.set_page_config(page_title="Size Chart Merger", layout="wide")
st.title("Size Chart \u2194 Product Details Merger")
st.write("Upload a Size Chart workbook and a Product Details workbook (matching sheet names). This app merges size columns into product details.")

col1, col2 = st.columns(2)
with col1:
    size_file = st.file_uploader("Upload Size Chart Excel", type=["xlsx", "xlsm"], key="size")
with col2:
    prod_file = st.file_uploader("Upload Product Details Excel", type=["xlsx", "xlsm"], key="prod")

output_name = st.text_input("Output filename (optional)", value=DEFAULT_OUTPUT_NAME)
show_logs = st.checkbox("Show detailed log", value=True)

if st.button("Run merge"):
    if size_file is None or prod_file is None:
        st.error("Please upload both files before running the merge.")
    else:
        # Top-level UI boundary: any failure is rendered in the page instead
        # of crashing the script run.
        try:
            size_xl = pd.ExcelFile(size_file, engine="openpyxl")
            prod_xl = pd.ExcelFile(prod_file, engine="openpyxl")
            size_sheets = size_xl.sheet_names
            prod_sheets = prod_xl.sheet_names

            product_dfs = {name: prod_xl.parse(name, dtype=str) for name in prod_sheets}
            log = []

            progress_bar = st.progress(0)
            total = len(size_sheets)

            for i, sheet_name in enumerate(size_sheets):
                # update progress
                progress_bar.progress(int((i / max(total, 1)) * 100))
                st.write(f"Processing sheet: **{sheet_name}**")

                if sheet_name not in prod_sheets:
                    log.append(f"Skipped '{sheet_name}': not present in Product Details.")
                    continue

                merged = _merge_sheet(
                    size_xl.parse(sheet_name, dtype=str),
                    product_dfs[sheet_name],
                    sheet_name,
                    log,
                )
                if merged is not None:
                    product_dfs[sheet_name] = merged

            progress_bar.progress(100)

            # write result to an in-memory Excel file
            output = BytesIO()
            with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
                for name, df in product_dfs.items():
                    # Sheet names are cut to 31 characters before writing.
                    df.to_excel(writer, sheet_name=name[:31], index=False)
            output.seek(0)

            # The filename field is optional: fall back to the default when it
            # is blank, and append the extension unless one is already there
            # (checked case-insensitively).
            download_name = output_name.strip() or DEFAULT_OUTPUT_NAME
            if not download_name.lower().endswith(".xlsx"):
                download_name += ".xlsx"

            st.success("Merge complete!")
            st.write(f"Output ready: **{download_name}**")

            if show_logs:
                st.subheader("Merge Log")
                for line in log:
                    st.write("- ", line)

            # show previews and download
            st.subheader("Preview of merged sheets")
            for name, df in product_dfs.items():
                with st.expander(f"Sheet: {name} ({len(df)} rows)"):
                    st.dataframe(df.head(200))

            st.download_button(
                label="Download merged workbook",
                data=output.getvalue(),
                file_name=download_name,
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )

        except Exception as e:
            st.exception(e)