joycecast committed on
Commit
1ce5884
·
verified ·
1 Parent(s): 7aaf241

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (2).py +177 -0
  2. requirements.txt +4 -0
app (2).py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import msoffcrypto
4
+ import io
5
+ import re
6
+ import tempfile
7
+
8
+ # Define transformation function
9
+
10
# Importer details stamped on every manifest line, once with a "Buyer_" prefix
# and once with a "Consignee_" prefix, so the two blocks cannot drift apart.
_PARTY_FIELDS = {
    "Name": "SHEIN DISTRIBUTION CORPORATION",
    "Address_1": "777 S. Alameda St",
    "Address_2": "Suite 400",
    "City": "Los Angeles",
    "State": "CA",
    "Zip": "90021",
    "Country": "US",
    "ID_Number": "",
}

# Final column layout of the generated manifest workbook(s). Columns that no
# earlier step produced are created empty.
_MANIFEST_COLUMNS = [
    "Invoice_No", "Part", "Commercial_Description", "Country_of_Origin",
    "Country_of_Export", "Tariff_Number", "Quantity", "Quantity_UOM",
    "Unit_Price", "Total_Line_Value", "Net_Weight_KG", "Gross_Weight_KG",
    "Manufacturer_Name", "Manufacturer_Address_1", "Manufacturer_Address_2",
    "Manufacturer_City", "Manufacturer_State", "Manufacturer_Zip",
    "Manufacturer_Country", "MID_Code", "Buyer_Name", "Buyer_Address_1",
    "Buyer_Address_2", "Buyer_City", "Buyer_State", "Buyer_Zip",
    "Buyer_Country", "Buyer_ID_Number", "Consignee_Name",
    "Consignee_Address_1", "Consignee_Address_2", "Consignee_City",
    "Consignee_State", "Consignee_Zip", "Consignee_Country",
    "Consignee_ID_Number", "SICountry", "SP1", "SP2", "Zone_Status",
    "Privileged_Filing_Date", "Line_Piece_Count", "ADD_Case_Number",
    "CVD_Case_Number", "AD_Non_Reimbursement_Statement",
    "AD-CVD_Certification_Designation",
]

# Maximum number of data rows written to a single output workbook.
_MAX_ROWS_PER_FILE = 998

_MERGE_KEYS = ["Tariff_Number", "MID_Code"]


def _upload_path(upload):
    """Return the filesystem path of an uploaded file.

    Accepts both a plain path string and an object exposing the path as
    ``.name``, so the function works however the caller hands over the upload.
    """
    if upload is None:
        raise ValueError("Both Excel files must be uploaded before converting.")
    return getattr(upload, "name", upload)


def _cell_text(value):
    """Render one spreadsheet cell as text.

    Missing cells become ``""`` instead of ``"nan"``, and floats that Excel
    stored for whole numbers lose their trailing ``.0``.
    """
    if pd.isna(value):
        return ""
    text = str(value).strip()
    if isinstance(value, float) and text.endswith(".0"):
        text = text[:-2]
    return text


def _clean_description(cells):
    """Join the description cells of one row and strip digits and punctuation."""
    text = " ".join(str(cell).strip() for cell in cells if pd.notna(cell))
    text = re.sub(r"\d+", "", text)
    text = re.sub(r"[^\w\s]", "", text)  # also removes '%'
    # Removing tokens leaves leading/double spaces behind; collapse them.
    return " ".join(text.split())


def _load_line_items(path, password):
    """Decrypt the "by line item" workbook and return one row per merge key.

    The returned frame has the columns ``Part`` (always empty),
    ``Tariff_Number``, ``Commercial_Description`` and ``MID_Code``, with
    duplicate (tariff, MID) pairs reduced to their first occurrence.
    """
    decrypted = io.BytesIO()
    with open(path, "rb") as handle:
        office_file = msoffcrypto.OfficeFile(handle)
        office_file.load_key(password=password)
        office_file.decrypt(decrypted)
    decrypted.seek(0)  # rewind so the reader starts at the first byte

    # The first 10 rows and the last 3 rows are not line items.
    raw = pd.read_excel(decrypted, skiprows=10, header=None).iloc[:-3]
    picked = raw.iloc[:, [1, 4, 5, 6, 7, 8, 9, 10, 11, 23]].dropna(how="all")

    items = pd.DataFrame(index=picked.index)
    items["Part"] = ""
    items["Tariff_Number"] = picked.iloc[:, 1].map(_cell_text).astype(str)
    # Built row by row (not via DataFrame.apply) so an empty sheet works too.
    items["Commercial_Description"] = [
        _clean_description(cells)
        for cells in picked.iloc[:, 2:9].itertuples(index=False, name=None)
    ]
    items["MID_Code"] = picked.iloc[:, 9].map(_cell_text).astype(str)
    return items.drop_duplicates(subset=_MERGE_KEYS)


def _split_manufacturer_address(address, city):
    """Return ``(state, address_1)`` derived from a manufacturer address.

    The state is the second-to-last comma-separated part (minus " Province")
    when the address ends with "China", otherwise empty. ``address_1`` is the
    address with everything from the last occurrence of ``city`` removed.
    """
    # Missing cells must not turn into the text "nan": as a city it would
    # match inside addresses such as "Hunan" and truncate them.
    address = "" if pd.isna(address) else str(address)
    city = "" if pd.isna(city) else str(city)

    state = ""
    if address.endswith("China"):
        pieces = address.split(",")
        if len(pieces) >= 2:
            state = pieces[-2].replace(" Province", "").strip()

    address_1 = address
    if city and city in address:
        address_1 = address[: address.rfind(city)].strip().rstrip(",").rstrip()
    return state, address_1


def _load_combined(path):
    """Load the "combined" workbook.

    Returns ``(lines, mawb)`` where ``lines`` holds one row per invoice line
    with all derived and constant manifest fields filled in, and ``mawb`` is
    the text of cell U9 used to name the output files.
    """
    sheet = pd.read_excel(path, header=None)
    invoice_no = _cell_text(sheet.iloc[8, 11])  # cell L9
    mawb = _cell_text(sheet.iloc[8, 20])  # cell U9

    # The first 10 rows and the last 3 rows are not invoice lines.
    raw = pd.read_excel(path, skiprows=10, header=None).iloc[:-3]
    lines = raw.iloc[:, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12]].copy()
    lines.columns = [
        "Tariff_Number", "Country_of_Origin", "Quantity", "Gross_Weight_KG",
        "Total_Line_Value", "Manufacturer_Name", "manufacturer_address",
        "Manufacturer_City", "Manufacturer_Country", "Manufacturer_Zip",
        "MID_Code",
    ]

    # Same normalisation as the line-item side so the merge keys line up.
    lines["Tariff_Number"] = lines["Tariff_Number"].map(_cell_text).astype(str)
    lines["MID_Code"] = lines["MID_Code"].map(_cell_text).astype(str)

    # The country from the sheet is replaced by the first two characters of
    # the MID code.
    lines["Manufacturer_Country"] = lines["MID_Code"].str[:2]

    address_parts = [
        _split_manufacturer_address(address, city)
        for address, city in zip(
            lines["manufacturer_address"], lines["Manufacturer_City"]
        )
    ]
    lines["Manufacturer_State"] = [state for state, _ in address_parts]
    lines["Manufacturer_Address_1"] = [street for _, street in address_parts]

    lines["Invoice_No"] = invoice_no
    lines["Country_of_Export"] = "CN"
    lines["Quantity_UOM"] = "pcs"
    for role in ("Buyer", "Consignee"):
        for field, value in _PARTY_FIELDS.items():
            lines[f"{role}_{field}"] = value

    # Numbers stored as text are converted; anything unparseable raises here
    # rather than failing later inside the arithmetic.
    for column in ("Quantity", "Gross_Weight_KG", "Total_Line_Value"):
        lines[column] = pd.to_numeric(lines[column])

    # A zero quantity would yield inf; leave those per-unit cells blank.
    quantity = lines["Quantity"]
    divisor = quantity.where(quantity != 0)
    lines["Unit_Price"] = (lines["Total_Line_Value"] / divisor).round(2)
    # NOTE(review): this is gross weight per unit, stored as Net_Weight_KG;
    # kept as in the original logic, confirm that this is intended.
    lines["Net_Weight_KG"] = (lines["Gross_Weight_KG"] / divisor).round(2)
    lines["Gross_Weight_KG"] = lines["Gross_Weight_KG"].round(3)
    lines["Total_Line_Value"] = lines["Total_Line_Value"].round(2)
    return lines, mawb


def _write_manifests(manifest, mawb):
    """Write ``manifest`` as workbooks of at most 998 rows; return their paths.

    Files are named ``"<mawb> T01 Manifest - <suffix>.xlsx"`` with suffixes
    A, B, C ... and ``partN`` from the 27th file onwards.
    """
    import os  # local import: the file does not import os at module level

    # A fresh directory per call keeps concurrent conversions from
    # overwriting each other's files.
    out_dir = tempfile.mkdtemp(prefix="manifest_")
    # The MAWB comes from a spreadsheet cell; keep it from escaping out_dir.
    safe_mawb = re.sub(r'[\\/:*?"<>|]+', "-", mawb)

    paths = []
    for part, start in enumerate(range(0, len(manifest), _MAX_ROWS_PER_FILE)):
        chunk = manifest.iloc[start:start + _MAX_ROWS_PER_FILE]
        suffix = chr(65 + part) if part < 26 else f"part{part + 1}"
        filename = f"{safe_mawb} T01 Manifest - {suffix}.xlsx".strip()
        path = os.path.join(out_dir, filename)
        with pd.ExcelWriter(path, engine="openpyxl") as writer:
            chunk.to_excel(writer, sheet_name="Converted", index=False)
            sheet = writer.sheets["Converted"]
            # Size every column to its longest cell plus a little padding.
            for cells in sheet.columns:
                widest = max(
                    (len(str(cell.value)) for cell in cells
                     if cell.value is not None),
                    default=0,
                )
                letter = cells[0].column_letter
                sheet.column_dimensions[letter].width = widest + 2
        paths.append(path)
    return paths


def convert_ci(by_line_item_file, combined_file, password):
    """Merge the two commercial-invoice workbooks into manifest workbooks.

    Args:
        by_line_item_file: Upload of the password-protected "by line item"
            workbook (a path string or an object with a ``.name`` path).
        combined_file: Upload of the "combined" workbook, same forms.
        password: Password used to decrypt the "by line item" workbook.

    Returns:
        A list of paths to the generated ``.xlsx`` files, each holding at
        most 998 rows. The list is empty when there are no invoice lines.

    Raises:
        ValueError: If either upload is missing, or a quantity, weight or
            value cell cannot be read as a number.
    """
    line_items = _load_line_items(_upload_path(by_line_item_file), password)
    combined, mawb = _load_combined(_upload_path(combined_file))

    # Line items are unique per key, so the left merge keeps exactly one
    # output row per combined row.
    merged = combined.merge(line_items, on=_MERGE_KEYS, how="left")
    merged = merged.reindex(columns=_MANIFEST_COLUMNS, fill_value="")
    return _write_manifests(merged, mawb)
162
+
163
+ # Gradio Interface
164
# Web form: two workbook uploads plus the decryption password in, the
# partitioned manifest workbooks out.
iface = gr.Interface(
    fn=convert_ci,
    inputs=[
        gr.File(
            label="Upload 'By Line Item' File (.xlsx)",
            type="filepath",
        ),
        gr.File(
            label="Upload 'Combined' File (.xlsx)",
            type="filepath",
        ),
        gr.Textbox(
            label="Password for 'By Line Item' File",
            type="password",
        ),
    ],
    outputs=gr.File(
        label="Download Partitioned Files",
        file_count="multiple",
        file_types=[".xlsx"],
    ),
    title="SHEIN CI to Magaya Format Conversion Tool",
    description=(
        "Upload both 'By Line Item' (with password) and 'Combined' Excel files to merge and generate the Manifest in Magaya T01 Format."
    ),
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ msoffcrypto-tool
4
+ openpyxl