Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app (2).py +177 -0
- requirements.txt +4 -0
app (2).py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import msoffcrypto
|
| 4 |
+
import io
|
| 5 |
+
import re
|
| 6 |
+
import tempfile
|
| 7 |
+
|
| 8 |
+
# Define transformation function
|
| 9 |
+
|
| 10 |
+
# Characters stripped from commercial descriptions: every digit plus anything
# that is neither a word character nor whitespace (this also covers "%").
_DESC_NOISE = re.compile(r"\d|[^\w\s]")

# Characters that are unsafe inside a file name on common platforms.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# Maximum number of data rows written to a single output workbook.
_MAX_ROWS_PER_FILE = 998

# Constant values stamped on every output row.
_FIXED_FIELDS = {
    "Country_of_Export": "CN",
    "Quantity_UOM": "pcs",
    "Buyer_Name": "SHEIN DISTRIBUTION CORPORATION",
    "Buyer_Address_1": "777 S. Alameda St",
    "Buyer_Address_2": "Suite 400",
    "Buyer_City": "Los Angeles",
    "Buyer_State": "CA",
    "Buyer_Zip": "90021",
    "Buyer_Country": "US",
    "Buyer_ID_Number": "",
    "Consignee_Name": "SHEIN DISTRIBUTION CORPORATION",
    "Consignee_Address_1": "777 S. Alameda St",
    "Consignee_Address_2": "Suite 400",
    "Consignee_City": "Los Angeles",
    "Consignee_State": "CA",
    "Consignee_Zip": "90021",
    "Consignee_Country": "US",
    "Consignee_ID_Number": "",
}

# Final column layout of the manifest; columns that were never computed are
# emitted as empty strings.
_OUTPUT_COLUMNS = [
    'Invoice_No', 'Part', 'Commercial_Description', 'Country_of_Origin',
    'Country_of_Export', 'Tariff_Number', 'Quantity', 'Quantity_UOM',
    'Unit_Price', 'Total_Line_Value', 'Net_Weight_KG', 'Gross_Weight_KG',
    'Manufacturer_Name', 'Manufacturer_Address_1', 'Manufacturer_Address_2',
    'Manufacturer_City', 'Manufacturer_State', 'Manufacturer_Zip',
    'Manufacturer_Country', 'MID_Code', 'Buyer_Name', 'Buyer_Address_1',
    'Buyer_Address_2', 'Buyer_City', 'Buyer_State', 'Buyer_Zip',
    'Buyer_Country', 'Buyer_ID_Number', 'Consignee_Name',
    'Consignee_Address_1', 'Consignee_Address_2', 'Consignee_City',
    'Consignee_State', 'Consignee_Zip', 'Consignee_Country',
    'Consignee_ID_Number', 'SICountry', 'SP1', 'SP2', 'Zone_Status',
    'Privileged_Filing_Date', 'Line_Piece_Count', 'ADD_Case_Number',
    'CVD_Case_Number', 'AD_Non_Reimbursement_Statement',
    'AD-CVD_Certification_Designation',
]


def _upload_path(upload):
    """Return the filesystem path of an upload given as a path or file object."""
    import os

    # Path-like values (including str subclasses) already are the path; only
    # file-like wrappers need their ``.name`` attribute read.
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    return upload.name


def _stringify_code(value):
    """Render a cell as text, dropping the trailing '.0' of whole-number floats."""
    text = str(value)
    if isinstance(value, float) and text.endswith(".0"):
        return text[:-2]
    return text


def _clean_description(values):
    """Join the non-missing description cells and strip digits and punctuation."""
    joined = " ".join(str(v).strip() for v in values if pd.notna(v))
    return _DESC_NOISE.sub("", joined)


def _split_manufacturer_address(address, city):
    """Return ``(state, address_1)`` derived from a full address and its city.

    The state is the second-to-last comma-separated part (minus " Province")
    when the address ends with "China"; otherwise it is empty. ``address_1``
    is the address cut off just before the last occurrence of the city.
    """
    # Missing cells must become "" rather than the text "nan": a "nan" city
    # would otherwise match inside names such as "Hunan" or "Jinan" and
    # truncate the address at that point.
    address = "" if pd.isna(address) else str(address)
    city = "" if pd.isna(city) else str(city)

    state = ""
    if address.endswith("China"):
        parts = address.split(",")
        if len(parts) >= 2:
            state = parts[-2].replace(" Province", "").strip()

    address_1 = address
    if city:
        cut = address.rfind(city)
        if cut != -1:
            address_1 = address[:cut].strip().rstrip(",")
    return state, address_1


def _safe_filename_part(value):
    """Turn a spreadsheet cell into text that is safe to embed in a file name."""
    if pd.isna(value):
        return "UNKNOWN"
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", _stringify_code(value)).strip()
    return cleaned or "UNKNOWN"


def convert_ci(by_line_item_file, combined_file, password):
    """Merge the 'By Line Item' and 'Combined' workbooks into manifest files.

    The password-protected 'By Line Item' workbook supplies the commercial
    description for each (tariff number, MID code) pair. The 'Combined'
    workbook supplies the line values, the invoice number (cell L9) and the
    MAWB (cell U9). The merged rows are written to one or more .xlsx files of
    at most 998 data rows each, inside a freshly created temporary directory.

    Args:
        by_line_item_file: Path, or file object exposing ``.name``, of the
            encrypted 'By Line Item' workbook.
        combined_file: Path, or file object exposing ``.name``, of the
            'Combined' workbook.
        password: Password used to decrypt ``by_line_item_file``.

    Returns:
        List of paths of the generated workbooks, in chunk order. The list is
        empty when the combined workbook yields no data rows.
    """
    import os

    by_line_item_path = _upload_path(by_line_item_file)
    combined_path = _upload_path(combined_file)

    # Decrypt the protected "by line item" file into memory.
    decrypted = io.BytesIO()
    with open(by_line_item_path, "rb") as f:
        office_file = msoffcrypto.OfficeFile(f)
        office_file.load_key(password=password)
        office_file.decrypt(decrypted)
    decrypted.seek(0)  # make sure the reader starts at the beginning

    # Skip the 10 leading rows and the 3 trailing rows, keep the columns of
    # interest and drop rows in which all of them are empty.
    line_items_raw = pd.read_excel(decrypted, skiprows=10, header=None).iloc[:-3]
    selected = line_items_raw.iloc[:, [1, 4, 5, 6, 7, 8, 9, 10, 11, 23]]
    selected = selected.dropna(how="all")

    descriptions = [
        _clean_description(cells)
        for cells in selected.iloc[:, 2:9].itertuples(index=False, name=None)
    ]
    by_line_item_df = pd.DataFrame(
        {
            "Tariff_Number": selected.iloc[:, 1].map(_stringify_code),
            "Commercial_Description": descriptions,
            "MID_Code": selected.iloc[:, 9].astype(str),
        },
        index=selected.index,
    )
    by_line_item_df.insert(0, "Part", "")  # part numbers are left blank

    # Header cells of the combined file: L9 holds the invoice number and U9
    # the MAWB (row index 8, column indices 11 and 20).
    invoice = pd.read_excel(combined_path, header=None)
    merged_value = invoice.iloc[8, 11]
    mawb = invoice.iloc[8, 20]
    invoice_no = str(merged_value) if pd.notna(merged_value) else ""

    # The data rows are read separately so that pandas infers numeric dtypes
    # without the header rows getting in the way.
    combined_raw = pd.read_excel(combined_path, skiprows=10, header=None).iloc[:-3]
    combined_df = combined_raw.iloc[:, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12]].copy()
    combined_df.columns = [
        "Tariff_Number", "Country_of_Origin", "Quantity", "Gross_Weight_KG",
        "Total_Line_Value", "Manufacturer_Name", "manufacturer_address",
        "Manufacturer_City", "Manufacturer_Country", "Manufacturer_Zip", "MID_Code",
    ]

    # Normalise the merge keys the same way on both sides.
    combined_df["Tariff_Number"] = combined_df["Tariff_Number"].map(_stringify_code)
    combined_df["MID_Code"] = combined_df["MID_Code"].astype(str)

    # The manufacturer country is taken from the first two MID characters.
    combined_df["Manufacturer_Country"] = combined_df["MID_Code"].str[:2]
    address_parts = [
        _split_manufacturer_address(address, city)
        for address, city in zip(
            combined_df["manufacturer_address"], combined_df["Manufacturer_City"]
        )
    ]
    combined_df["Manufacturer_State"] = [state for state, _ in address_parts]
    combined_df["Manufacturer_Address_1"] = [addr for _, addr in address_parts]

    # Constant and calculated fields.
    combined_df["Invoice_No"] = invoice_no
    for column, value in _FIXED_FIELDS.items():
        combined_df[column] = value
    combined_df["Unit_Price"] = (
        combined_df["Total_Line_Value"] / combined_df["Quantity"]
    ).round(2)
    # NOTE(review): this is gross weight per piece, not a true net weight;
    # kept as in the original implementation - confirm with the business owner.
    combined_df["Net_Weight_KG"] = (
        combined_df["Gross_Weight_KG"] / combined_df["Quantity"]
    ).round(2)
    combined_df["Gross_Weight_KG"] = combined_df["Gross_Weight_KG"].round(3)
    combined_df["Total_Line_Value"] = combined_df["Total_Line_Value"].round(2)

    # Keep only the first line item per key so the left merge cannot
    # multiply rows of the combined file.
    by_line_item_unique = by_line_item_df.drop_duplicates(
        subset=["Tariff_Number", "MID_Code"]
    )
    merged_df = combined_df.merge(
        by_line_item_unique, on=["Tariff_Number", "MID_Code"], how="left"
    )
    merged_df = merged_df.reindex(columns=_OUTPUT_COLUMNS, fill_value="")

    # Write the output in chunks into a private temporary directory so that
    # concurrent requests cannot overwrite each other's files.
    output_dir = tempfile.mkdtemp(prefix="t01_manifest_")
    mawb_label = _safe_filename_part(mawb)
    outputs = []
    for i, start in enumerate(range(0, len(merged_df), _MAX_ROWS_PER_FILE)):
        chunk = merged_df.iloc[start:start + _MAX_ROWS_PER_FILE]
        suffix = chr(65 + i) if i < 26 else f"part{i + 1}"  # A..Z, then part27+
        filename = os.path.join(
            output_dir, f"{mawb_label} T01 Manifest - {suffix}.xlsx"
        )
        with pd.ExcelWriter(filename, engine='openpyxl') as writer:
            chunk.to_excel(writer, sheet_name="Converted", index=False)
            worksheet = writer.sheets['Converted']
            # Size every column to its longest cell plus a little padding.
            for column_cells in worksheet.columns:
                max_length = max(
                    len(str(cell.value)) if cell.value is not None else 0
                    for cell in column_cells
                )
                letter = column_cells[0].column_letter
                worksheet.column_dimensions[letter].width = max_length + 2
        outputs.append(filename)

    return outputs
|
| 162 |
+
|
| 163 |
+
# Gradio front end: two workbook uploads plus the password of the protected
# one go in; the generated manifest workbooks come back as downloads.
iface = gr.Interface(
    title="SHEIN CI to Magaya Format Conversion Tool",
    description="Upload both 'By Line Item' (with password) and 'Combined' Excel files to merge and generate the Manifest in Magaya T01 Format.",
    fn=convert_ci,
    inputs=[
        gr.File(label="Upload 'By Line Item' File (.xlsx)", type="filepath"),
        gr.File(label="Upload 'Combined' File (.xlsx)", type="filepath"),
        gr.Textbox(label="Password for 'By Line Item' File", type="password"),
    ],
    outputs=gr.File(
        label="Download Partitioned Files",
        file_types=[".xlsx"],
        file_count="multiple",
    ),
)


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
pandas
|
| 3 |
+
msoffcrypto-tool
|
| 4 |
+
openpyxl
|