joycecast committed on
Commit
1ebc79d
·
verified ·
1 Parent(s): 8d715d5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import re
4
+ import unicodedata
5
+ import io
6
+
7
+ # ---------- Helper Functions ----------
8
+
9
def clean_text(s: str) -> str:
    """Return *s* reduced to trimmed, printable ASCII text.

    Missing values (anything ``pd.isna`` reports as NA) become an empty
    string. Any other value is converted with ``str()``, has every "ÿ"
    removed, is NFKD-normalized, and then loses every character outside the
    printable ASCII range (space through tilde). Surrounding whitespace is
    stripped from the result.
    """
    if pd.isna(s):
        return ""
    # "ÿ" is dropped before normalization; NFKD would otherwise decompose it
    # and leave a plain "y" behind after the ASCII filter.
    decomposed = unicodedata.normalize("NFKD", str(s).replace("ÿ", ""))
    # " " is code point 32 and "~" is 126, i.e. the printable ASCII range.
    printable = [char for char in decomposed if " " <= char <= "~"]
    return "".join(printable).strip()
17
+
18
def format_zip(zip_code):
    """Return *zip_code* as a 5-digit, zero-padded ZIP string.

    Missing values and values without any digit yield an empty string.
    Handles the forms a ZIP takes after a CSV round trip:

    * integers that lost their leading zeros (``2134`` -> ``"02134"``);
    * floats, which pandas produces when the column contains blanks
      (``2134.0`` -> ``"02134"``, not ``"21340"``);
    * ZIP+4 values, of which only the 5-digit part is kept, even when its
      leading zeros were dropped (``"2134-5678"`` -> ``"02134"``).
    """
    if pd.isna(zip_code):
        return ""
    # A whole-number float must not contribute its ".0" as an extra digit.
    if isinstance(zip_code, float) and zip_code.is_integer():
        zip_code = int(zip_code)
    text = str(zip_code).strip()
    # Same problem when the float arrives already rendered as text.
    text = re.sub(r"\.0+$", "", text)
    # For ZIP+4, pad only the part before the hyphen.
    plus_four = re.fullmatch(r"(\d{1,5})-\d{4}", text)
    if plus_four:
        text = plus_four.group(1)
    digits = re.sub(r"\D", "", text)
    if not digits:
        return ""
    return digits.zfill(5)[:5]
27
+
28
def flow_address_lines(lines, maxlen=35, maxlines=3):
    """Re-flow address fields into at most *maxlines* lines of *maxlen* chars.

    Each entry of *lines* is passed through ``clean_text`` and split on
    whitespace; the resulting words are packed greedily, in order, into the
    output lines, separated by single spaces. A word longer than *maxlen* is
    cut into *maxlen*-sized pieces, each piece starting on a fresh line.
    Words that do not fit once all lines are used are dropped.

    Returns a list of exactly *maxlines* strings; unused lines are empty.
    """
    tokens = []
    for ln in lines:
        txt = clean_text(ln)
        if txt:
            tokens.extend(txt.split())

    # Sized from maxlines (not a fixed 3) so any line count works.
    out = [""] * maxlines
    line_i = 0
    for tok in tokens:
        # Hard-split words that cannot fit on a single line.
        while len(tok) > maxlen:
            if line_i >= maxlines:
                return out
            chunk, tok = tok[:maxlen], tok[maxlen:]
            if out[line_i]:
                # A full-width piece never shares a line with earlier words.
                line_i += 1
                if line_i >= maxlines:
                    return out
            out[line_i] = chunk
            line_i += 1
        if line_i >= maxlines:
            return out

        current = out[line_i]
        if not current:
            out[line_i] = tok
        elif len(current) + 1 + len(tok) <= maxlen:
            out[line_i] = f"{current} {tok}"
        else:
            line_i += 1
            if line_i >= maxlines:
                return out
            out[line_i] = tok
    return out
61
+
62
def convert_dry_ice_kg(x):
    """Convert a weight in pounds to whole kilograms, returned as a string.

    The value is divided by 2.2 and rounded to the nearest integer with the
    built-in ``round``. Missing, blank, non-numeric, or non-finite input
    yields an empty string.
    """
    if pd.isna(x):
        return ""
    text = str(x).strip()
    if not text:
        return ""
    try:
        # round() on a float returns an int, so no further cast is needed.
        return str(round(float(text) / 2.2))
    except (ValueError, OverflowError):
        # ValueError: unparsable text or NaN; OverflowError: infinity.
        return ""
71
+
72
+ # ---------- Main Cleaning Function ----------
73
+
74
def clean_csv(file):
    """Clean an uploaded shipment CSV and return the path of the cleaned copy.

    *file* may be a path string or any object exposing the path as ``.name``
    (both forms are passed by different Gradio ``File`` configurations).

    Steps applied, each only when the relevant column exists:

    * header names are stripped of surrounding whitespace;
    * ``ZipCode`` is normalized with ``format_zip``;
    * ``Address1``-``Address3`` are re-flowed with ``flow_address_lines``
      (all three columns are always written, created if absent);
    * the contact/location text columns are passed through ``clean_text``;
    * ``Dry Ice Weight`` gets a companion ``Dry Ice Weight (kg)`` column.

    Returns the path to a ``cleaned_output.csv`` written (UTF-8 with BOM) in
    a fresh temporary directory, which is what a Gradio ``File`` output
    expects.

    Raises:
        ValueError: if no file was provided.
    """
    import os
    import tempfile

    if file is None:
        raise ValueError("No CSV file was uploaded.")
    path = getattr(file, "name", file)

    # UTF-8 must be tried first: latin1 can decode any byte sequence, so
    # using it first would silently garble UTF-8 input and never fall back.
    try:
        df = pd.read_csv(path, encoding="utf-8-sig")
    except UnicodeDecodeError:
        df = pd.read_csv(path, encoding="latin1")

    df.columns = df.columns.str.strip()

    # --- Cleaning operations ---
    if "ZipCode" in df.columns:
        df["ZipCode"] = df["ZipCode"].map(format_zip)

    # Address split logic: missing address columns count as empty text.
    address_cols = ("Address1", "Address2", "Address3")
    sources = [
        df[col] if col in df.columns else [""] * len(df)
        for col in address_cols
    ]
    flowed = [flow_address_lines(list(parts)) for parts in zip(*sources)]
    for position, col in enumerate(address_cols):
        df[col] = [row[position] for row in flowed]

    # Clean text fields
    text_cols = ["Company Name", "Contact Name", "City", "State", "Phone Number", "Email"]
    for col in text_cols:
        if col in df.columns:
            df[col] = df[col].map(clean_text)

    # Dry Ice conversion
    if "Dry Ice Weight" in df.columns:
        df["Dry Ice Weight (kg)"] = df["Dry Ice Weight"].map(convert_dry_ice_kg)

    # Write to a real file: the File output component serves a path, and a
    # dedicated directory keeps the download name stable.
    out_dir = tempfile.mkdtemp(prefix="cleaned_csv_")
    out_path = os.path.join(out_dir, "cleaned_output.csv")
    df.to_csv(out_path, index=False, encoding="utf-8-sig")
    return out_path
114
+
115
+ # ---------- Gradio UI ----------
116
+
117
# Heading shown at the top of the web page.
title = "UPS Shipment CSV Cleaner"

# Markdown blurb rendered under the heading.
description = """
Upload your **raw shipment CSV file** below.
This tool will:
- Remove strange characters (e.g. ÿ)
- Pad ZIP codes to 5 digits
- Split long addresses into ≤ 35 characters
- Convert Dry Ice Weight from lbs → kg
Then download the cleaned CSV ready for UPS Batch import.
"""

# One file in, one file out, processed by clean_csv; flagging is disabled.
demo = gr.Interface(
    clean_csv,
    gr.File(label="📤 Upload CSV File"),
    gr.File(label="📥 Download Cleaned CSV"),
    title=title,
    description=description,
    allow_flagging="never",
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    demo.launch()