joycecast committed on
Commit
70170d3
·
verified ·
1 Parent(s): 2abab19

Upload 4 files

Browse files
Files changed (4) hide show
  1. HTS_list.py +205 -0
  2. app.py +873 -0
  3. hts_validator.py +731 -0
  4. requirements.txt +3 -0
HTS_list.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 99037801 for copper content; 99037802 for non-copper content or goods with no copper
2
+ Copper_primary_HTS_list = [
3
+ 74061000, 74091990, 74130010,
4
+ 74062000, 74092100, 74130050,
5
+ 74071015, 74092900, 74130090,
6
+ 74071030, 74093110, 74151000,
7
+ 74071050, 74093150, 74152100,
8
+ 74072115, 74093190, 74152900,
9
+ 74072130, 74093910, 74153305,
10
+ 74072150, 74093950, 74153310,
11
+ 74072170, 74093990, 74153380,
12
+ 74072190, 74094000, 74153900,
13
+ 74072916, 74099010, 74181000,
14
+ 74072934, 74099050, 74182010,
15
+ 74072938, 74099090, 74182050,
16
+ 74072940, 74101100, 74192000,
17
+ 74072950, 74101200, 74198003,
18
+ 74081130, 74102130, 74198006,
19
+ 74081160, 74102160, 74198009,
20
+ 74081900, 74102200, 74198015,
21
+ 74082100, 74111010, 74198016,
22
+ 74082210, 74111050, 74198017,
23
+ 74082250, 74112110, 74198030,
24
+ 74082910, 74112150, 74198050,
25
+ 74082950, 74112200, 85444210,
26
+ 74091110, 74112910, 85444220,
27
+ 74091150, 74112950, 85444290,
28
+ 74091910, 74121000, 85444910,
29
+ 74091950, 74122000
30
+ ]
31
+
32
+ # Steel primary HTS List
33
+ Steel_primary_HTS_list = [
34
+ 84313100, 84314200, 84314910, 84314990, 84321000, 84329000, 85479000, 94032000,
35
+ 94059920, 94059940, 94062000, 94069001, 84181000, 84183000, 84184000, 84221100,
36
+ 84501100, 84502000, 84512100, 84512900, 85098020, 85166040, 9403999020, 4029968,
37
+ 82119120, 84291100, 85042300, 4029970, 82119125, 84291900, 85043300, 4029990,
38
+ 82119130, 84292000, 8504909634, 2106909998, 82119140, 84293000, 8504909638,
39
+ 2710193050, 82119150, 84294000, 8504909642, 2711120020, 82119180, 84295110,
40
+ 85142040, 2804290010, 82119220, 84295150, 85142060, 2804400000, 82119240,
41
+ 84295210, 85162900, 2901220000, 82119260, 84295250, 85166060, 2903421000,
42
+ 82119290, 84295910, 86011000, 29034310, 82119300, 84295950, 86012000, 29034410,
43
+ 82119410, 84312000, 86021000, 29034510, 82119450, 84313900, 86029000, 29034900,
44
+ 82119510, 84314100, 86031000, 29035110, 82119550, 84314340, 86039000, 29035990,
45
+ 82119590, 84314380, 86040000, 2903710100, 82151000, 84331100, 86050000,
46
+ 3004909244, 82152000, 84332000, 86071100, 32081000, 82159130, 84335100,
47
+ 8607190300, 32082000, 82159160, 84335900, 86071906, 32089000, 82159190,
48
+ 84339010, 86071912, 32091000, 82159901, 84339050, 86071915, 32099000,
49
+ 82159905, 8443160000, 86071990, 32139000, 82159910, 8454200010, 86072150,
50
+ 32141000, 82159915, 8454200060, 8607301010, 33030010, 82159920, 84553000,
51
+ 8607301050, 33030020, 82159922, 8455904000, 8607301090, 33030030, 82159924,
52
+ 8455908000, 86073050, 33043000, 82159926, 84571000, 86079100, 33049950,
53
+ 82159930, 84749000, 86079950, 33051000, 82159935, 84771030, 86090000, 33053000,
54
+ 82159940, 84771040, 87011001, 33059000, 82159945, 84771090, 8701210080,
55
+ 33069000, 82159950, 84779025, 8701220080, 33071010, 83021060, 8477908601,
56
+ 8701230080, 33071020, 83024130, 84798955, 8701240080, 33072000, 83024160,
57
+ 84798965, 8701290080, 33074900, 83024230, 84799045, 87013010, 33079000,
58
+ 83024960, 84799055, 87013050, 34013010, 83052000, 84799065, 87019110,
59
+ 34013050, 83071060, 84799075, 87019150, 34023190, 8401400000, 84799085,
60
+ 87019210, 34024990, 84031000, 84799095, 87019250, 34025011, 8406904000,
61
+ 8480490010, 87019310, 34025051, 84072100, 8480718045, 87019350, 34029010,
62
+ 84079010, 8480718060, 87019410, 34029030, 84079090, 8480799010, 87019450,
63
+ 34029050, 84082010, 8482105004, 87019510, 34031910, 84082090, 8482105008,
64
+ 87019550, 34031950, 84099150, 8482105012, 87021031, 34039900, 84099192,
65
+ 8482105016, 87021061, 34051000, 84099199, 8482105024, 87031010, 34052000,
66
+ 84099991, 8482105028, 87031050, 34054000, 84099992, 8482105032, 87032101,
67
+ 34059000, 84099999, 8482105036, 87051000, 35061050, 8410900000, 8482105052,
68
+ 87052000, 35069110, 84118180, 8482105056, 8705900010, 35069150, 84122100,
69
+ 8482105060, 8705900020, 35069900, 84122980, 8482105064, 87060030, 38085910,
70
+ 8412909070, 8482105068, 87084030, 38085940, 8412909075, 8482200064,
71
+ 87084060, 38086110, 84138100, 8482200067, 87089210, 38086150, 8413919055,
72
+ 8482200090, 87089250, 38086210, 8413919060, 84829905, 87089260, 38086250,
73
+ 8413919096, 84829915, 87089275, 38086910, 84143040, 84829925, 87089315,
74
+ 38086950, 84148016, 84829935, 87089330, 38089115, 84149030, 84829945,
75
+ 87089923, 38089125, 84149041, 84829965, 87089981, 38089130, 84151030,
76
+ 8483101010, 87100000, 38089150, 84151060, 8483101050, 87113000, 38089410,
77
+ 84151090, 84831050, 87115000, 38089450, 84158101, 84832040, 87116000,
78
+ 38099100, 84158201, 84832080, 87141000, 38101000, 84158300, 84833040,
79
+ 87161000, 38111900, 84159040, 84833080, 87163900, 38112100, 84159080,
80
+ 84834010, 87168010, 38140010, 8417900000, 8483405020, 87168050, 38140020,
81
+ 84182100, 84834090, 87169010, 38140050, 84182920, 84835060, 87169030,
82
+ 38200000, 84189940, 84835090, 94017100, 3824999397, 84198150, 84836040,
83
+ 94017900, 3827610000, 84212900, 84836080, 94031000, 3827620000, 8424100000,
84
+ 84839020, 94039910, 3827630000, 84248990, 84839030, 9403999010, 3827640000,
85
+ 84254200, 84839050, 9403999015, 3827650000, 84262000, 84839070, 9403999040,
86
+ 39252000, 84269900, 84839080, 9403999045, 39269010, 84271040, 85015340,
87
+ 9403999051, 76141010, 84271080, 85015360, 9403999061, 8202390040, 84272040,
88
+ 85015380, 95069100, 82034060, 84272080, 8501640110, 82055955, 84279000,
89
+ 8502310000, 82057000, 84283200, 85030035, 8207200070, 84283300, 85030045,
90
+ 8207306062, 84283900, 85030065, 8207306095, 84286000, 85030075, 82111000,
91
+ 84287000, 85030090, 82119110, 84289003, 85030095,
92
+ 73012010, 73012050, 73023000, 73072110, 73072150, 73072210, 73072250, 73072300,
93
+ 73072900, 73079110, 73079130, 73079150, 73079230, 73079290, 73079330, 73079360,
94
+ 73079390, 73079910, 73079930, 73079950, 73081000, 73082000, 73083010, 73083050,
95
+ 73084000, 73089030, 73089060, 73089070, 73089095, 73090000, 73101000, 73102100,
96
+ 73102900, 73110000, 73121005, 73121010, 73121020, 73121030, 73121050, 73121060,
97
+ 73121070, 73121080, 73121090, 73129000, 73130000, 73141210, 73141220, 73141230,
98
+ 73141260, 73141290, 73141410, 73141420, 73141430, 73141460, 73141490, 73141901,
99
+ 73142000, 73143110, 73143150, 73143900, 73144100, 73144200, 73144930, 73144960,
100
+ 73145000, 73151100, 73151200, 73151900, 73152010, 73152050, 73158100, 73158210,
101
+ 73158230, 73158250, 73158270, 73158910, 73158930, 73158950, 73159000, 73160000,
102
+ 73170010, 73170020, 73170055, 73170065, 73170075, 73181100, 73181200, 73181300,
103
+ 73181410, 73181450, 73181520, 73181540, 73181550, 73181560, 73181580, 73181600,
104
+ 73181900, 73182100, 73182200, 73182300, 73182400, 73182900, 73194020, 73194030,
105
+ 73194050, 73199010, 73199090, 73201030, 73201060, 73201090, 73202010, 73202050,
106
+ 73209010, 73209050, 73211110, 73211130, 73211160, 73211200, 73211900, 73218110,
107
+ 73218150, 73218210, 73218250, 73218900, 73219010, 73219020, 73219040, 73219050,
108
+ 73219060, 73221900, 73229000, 73231000, 73239300, 73239400, 73239910, 73239930,
109
+ 73239950, 73239970, 73239990, 73241000, 73242900, 73249000, 73259100, 73259910,
110
+ 73259950, 73261100, 73261900, 73262000, 73269010, 73269025, 73269035, 73269045,
111
+ 73269060, 73269086, 7317005501, 7317005502, 7317005508, 7317005511, 7317005518,
112
+ 7317005519, 7317005520, 7317005530, 7317005540, 7317005550, 7317005570,
113
+ 7317005590, 7317006530, 7216910010, 7302909000, 73071930, 73071990
114
+ ]
115
+
116
+ # Aluminum primary HTS List
117
+ Aluminum_primary_HTS_list = [
118
+ 6603908100, 8302103000, 8302106030, 8302106060, 8302106090,
119
+ 8302200000, 8302303010, 8302303060,
120
+ 8302413000, 8302416015, 8302416045, 8302416050, 8302416080,
121
+ 8302423010, 8302423015, 8302423065,
122
+ 8302496035, 8302496045, 8302496055, 8302496085,
123
+ 8302500000, 8302603000, 8302609000,
124
+ 8305100050, 8306300000, 8414596590,
125
+ 8415908025, 8415908045, 8415908085,
126
+ 8418998005, 8418998050, 8418998060,
127
+ 8419505000, 8419901000, 8422900640,
128
+ 8424909080, 8473302000, 8473305100,
129
+ 8479899599, 8479908500, 8479909596,
130
+ 8481909060, 8481909085, 8486900000,
131
+ 8487900080, 8503009520, 8508700000,
132
+ 8513902000, 8515902000, 8516905000, 8516908050,
133
+ 8517710000, 8517790000,
134
+ 8529907300, 8529909760,
135
+ 8536908585, 8538100000,
136
+ 8541900000, 8543908885,
137
+ 8547900020, 8547900030, 8547900040,
138
+ 8708103050, 87081060, 8708295160,
139
+ 8708806590, 8708996890, 8716805010,
140
+ 8807300060, 9013908000, 9031909195,
141
+ 9401999081, 94031000, 94032000,
142
+ 9403991040, 9403999010, 9403999015,
143
+ 9403999020, 9403999040, 9403999045,
144
+ 9405994020,
145
+ 9506114080, 9506514000, 9506516000,
146
+ 9506594040, 9506702090,
147
+ 9506910010, 9506910020, 9506910030,
148
+ 9506990510, 9506990520, 9506990530,
149
+ 9506991500, 9506992000, 9506992580,
150
+ 9506992800, 9506995500, 9506996080,
151
+ 9507302000, 9507304000, 9507306000,
152
+ 9507308000, 9507906000,
153
+ 9603908050,
154
+ 2203000060, 2203000090,
155
+ 4029968, 4029970, 4029990,
156
+ 2106909998, 2710193050,
157
+ 29034310, 29034510, 29034900, 29035110, 29035990,
158
+ 3004909244,
159
+ 32081000, 32082000, 32089000, 32091000, 32099000,
160
+ 32139000, 32141000,
161
+ 33030010, 33030020, 33030030,
162
+ 33043000, 33049950,
163
+ 33051000, 33053000, 33059000,
164
+ 33069000,
165
+ 33071010, 33071020, 33072000, 33074900, 33079000,
166
+ 34013010, 34013050,
167
+ 34023190, 34024990, 34025011, 34025051,
168
+ 34029010, 34029030, 34029050,
169
+ 34031910, 34031950, 34039900,
170
+ 34051000, 34052000, 34054000, 34059000,
171
+ 35061050, 35069110, 35069150, 35069900,
172
+ 3701300000,
173
+ 38085910, 38085940, 38086110, 38086150,
174
+ 38086210, 38086250, 38086910, 38086950,
175
+ 38089115, 38089125, 38089130, 38089150,
176
+ 38089410, 38089450,
177
+ 38099100, 38101000,
178
+ 38111900, 38112100,
179
+ 38140010, 38140020, 38140050,
180
+ 38200000, 3824999397,
181
+ 7308200035,
182
+ 8307906000, 8309900020, 8309900025,
183
+ 8412909070, 8412909075, 84148016, 84181000,
184
+ 8419501000, 84248990, 8443160000,
185
+ 84501100, 84512100,
186
+ 84672200, 84672900, 84678100, 84678950,
187
+ 8483405020, 8483905020,
188
+ 8501640110, 85022000, 8502310000,
189
+ 8503009546, 8503009570,
190
+ 85043120, 85043140, 85043160,
191
+ 85043300, 85043400,
192
+ 85049020, 85049041, 85049065, 85049075, 85049096,
193
+ 85441900, 85444290, 8544492000, 8544499000,
194
+ 8544602000, 8544606000,
195
+ 8716390040,
196
+ 94017900,
197
+ 76101000, 76109000, 76129010,
198
+ 7615102015, 7615102025, 7615103015, 7615103025,
199
+ 7615105020, 7615105040,
200
+ 7615107125, 7615107130, 7615107155, 7615107180,
201
+ 7615109100, 7615200000,
202
+ 7616109090, 7616991000, 7616995130,
203
+ 7616995140, 7616995190,
204
+ 7612100000, 7612905000, 7613000000, 76141010
205
+ ]
app.py ADDED
@@ -0,0 +1,873 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HTS Checker - Streamlit Application for HTS Tariff Auditing
3
+ Deployed on Hugging Face Spaces
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ from io import BytesIO
9
+ import hashlib
10
+ from hts_validator import HTSValidator, validate_dataframe, SCENARIO_SUMMARIES
11
+ from HTS_list import Steel_primary_HTS_list, Aluminum_primary_HTS_list, Copper_primary_HTS_list
12
+
13
+
14
+ # Page configuration
15
+ st.set_page_config(
16
+ page_title="HTS Checker - Tariff Audit Tool",
17
+ page_icon="",
18
+ layout="wide"
19
+ )
20
+
21
+
22
@st.cache_data
def load_and_validate_excel(file_content, file_name, keywords_hash):
    """Read the uploaded workbook and normalise its HTS columns to clean text.

    Args:
        file_content: Anything ``pd.read_excel`` accepts (path, buffer, upload).
        file_name: Not used in the body; it only takes part in the cache key.
        keywords_hash: Not used in the body; it only takes part in the cache key.

    Returns:
        The loaded DataFrame. Each HTS column that is present is a string
        column with a trailing ".0" removed and "nan" replaced by "".
    """
    hts_columns = ("Tariff",) + tuple(f"Primary {slot}" for slot in range(1, 7))

    # Force the HTS columns to text while reading.
    df = pd.read_excel(file_content, dtype={name: str for name in hts_columns})

    for name in hts_columns:
        if name not in df.columns:
            continue
        as_text = df[name].astype(str)
        # Drop the ".0" left behind when a code was stored as a float.
        without_suffix = as_text.str.replace(r'\.0$', '', regex=True)
        # Empty cells stringify to "nan"; show them as blanks instead.
        df[name] = without_suffix.replace('nan', '')

    return df
45
+
46
+
47
@st.cache_data
def run_validation(df_hash, _df, _validator):
    """Validate ``_df`` with ``_validator`` and return the results.

    ``df_hash`` is not used in the body; it is the only argument without a
    leading underscore, so it is presumably the value the Streamlit cache
    keys on (underscore-prefixed arguments are skipped by ``st.cache_data``).
    """
    return validate_dataframe(_df, _validator)
52
+
53
+
54
def get_df_hash(df):
    """Return an MD5 hex digest of a DataFrame's contents, for use in caching.

    The per-row hashes from ``pd.util.hash_pandas_object`` are serialised to
    bytes and digested with MD5.
    """
    row_hashes = pd.util.hash_pandas_object(df)
    digest = hashlib.md5(row_hashes.values.tobytes())
    return digest.hexdigest()
57
+
58
+
59
def get_keywords_hash(keywords):
    """Return an MD5 hex digest of ``str(keywords)``, for cache invalidation.

    The digest is taken over the object's string form, so two mappings with
    the same items in a different insertion order produce different digests.
    """
    serialized = str(keywords).encode()
    return hashlib.md5(serialized).hexdigest()
62
+
63
+ # Initialize session state
64
# Seed each session key only when it is absent, so values written on an
# earlier rerun (edited keywords, accumulated export rows, ...) are kept.
_session_defaults = {
    "keywords": {
        "metal": ["steel", "stainless steel", "carbon steel", "iron", "metal"],
        "aluminum": ["aluminum", "aluminium"],
        "copper": ["copper"],
        "zinc": ["zinc"],
        "plastics": ["plastic", "abs", "pu", "pvc", "polyester", "nylon"],
    },
    "export_cache": [],
    "validation_results": None,
    "original_df": None,
}

for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
81
+
82
+
83
def get_validator():
    """Build an ``HTSValidator`` from the keyword lists held in session state.

    Each material group stored under ``st.session_state.keywords`` is passed
    to the validator as the matching ``<group>_keywords`` argument.
    """
    keyword_sets = st.session_state.keywords
    groups = ("metal", "aluminum", "copper", "zinc", "plastics")
    validator_kwargs = {f"{group}_keywords": keyword_sets[group] for group in groups}
    return HTSValidator(**validator_kwargs)
92
+
93
+
94
def color_status(val):
    """Return the CSS background rule for a status cell.

    "PASS", "FAIL" and "FLAG" map to a colour; any other value returns an
    empty string, i.e. no styling.
    """
    palette = (
        ("PASS", "background-color: #90EE90"),  # Light green
        ("FAIL", "background-color: #FFB6C1"),  # Light red
        ("FLAG", "background-color: #FFFFE0"),  # Light yellow
    )
    for status, css_rule in palette:
        if val == status:
            return css_rule
    return ""
103
+
104
+
105
def format_hts(hts_value):
    """Format an HTS value as a display string.

    Args:
        hts_value: An HTS code as a string, int or float; may also be
            ``None``, an empty string or a float NaN.

    Returns:
        The code as text with surrounding whitespace and a trailing ".0"
        removed. Falsy values and NaN yield "".
    """
    if not hts_value:
        return ""
    # NaN is truthy and would otherwise be rendered as the text "nan".
    # It is the only float that is not equal to itself.
    if isinstance(hts_value, float) and hts_value != hts_value:
        return ""
    text = str(hts_value).strip()
    # Remove .0 suffix if present (from float conversion)
    if text.endswith(".0"):
        text = text[:-2]
    return text
114
+
115
+
116
def results_to_dataframe(results):
    """Turn a sequence of validation results into a display DataFrame.

    Each result contributes one row. HTS lists are rendered as
    comma-separated text, and "Description" is cut to 100 characters plus
    "..." while "Full Description" keeps the whole text.
    """

    def _join_codes(codes):
        # Skip empty entries, then format each code as a string.
        return ", ".join([format_hts(code) for code in codes if code])

    rows = []
    for result in results:
        additional_text = _join_codes(result.additional_hts)
        expected_text = _join_codes(result.expected_hts)
        missing_text = _join_codes(result.missing_hts)
        unexpected_text = _join_codes(result.unexpected_hts)

        if len(result.description) > 100:
            short_description = result.description[:100] + "..."
        else:
            short_description = result.description

        rows.append({
            "Entry Number": result.entry_number,
            "Description": short_description,
            "Full Description": result.description,
            "Primary HTS": format_hts(result.primary_hts),
            "Additional HTS": additional_text,
            "Scenario": result.scenario_id,
            "Scenario Summary": result.scenario_summary,
            "Status": result.status,
            "Expected HTS": expected_text,
            "Missing HTS": missing_text,
            "Unexpected HTS": unexpected_text,
            "Issue": result.issue,
        })
    return pd.DataFrame(rows)
141
+
142
+
143
def export_to_excel(df, results_df=None):
    """Write ``df`` to an in-memory Excel workbook and return the buffer.

    Args:
        df: The data to export.
        results_df: Optional validation results. When given, a copy of ``df``
            is written to the "Audit Results" sheet. The validation columns
            are appended to that copy only if both frames have the same
            number of rows. Without it, ``df`` goes to an "Export" sheet.

    Returns:
        A ``BytesIO`` positioned at the start of the workbook bytes.
    """
    # (column written to the export, column read from results_df)
    validation_columns = (
        ("Scenario ID", "Scenario"),
        ("Scenario Summary", "Scenario Summary"),
        ("Status", "Status"),
        ("Expected HTS", "Expected HTS"),
        ("Missing HTS", "Missing HTS"),
        ("Unexpected HTS", "Unexpected HTS"),
        ("Issue Description", "Issue"),
    )

    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        if results_df is None:
            df.to_excel(writer, sheet_name="Export", index=False)
        else:
            audit_frame = df.copy()
            # Columns are attached by position, so both frames must line up.
            if len(results_df) == len(audit_frame):
                for target, source in validation_columns:
                    audit_frame[target] = results_df[source].values
            audit_frame.to_excel(writer, sheet_name="Audit Results", index=False)

    buffer.seek(0)
    return buffer
169
+
170
+
171
+ # Main app
172
+ st.title("HTS Checker - Tariff Audit Tool")
173
+ st.markdown("Audit primary HTS codes against additional tariffs and description keywords")
174
+
175
+ # Create tabs
176
+ tab1, tab2, tab2b, tab3, tab4, tab5 = st.tabs([
177
+ "Upload & Filter",
178
+ "Validation Results",
179
+ "Unique Combinations",
180
+ "Keyword Management",
181
+ "Export Selection",
182
+ "HTS Reference"
183
+ ])
184
+
185
+ # Tab 1: Upload & Filter
186
+ with tab1:
187
+ st.header("Upload Excel File")
188
+
189
+ uploaded_file = st.file_uploader(
190
+ "Upload your entry report Excel file",
191
+ type=["xlsx", "xls"],
192
+ help="Upload the customizable entry report from NetCHB"
193
+ )
194
+
195
+ if uploaded_file is not None:
196
+ try:
197
+ # Use cached loading function
198
+ keywords_hash = get_keywords_hash(st.session_state.keywords)
199
+ df = load_and_validate_excel(uploaded_file, uploaded_file.name, keywords_hash)
200
+
201
+ st.session_state.original_df = df
202
+ st.success(f"Loaded {len(df)} rows")
203
+
204
+ # Display column mapping info
205
+ with st.expander("Column Mapping"):
206
+ st.markdown("""
207
+ **Expected Columns:**
208
+ - Column E: `Description` - Product description for keyword matching
209
+ - Column F: `Tariff` - 10-digit Primary HTS code
210
+ - Columns I-N: `Primary 1-6` - Additional HTS codes
211
+ """)
212
+
213
+ st.write("**Detected columns:**", df.columns.tolist())
214
+
215
+ # Filter controls
216
+ st.subheader("Filter Options")
217
+
218
+ col1, col2 = st.columns(2)
219
+
220
+ with col1:
221
+ hts_filter = st.text_input(
222
+ "Filter by Primary HTS (partial match)",
223
+ placeholder="e.g., 7301 or 730120",
224
+ help="Enter partial HTS to filter entries"
225
+ )
226
+
227
+ with col2:
228
+ desc_exclude = st.text_input(
229
+ "Exclude by description keyword",
230
+ placeholder="e.g., polyester",
231
+ help="Exclude entries containing this keyword in description"
232
+ )
233
+
234
+ # Apply filters
235
+ filtered_df = df.copy()
236
+
237
+ if hts_filter:
238
+ tariff_col = "Tariff" if "Tariff" in df.columns else df.columns[5]
239
+ filtered_df = filtered_df[
240
+ filtered_df[tariff_col].astype(str).str.contains(hts_filter, na=False)
241
+ ]
242
+
243
+ if desc_exclude:
244
+ desc_col = "Description" if "Description" in df.columns else df.columns[4]
245
+ filtered_df = filtered_df[
246
+ ~filtered_df[desc_col].astype(str).str.lower().str.contains(
247
+ desc_exclude.lower(), na=False
248
+ )
249
+ ]
250
+
251
+ st.write(f"**Showing {len(filtered_df)} of {len(df)} entries**")
252
+
253
+ # Display filtered data
254
+ if len(filtered_df) > 0:
255
+ # Show key columns
256
+ display_cols = ["Entry Number", "Description", "Tariff",
257
+ "Primary 1", "Primary 2", "Primary 3",
258
+ "Primary 4", "Primary 5", "Primary 6"]
259
+ display_cols = [c for c in display_cols if c in filtered_df.columns]
260
+
261
+ st.dataframe(
262
+ filtered_df[display_cols].head(100),
263
+ use_container_width=True
264
+ )
265
+
266
+ if len(filtered_df) > 100:
267
+ st.info("Showing first 100 rows.")
268
+
269
+ # Run validation ONCE on full dataset (cached), then filter results
270
+ if "cached_full_results" not in st.session_state or st.session_state.get("cached_file_name") != uploaded_file.name:
271
+ with st.spinner("Validating all entries (one-time)..."):
272
+ validator = get_validator()
273
+ full_results = validate_dataframe(df, validator)
274
+ full_results_df = results_to_dataframe(full_results)
275
+ st.session_state.cached_full_results = full_results_df
276
+ st.session_state.cached_file_name = uploaded_file.name
277
+
278
+ # Filter cached results based on current filters
279
+ full_results_df = st.session_state.cached_full_results
280
+ filtered_indices = filtered_df.index.tolist()
281
+ filtered_results_df = full_results_df.iloc[filtered_indices].copy()
282
+
283
+ st.session_state.validation_results = filtered_results_df
284
+ st.session_state.filtered_df = filtered_df
285
+
286
+ st.success(f"Showing {len(filtered_df)} entries. Go to 'Validation Results' tab to review.")
287
+
288
+ except Exception as e:
289
+ st.error(f"Error loading file: {str(e)}")
290
+
291
+ # Tab 2: Validation Results
292
+ with tab2:
293
+ st.header("Validation Results")
294
+
295
+ if st.session_state.validation_results is None:
296
+ st.info("Upload a file and run validation first.")
297
+ else:
298
+ # Results are already a DataFrame now (cached)
299
+ results_df = st.session_state.validation_results.copy()
300
+
301
+ # Summary statistics
302
+ col1, col2, col3, col4 = st.columns(4)
303
+ with col1:
304
+ pass_count = len(results_df[results_df["Status"] == "PASS"])
305
+ st.metric("PASS", pass_count)
306
+ with col2:
307
+ fail_count = len(results_df[results_df["Status"] == "FAIL"])
308
+ st.metric("FAIL", fail_count)
309
+ with col3:
310
+ flag_count = len(results_df[results_df["Status"] == "FLAG"])
311
+ st.metric("FLAG", flag_count)
312
+ with col4:
313
+ none_count = len(results_df[results_df["Scenario"] == "NONE"])
314
+ st.metric("No Match", none_count)
315
+
316
+ # Filter by status
317
+ st.subheader("Filter Results")
318
+
319
+ col1, col2 = st.columns(2)
320
+ with col1:
321
+ status_filter = st.multiselect(
322
+ "Filter by Status",
323
+ options=["PASS", "FAIL", "FLAG"],
324
+ default=["FAIL", "FLAG"]
325
+ )
326
+
327
+ with col2:
328
+ scenario_filter = st.multiselect(
329
+ "Filter by Scenario",
330
+ options=list(SCENARIO_SUMMARIES.keys()),
331
+ default=[]
332
+ )
333
+
334
+ # Apply filters
335
+ display_df = results_df.copy()
336
+
337
+ if status_filter:
338
+ display_df = display_df[display_df["Status"].isin(status_filter)]
339
+
340
+ if scenario_filter:
341
+ display_df = display_df[display_df["Scenario"].isin(scenario_filter)]
342
+
343
+ # Exclude "NONE" scenario by default
344
+ show_none = st.checkbox("Show 'No Match' entries", value=False)
345
+ if not show_none:
346
+ display_df = display_df[display_df["Scenario"] != "NONE"]
347
+
348
+ st.write(f"**Showing {len(display_df)} results**")
349
+
350
+ # Display results table
351
+ if len(display_df) > 0:
352
+ # Select columns to display
353
+ display_columns = [
354
+ "Entry Number", "Description", "Primary HTS",
355
+ "Additional HTS", "Scenario", "Status", "Issue"
356
+ ]
357
+
358
+ # Interactive filtering section
359
+ st.markdown("**Interactive Filters:**")
360
+ filter_col1, filter_col2, filter_col3 = st.columns(3)
361
+
362
+ with filter_col1:
363
+ hts_search = st.text_input(
364
+ "Filter by Primary HTS",
365
+ placeholder="e.g., 7301 or 8302",
366
+ key="results_hts_filter"
367
+ )
368
+
369
+ with filter_col2:
370
+ desc_search = st.text_input(
371
+ "Filter by Description",
372
+ placeholder="e.g., steel, aluminum",
373
+ key="results_desc_filter"
374
+ )
375
+
376
+ with filter_col3:
377
+ additional_hts_search = st.text_input(
378
+ "Filter by Additional HTS",
379
+ placeholder="e.g., 99038191",
380
+ key="results_additional_filter"
381
+ )
382
+
383
+ # Apply interactive filters
384
+ interactive_df = display_df.copy()
385
+
386
+ if hts_search:
387
+ interactive_df = interactive_df[
388
+ interactive_df["Primary HTS"].astype(str).str.contains(hts_search, case=False, na=False)
389
+ ]
390
+
391
+ if desc_search:
392
+ interactive_df = interactive_df[
393
+ interactive_df["Description"].astype(str).str.contains(desc_search, case=False, na=False)
394
+ ]
395
+
396
+ if additional_hts_search:
397
+ interactive_df = interactive_df[
398
+ interactive_df["Additional HTS"].astype(str).str.contains(additional_hts_search, case=False, na=False)
399
+ ]
400
+
401
+ st.write(f"**Filtered: {len(interactive_df)} of {len(display_df)} results**")
402
+
403
+ # Store interactive filtered df for export
404
+ st.session_state.interactive_filtered_df = interactive_df
405
+
406
+ styled_df = interactive_df[display_columns].style.applymap(
407
+ color_status, subset=["Status"]
408
+ )
409
+
410
+ st.dataframe(styled_df, use_container_width=True, height=400)
411
+
412
+ # Scenario legend
413
+ with st.expander("Scenario Legend"):
414
+ for scenario_id, summary in SCENARIO_SUMMARIES.items():
415
+ st.write(f"**{scenario_id}**: {summary}")
416
+
417
+ # Bulk Export Actions
418
+ st.subheader("Add to Export Cache")
419
+ st.markdown("Use bulk actions to add **currently filtered** results to export cache")
420
+
421
+ col1, col2, col3 = st.columns(3)
422
+
423
+ with col1:
424
+ if st.button("Add ALL Filtered to Cache", type="primary"):
425
+ added_count = 0
426
+ for _, row in interactive_df.iterrows():
427
+ row_dict = row.to_dict()
428
+ # Check if not already in cache (by Entry + HTS + Description for uniqueness)
429
+ key = (row_dict.get("Entry Number", ""), row_dict.get("Primary HTS", ""), row_dict.get("Description", ""))
430
+ existing_keys = [(d.get("Entry Number", ""), d.get("Primary HTS", ""), d.get("Description", ""))
431
+ for d in st.session_state.export_cache]
432
+ if key not in existing_keys:
433
+ st.session_state.export_cache.append(row_dict)
434
+ added_count += 1
435
+ st.success(f"Added {added_count} entries to cache ({len(st.session_state.export_cache)} total)")
436
+
437
+ with col2:
438
+ if st.button("Add FAIL Only to Cache"):
439
+ fail_df = interactive_df[interactive_df["Status"] == "FAIL"]
440
+ added_count = 0
441
+ for _, row in fail_df.iterrows():
442
+ row_dict = row.to_dict()
443
+ key = (row_dict.get("Entry Number", ""), row_dict.get("Primary HTS", ""), row_dict.get("Description", ""))
444
+ existing_keys = [(d.get("Entry Number", ""), d.get("Primary HTS", ""), d.get("Description", ""))
445
+ for d in st.session_state.export_cache]
446
+ if key not in existing_keys:
447
+ st.session_state.export_cache.append(row_dict)
448
+ added_count += 1
449
+ st.success(f"Added {added_count} FAIL entries to cache")
450
+
451
+ with col3:
452
+ if st.button("Add FLAG Only to Cache"):
453
+ flag_df = interactive_df[interactive_df["Status"] == "FLAG"]
454
+ added_count = 0
455
+ for _, row in flag_df.iterrows():
456
+ row_dict = row.to_dict()
457
+ key = (row_dict.get("Entry Number", ""), row_dict.get("Primary HTS", ""), row_dict.get("Description", ""))
458
+ existing_keys = [(d.get("Entry Number", ""), d.get("Primary HTS", ""), d.get("Description", ""))
459
+ for d in st.session_state.export_cache]
460
+ if key not in existing_keys:
461
+ st.session_state.export_cache.append(row_dict)
462
+ added_count += 1
463
+ st.success(f"Added {added_count} FLAG entries to cache")
464
+
465
+ # Add by scenario
466
+ st.markdown("**Add by Scenario (from filtered results):**")
467
+ scenario_cols = st.columns(4)
468
+ available_scenarios = interactive_df["Scenario"].unique().tolist()
469
+
470
+ for idx, scenario in enumerate(available_scenarios[:8]): # Limit to 8 buttons
471
+ col_idx = idx % 4
472
+ with scenario_cols[col_idx]:
473
+ scenario_count = len(interactive_df[interactive_df["Scenario"] == scenario])
474
+ if st.button(f"{scenario} ({scenario_count})", key=f"add_scenario_{scenario}"):
475
+ scenario_df = interactive_df[interactive_df["Scenario"] == scenario]
476
+ added_count = 0
477
+ for _, row in scenario_df.iterrows():
478
+ row_dict = row.to_dict()
479
+ key = (row_dict.get("Entry Number", ""), row_dict.get("Primary HTS", ""), row_dict.get("Description", ""))
480
+ existing_keys = [(d.get("Entry Number", ""), d.get("Primary HTS", ""), d.get("Description", ""))
481
+ for d in st.session_state.export_cache]
482
+ if key not in existing_keys:
483
+ st.session_state.export_cache.append(row_dict)
484
+ added_count += 1
485
+ st.success(f"Added {added_count} {scenario} entries to cache")
486
+
487
+ # Show cache status
488
+ st.info(f"Current cache: {len(st.session_state.export_cache)} entries. Go to 'Export Selection' tab to download.")
489
+
490
# Tab 2b: Unique Combinations
# Collapses the validation results to one row per (Primary HTS, Full Description)
# pair so a reviewer does not have to look at the same combination repeatedly.
with tab2b:
    st.header("Unique HTS + Description Combinations")
    st.markdown("View unique combinations to avoid reviewing duplicates")

    if st.session_state.validation_results is None:
        st.info("Upload a file and run validation first.")
    else:
        # Work on a copy so the stored results are never mutated by this tab.
        results_df = st.session_state.validation_results.copy()

        # Filter by status first
        st.subheader("Filter Options")
        col1, col2 = st.columns(2)

        with col1:
            unique_status_filter = st.multiselect(
                "Filter by Status",
                options=["PASS", "FAIL", "FLAG"],
                default=["FAIL", "FLAG"],
                key="unique_status_filter",
            )

        with col2:
            unique_scenario_filter = st.multiselect(
                "Filter by Scenario",
                options=list(SCENARIO_SUMMARIES.keys()),
                default=[],
                key="unique_scenario_filter",
            )

        # Apply filters (boolean indexing returns new frames, no extra copy needed)
        filtered_df = results_df

        if unique_status_filter:
            filtered_df = filtered_df[filtered_df["Status"].isin(unique_status_filter)]

        if unique_scenario_filter:
            filtered_df = filtered_df[filtered_df["Scenario"].isin(unique_scenario_filter)]

        # Exclude NONE by default
        show_none_unique = st.checkbox(
            "Show 'No Match' entries", value=False, key="show_none_unique"
        )
        if not show_none_unique:
            filtered_df = filtered_df[filtered_df["Scenario"] != "NONE"]

        if len(filtered_df) > 0:
            # Group by Primary HTS + Full Description; every other column keeps
            # the value of the first row in the group and "Entry Number" is
            # turned into an occurrence count.
            unique_df = filtered_df.groupby(
                ["Primary HTS", "Full Description"], as_index=False
            ).agg({
                "Entry Number": "count",
                "Additional HTS": "first",
                "Scenario": "first",
                "Scenario Summary": "first",
                "Status": "first",
                "Expected HTS": "first",
                "Missing HTS": "first",
                "Unexpected HTS": "first",
                "Issue": "first",
            }).rename(columns={"Entry Number": "Count"})

            # Sort by count descending to show most common first
            unique_df = unique_df.sort_values("Count", ascending=False)

            # Shorter description for display. Slice the string form so a
            # non-string cell cannot raise when it is longer than 80 characters.
            unique_df["Description"] = unique_df["Full Description"].apply(
                lambda x: str(x)[:80] + "..." if len(str(x)) > 80 else x
            )

            st.write(
                f"**{len(unique_df)} unique combinations** (from {len(filtered_df)} total entries)"
            )

            # Interactive filters for unique view
            st.markdown("**Search Filters:**")
            ucol1, ucol2 = st.columns(2)

            with ucol1:
                unique_hts_search = st.text_input(
                    "Filter by Primary HTS",
                    placeholder="e.g., 7301 or 8302",
                    key="unique_hts_search",
                )

            with ucol2:
                unique_desc_search = st.text_input(
                    "Filter by Description",
                    placeholder="e.g., steel, aluminum",
                    key="unique_desc_search",
                )

            # Apply search filters.
            # regex=False: the boxes hold free text typed by the user, so
            # characters such as "(" or "+" must be matched literally instead
            # of being compiled as a regular expression (which could raise).
            display_unique_df = unique_df.copy()

            if unique_hts_search:
                display_unique_df = display_unique_df[
                    display_unique_df["Primary HTS"].astype(str).str.contains(
                        unique_hts_search, case=False, na=False, regex=False
                    )
                ]

            if unique_desc_search:
                # Search the full text, not the 80-character display column,
                # so a term near the end of a long description is still found.
                display_unique_df = display_unique_df[
                    display_unique_df["Full Description"].astype(str).str.contains(
                        unique_desc_search, case=False, na=False, regex=False
                    )
                ]

            st.write(f"**Showing {len(display_unique_df)} unique combinations**")

            # Display columns
            display_cols = [
                "Primary HTS", "Description", "Additional HTS",
                "Scenario", "Status", "Count", "Issue",
            ]

            styled_unique = display_unique_df[display_cols].style.applymap(
                color_status, subset=["Status"]
            )

            st.dataframe(styled_unique, use_container_width=True, height=400)

            def _add_unique_rows_to_cache(rows_df):
                """Append rows not yet cached to the export cache; return how many were added.

                A row counts as already cached when an existing cache entry has
                the same (Primary HTS, Full Description) pair. The set of known
                pairs is built once and extended while adding, instead of being
                rebuilt from the whole cache for every row.
                """
                known_keys = {
                    (d.get("Primary HTS", ""), d.get("Full Description", ""))
                    for d in st.session_state.export_cache
                }
                added = 0
                for _, row in rows_df.iterrows():
                    row_dict = row.to_dict()
                    key = (row_dict.get("Primary HTS", ""), row_dict.get("Full Description", ""))
                    if key not in known_keys:
                        known_keys.add(key)
                        st.session_state.export_cache.append(row_dict)
                        added += 1
                return added

            # Bulk export for unique combinations
            st.subheader("Add Unique Combinations to Cache")

            col1, col2 = st.columns(2)

            with col1:
                if st.button("Add ALL Unique to Cache", type="primary", key="add_all_unique"):
                    added_count = _add_unique_rows_to_cache(display_unique_df)
                    st.success(f"Added {added_count} unique combinations to cache")

            with col2:
                if st.button("Add FAIL/FLAG Unique to Cache", key="add_fail_flag_unique"):
                    fail_flag_df = display_unique_df[
                        display_unique_df["Status"].isin(["FAIL", "FLAG"])
                    ]
                    added_count = _add_unique_rows_to_cache(fail_flag_df)
                    st.success(f"Added {added_count} FAIL/FLAG combinations to cache")

            st.info(f"Current cache: {len(st.session_state.export_cache)} entries")
        else:
            st.info("No results matching the selected filters.")
641
+
642
# Tab 3: Keyword Management
# Lets the user edit the five keyword lists kept in st.session_state.keywords.
with tab3:
    st.header("Keyword Management")
    st.markdown("Edit keyword lists used for validation. Changes apply immediately.")

    col1, col2 = st.columns(2)

    # One row per text area, in rendering order:
    # (column, category, section title, widget label, height, widget key)
    keyword_boxes = (
        (col1, "metal", "Metal Keywords", "Metal keywords (one per line)", 150, "metal_input"),
        (col1, "aluminum", "Aluminum Keywords", "Aluminum keywords (one per line)", 100, "aluminum_input"),
        (col1, "copper", "Copper Keywords", "Copper keywords (one per line)", 100, "copper_input"),
        (col2, "zinc", "Zinc Keywords", "Zinc keywords (one per line)", 100, "zinc_input"),
        (col2, "plastics", "Plastics Keywords", "Plastics keywords (one per line)", 150, "plastics_input"),
    )

    # category -> current raw text of its text area
    keyword_text = {}
    for column, category, title, label, box_height, widget_key in keyword_boxes:
        with column:
            st.subheader(title)
            keyword_text[category] = st.text_area(
                label,
                value="\n".join(st.session_state.keywords[category]),
                height=box_height,
                key=widget_key,
            )

    # Session entries dropped whenever the keyword lists change.
    stale_result_keys = ("cached_full_results", "cached_file_name")

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Save Keywords", type="primary"):
            # One keyword per non-blank line, surrounding whitespace removed.
            for category, raw_text in keyword_text.items():
                st.session_state.keywords[category] = [
                    line.strip() for line in raw_text.split("\n") if line.strip()
                ]
            # Clear cached results to force re-validation
            for stale_key in stale_result_keys:
                if stale_key in st.session_state:
                    del st.session_state[stale_key]
            st.success("Keywords saved! Re-upload file or refresh to apply changes.")

    with col2:
        if st.button("Reset to Defaults"):
            st.session_state.keywords = {
                "metal": ["steel", "stainless steel", "carbon steel", "iron", "metal"],
                "aluminum": ["aluminum", "aluminium"],
                "copper": ["copper"],
                "zinc": ["zinc"],
                "plastics": ["plastic", "abs", "pu", "pvc", "polyester", "nylon"],
            }
            # Clear cached results
            for stale_key in stale_result_keys:
                if stale_key in st.session_state:
                    del st.session_state[stale_key]
            st.success("Keywords reset to defaults!")
            st.rerun()
733
+
734
# Tab 4: Export Selection
# Shows the export cache and offers two Excel downloads: the cache alone, or
# the validation results filtered by status together with the source rows.
with tab4:
    st.header("Export Selection")

    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    if st.session_state.export_cache:
        st.write(f"**{len(st.session_state.export_cache)} entries in cache**")

        # Display cache contents
        cache_df = pd.DataFrame(st.session_state.export_cache)
        st.dataframe(cache_df, use_container_width=True)

        col1, col2, col3 = st.columns(3)

        with col1:
            if st.button("Clear Cache"):
                st.session_state.export_cache = []
                st.success("Cache cleared!")
                st.rerun()

        with col2:
            # Export cached entries only
            if st.button("Export Cache to Excel"):
                st.download_button(
                    label="Download Excel (Cache Only)",
                    data=export_to_excel(cache_df),
                    file_name="hts_audit_cache.xlsx",
                    mime=xlsx_mime,
                )
    else:
        st.info("No entries in export cache. Select entries from Validation Results tab.")

    # Export full results with original data
    st.subheader("Export Full Results")

    validation_ready = (
        st.session_state.validation_results is not None
        and st.session_state.original_df is not None
    )
    if validation_ready:
        # validation_results is already a DataFrame now
        results_df = st.session_state.validation_results.copy()

        # Status filter for export
        export_status = st.multiselect(
            "Export entries with status:",
            options=["PASS", "FAIL", "FLAG"],
            default=["FAIL", "FLAG"],
            key="export_status_filter",
        )

        # Create filtered export
        if export_status:
            filtered_results = results_df[results_df["Status"].isin(export_status)]
            filtered_indices = filtered_results.index.tolist()

            # Prefer the filtered source frame when one was stored in the session.
            if "filtered_df" in st.session_state:
                source_df = st.session_state.filtered_df
            else:
                source_df = st.session_state.original_df
            # NOTE(review): index labels of the results are used as row
            # positions here - presumably both frames share a 0..n-1 index;
            # confirm against the code that builds validation_results.
            export_original = source_df.iloc[filtered_indices].copy()

            st.write(f"**{len(filtered_results)} entries will be exported**")

            if st.button("Generate Full Export", type="primary"):
                st.download_button(
                    label="Download Full Excel Report",
                    data=export_to_excel(export_original, filtered_results),
                    file_name="hts_audit_full_report.xlsx",
                    mime=xlsx_mime,
                )
    else:
        st.info("Run validation first to enable full export.")
803
+
804
# Tab 5: HTS Reference
# Shows the three primary-HTS reference lists side by side, with an optional
# substring search, followed by the number of codes shared between lists.
with tab5:
    st.header("HTS Reference Lists")
    st.markdown("Reference lists of Steel, Aluminum, and Copper HTS codes used for validation")

    # Search filter
    hts_search = st.text_input(
        "Search HTS code",
        placeholder="Enter HTS to search across all lists",
        key="hts_reference_search",
    )
    # The lists are rendered via str(code) and contain no dots, so a search
    # typed as "7406.10" could never match. Drop dots and outer whitespace.
    search_term = hts_search.replace(".", "").strip()

    reference_lists = (
        ("Steel", Steel_primary_HTS_list),
        ("Aluminum", Aluminum_primary_HTS_list),
        ("Copper", Copper_primary_HTS_list),
    )

    # metal name -> set of code strings, reused by the overlap analysis below
    code_sets = {}
    for column, (metal, hts_codes) in zip(st.columns(3), reference_lists):
        codes = [str(h) for h in hts_codes]
        code_sets[metal] = set(codes)
        if search_term:
            codes = [code for code in codes if search_term in code]
        with column:
            # The header always shows the size of the full list, not of the search hits.
            st.subheader(f"{metal} HTS ({len(hts_codes)})")
            st.dataframe(
                pd.DataFrame({f"{metal} HTS": codes}),
                use_container_width=True,
                height=400,
            )

    # Show overlap info (exact string equality between list entries)
    st.subheader("HTS Overlap Analysis")
    overlaps = (
        ("Steel & Aluminum Overlap", code_sets["Steel"] & code_sets["Aluminum"]),
        ("Aluminum & Copper Overlap", code_sets["Aluminum"] & code_sets["Copper"]),
        ("Steel & Copper Overlap", code_sets["Steel"] & code_sets["Copper"]),
    )
    for column, (label, shared_codes) in zip(st.columns(3), overlaps):
        with column:
            st.metric(label, len(shared_codes))
            if shared_codes:
                with st.expander("View overlapping HTS"):
                    st.write(sorted(shared_codes))

# Footer
st.markdown("---")
st.markdown("HTS Checker v1.0 - Tariff Audit Tool")
hts_validator.py ADDED
@@ -0,0 +1,731 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HTS Validator - Core validation logic for HTS tariff auditing
3
+ Validates primary HTS codes against additional HTS and description keywords
4
+ """
5
+
6
+ import re
7
+ from typing import Dict, List, Optional, Tuple, Set
8
+ from dataclasses import dataclass
9
+ from HTS_list import Steel_primary_HTS_list, Aluminum_primary_HTS_list, Copper_primary_HTS_list
10
+
11
+
12
# Additional HTS codes the validator looks for on an entry.
# Single codes first, then the groups where any one member counts as "present".
GENERAL_301_CODE: str = "99030133"
MISMATCH_CODE: str = "99030125"

STEEL_232_CODES: Set[str] = {
    "99038190",
    "99038191",
}
ALUMINUM_232_CODES: Set[str] = {
    "99038507",
    "99038508",
}
COPPER_CODES: Set[str] = {
    "99037801",
    "99037802",
}

# Scenario id -> one-line summary. Insertion order is kept as-is because the
# keys are also offered, in this order, as filter options in the UI.
SCENARIO_SUMMARIES: Dict[str, str] = {
    # Steel
    "S1": "Steel HTS + 232 tariff applied - verify 99030133 present, no 99030125",
    "S2": "Metal keyword but NOT steel HTS - should apply 99030125, no 232 tariffs",
    "S3": "Steel HTS but NO metal keyword - description mismatch, should apply 99030125",
    # Aluminum
    "S4": "Aluminum HTS + 232 tariff applied - verify 99030133 present",
    "S5": "Aluminum keyword but NOT aluminum HTS - should apply 99030125, no 232 tariffs",
    "S6": "Aluminum HTS but NO aluminum keyword - description mismatch, should apply 99030125",
    # HTS present in both the steel and the aluminum list
    "S7": "Dual Steel+Aluminum HTS - matches keyword, apply corresponding 232",
    "S7a": "Dual Steel+Aluminum HTS + BOTH keywords - flag for manual review",
    "S8": "Dual Steel+Aluminum HTS + NO keywords - should apply 99030125",
    # Copper
    "S9": "Copper keyword but NOT copper HTS - potential misclassification",
    "S10": "Copper HTS but NO copper keyword - description mismatch",
    # HTS present in both the aluminum and the copper list
    "S11": "Dual Aluminum+Copper HTS + copper keyword - apply 99030133 + copper tariffs",
    "S12": "Dual Aluminum+Copper HTS + aluminum keyword - apply 99030133 + aluminum 232",
    # Keyword overrides
    "S13": "Zinc keyword - should ONLY apply 99030125, no 232 tariffs allowed",
    "S14": "Plastics keyword + metal HTS - override, should ONLY apply 99030125",
    # Remaining cases
    "COPPER_OK": "Copper HTS + copper keyword - verify copper tariffs applied",
    "NONE": "No applicable scenario - entry does not match any validation rules",
}
39
+
40
+
41
@dataclass
class ValidationResult:
    """Outcome of checking one entry against the validation scenarios."""

    # --- the entry as it was validated ---
    entry_number: str
    description: str
    primary_hts: str
    additional_hts: List[str]

    # --- which scenario applied and how the entry fared ---
    scenario_id: str        # a key of SCENARIO_SUMMARIES, e.g. "S13"
    scenario_summary: str   # the summary text for scenario_id
    status: str             # PASS, FAIL, FLAG

    # --- details backing the status ---
    expected_hts: List[str]
    missing_hts: List[str]
    unexpected_hts: List[str]
    issue: str              # human-readable explanation
55
+
56
+
57
+ class HTSValidator:
58
+ """Validates HTS codes against descriptions and additional tariffs"""
59
+
60
def __init__(self,
             metal_keywords: Optional[List[str]] = None,
             aluminum_keywords: Optional[List[str]] = None,
             copper_keywords: Optional[List[str]] = None,
             zinc_keywords: Optional[List[str]] = None,
             plastics_keywords: Optional[List[str]] = None):
    """Initialize validator with keyword lists.

    Each argument replaces the built-in default list for its category.
    Only ``None`` selects the default: an explicitly empty list is kept
    as given, so a caller can switch a category off by passing ``[]``.
    (With ``value or default`` an empty list was silently replaced by
    the defaults.)
    """

    def _or_default(given: Optional[List[str]], default: List[str]) -> List[str]:
        # Fall back only when the caller supplied nothing at all.
        return default if given is None else given

    # Default keyword lists
    self.metal_keywords = _or_default(metal_keywords, [
        "steel", "stainless steel", "carbon steel", "iron", "metal"
    ])
    self.aluminum_keywords = _or_default(aluminum_keywords, [
        "aluminum", "aluminium"
    ])
    self.copper_keywords = _or_default(copper_keywords, [
        "copper"
    ])
    self.zinc_keywords = _or_default(zinc_keywords, [
        "zinc"
    ])
    self.plastics_keywords = _or_default(plastics_keywords, [
        "plastic", "abs", "pu", "pvc", "polyester", "nylon"
    ])

    # Convert HTS lists to string sets for matching
    self.steel_hts_set = self._convert_hts_list(Steel_primary_HTS_list)
    self.aluminum_hts_set = self._convert_hts_list(Aluminum_primary_HTS_list)
    self.copper_hts_set = self._convert_hts_list(Copper_primary_HTS_list)
89
+
90
+ def _convert_hts_list(self, hts_list: List) -> Set[str]:
91
+ """Convert HTS list to set of strings"""
92
+ return {str(hts) for hts in hts_list}
93
+
94
+ def _hts_matches_list(self, primary_hts: str, hts_set: Set[str]) -> bool:
95
+ """Check if primary HTS matches any entry in HTS set using prefix matching"""
96
+ primary_str = str(primary_hts).replace(".", "").strip()
97
+
98
+ for list_hts in hts_set:
99
+ list_hts_str = str(list_hts).replace(".", "").strip()
100
+ # Prefix match: if list entry is 8 digits, match any 10-digit starting with it
101
+ if len(list_hts_str) <= len(primary_str):
102
+ if primary_str.startswith(list_hts_str):
103
+ return True
104
+ else:
105
+ # List entry is longer, check if primary starts with it
106
+ if list_hts_str.startswith(primary_str):
107
+ return True
108
+ return False
109
+
110
+ def _contains_keywords(self, text: str, keywords: List[str]) -> bool:
111
+ """Check if text contains any of the keywords (case-insensitive, word boundary)"""
112
+ if not text:
113
+ return False
114
+ text_lower = text.lower()
115
+ for kw in keywords:
116
+ # Use word boundary matching to avoid partial matches (e.g., "pu" in "punch")
117
+ # \b matches word boundaries
118
+ pattern = r'\b' + re.escape(kw.lower()) + r'\b'
119
+ if re.search(pattern, text_lower):
120
+ return True
121
+ return False
122
+
123
+ def _get_additional_hts_set(self, additional_hts: List[str]) -> Set[str]:
124
+ """Convert additional HTS list to normalized set"""
125
+ result = set()
126
+ for hts in additional_hts:
127
+ if hts:
128
+ # Remove decimal points and convert to string
129
+ normalized = str(hts).replace(".", "").strip()
130
+ # Remove trailing .0 from floats
131
+ if normalized.endswith("0") and len(normalized) > 8:
132
+ # Check if it's a float representation
133
+ try:
134
+ float_val = float(hts)
135
+ normalized = str(int(float_val))
136
+ except (ValueError, TypeError):
137
+ pass
138
+ result.add(normalized)
139
+ return result
140
+
141
+ def _check_hts_present(self, hts_code: str, additional_set: Set[str]) -> bool:
142
+ """Check if an HTS code is present in additional HTS set"""
143
+ return hts_code in additional_set
144
+
145
+ def _check_any_hts_present(self, hts_codes: Set[str], additional_set: Set[str]) -> bool:
146
+ """Check if any of the HTS codes are present"""
147
+ return bool(hts_codes & additional_set)
148
+
149
def validate_entry(self, entry_number: str, description: str,
                   primary_hts: str, additional_hts: List[str]) -> ValidationResult:
    """Validate a single entry against all scenarios.

    Derives the facts the rules need - which reference lists the primary
    HTS matches, which keyword groups occur in the description, and which
    additional tariff codes are present - and hands them to
    ``_apply_validation_rules``, whose result is returned unchanged.

    Args:
        entry_number: Identifier of the entry; passed through as given.
        description: Goods description; a falsy value is treated as "".
        primary_hts: Primary HTS code; dots and surrounding whitespace are
            removed, a falsy value is treated as "".
        additional_hts: Additional HTS codes filed with the entry.

    Returns:
        The ``ValidationResult`` produced by the rule engine. Its
        ``additional_hts`` holds the normalized codes in sorted order.
    """

    # Normalize data
    primary_str = str(primary_hts).replace(".", "").strip() if primary_hts else ""
    desc = str(description) if description else ""
    additional_set = self._get_additional_hts_set(additional_hts)

    # Check which HTS lists the primary belongs to
    in_steel = self._hts_matches_list(primary_str, self.steel_hts_set)
    in_aluminum = self._hts_matches_list(primary_str, self.aluminum_hts_set)
    in_copper = self._hts_matches_list(primary_str, self.copper_hts_set)

    # Check description keywords
    has_metal_kw = self._contains_keywords(desc, self.metal_keywords)
    has_aluminum_kw = self._contains_keywords(desc, self.aluminum_keywords)
    has_copper_kw = self._contains_keywords(desc, self.copper_keywords)
    has_zinc_kw = self._contains_keywords(desc, self.zinc_keywords)
    has_plastics_kw = self._contains_keywords(desc, self.plastics_keywords)

    # Check which additional HTS are applied
    has_steel_232 = self._check_any_hts_present(STEEL_232_CODES, additional_set)
    has_aluminum_232 = self._check_any_hts_present(ALUMINUM_232_CODES, additional_set)
    has_copper_tariff = self._check_any_hts_present(COPPER_CODES, additional_set)
    has_301 = self._check_hts_present(GENERAL_301_CODE, additional_set)
    has_mismatch = self._check_hts_present(MISMATCH_CODE, additional_set)

    # Apply validation rules in priority order
    return self._apply_validation_rules(
        entry_number=entry_number,
        description=desc,
        primary_hts=primary_str,
        # sorted(): iterating a set of strings gives an order that can
        # differ between interpreter runs, which made the reported code
        # list unstable across otherwise identical validations.
        additional_hts=sorted(additional_set),
        in_steel=in_steel,
        in_aluminum=in_aluminum,
        in_copper=in_copper,
        has_metal_kw=has_metal_kw,
        has_aluminum_kw=has_aluminum_kw,
        has_copper_kw=has_copper_kw,
        has_zinc_kw=has_zinc_kw,
        has_plastics_kw=has_plastics_kw,
        has_steel_232=has_steel_232,
        has_aluminum_232=has_aluminum_232,
        has_copper_tariff=has_copper_tariff,
        has_301=has_301,
        has_mismatch=has_mismatch,
        additional_set=additional_set
    )
198
+
199
+ def _apply_validation_rules(self, entry_number: str, description: str,
200
+ primary_hts: str, additional_hts: List[str],
201
+ in_steel: bool, in_aluminum: bool, in_copper: bool,
202
+ has_metal_kw: bool, has_aluminum_kw: bool,
203
+ has_copper_kw: bool, has_zinc_kw: bool,
204
+ has_plastics_kw: bool, has_steel_232: bool,
205
+ has_aluminum_232: bool, has_copper_tariff: bool,
206
+ has_301: bool, has_mismatch: bool,
207
+ additional_set: Set[str]) -> ValidationResult:
208
+ """Apply all validation rules and return result"""
209
+
210
+ # Priority 1: Special overrides (zinc, plastics)
211
+
212
+ # S13: Zinc keyword - only 99030125, no 232 tariffs
213
+ if has_zinc_kw:
214
+ expected = [MISMATCH_CODE]
215
+ issues = []
216
+
217
+ if not has_mismatch:
218
+ issues.append("Missing 99030125")
219
+ if has_steel_232 or has_aluminum_232 or has_copper_tariff:
220
+ issues.append("Should NOT have 232/copper tariffs with zinc")
221
+
222
+ status = "PASS" if not issues else "FAIL"
223
+ return ValidationResult(
224
+ entry_number=entry_number,
225
+ description=description,
226
+ primary_hts=primary_hts,
227
+ additional_hts=additional_hts,
228
+ scenario_id="S13",
229
+ scenario_summary=SCENARIO_SUMMARIES["S13"],
230
+ status=status,
231
+ expected_hts=expected,
232
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
233
+ unexpected_hts=self._get_unexpected_232(additional_set),
234
+ issue="; ".join(issues) if issues else "Correct - zinc with only 99030125"
235
+ )
236
+
237
+ # S14: Plastics keyword + metal HTS - only 99030125
238
+ if has_plastics_kw and (in_steel or in_aluminum):
239
+ expected = [MISMATCH_CODE]
240
+ issues = []
241
+
242
+ if not has_mismatch:
243
+ issues.append("Missing 99030125")
244
+ if has_steel_232 or has_aluminum_232:
245
+ issues.append("Should NOT have 232 tariffs with plastics material")
246
+
247
+ status = "PASS" if not issues else "FAIL"
248
+ return ValidationResult(
249
+ entry_number=entry_number,
250
+ description=description,
251
+ primary_hts=primary_hts,
252
+ additional_hts=additional_hts,
253
+ scenario_id="S14",
254
+ scenario_summary=SCENARIO_SUMMARIES["S14"],
255
+ status=status,
256
+ expected_hts=expected,
257
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
258
+ unexpected_hts=self._get_unexpected_232(additional_set),
259
+ issue="; ".join(issues) if issues else "Correct - plastics with only 99030125"
260
+ )
261
+
262
+ # Priority 2: Dual list scenarios
263
+
264
+ # Dual Aluminum + Copper
265
+ if in_aluminum and in_copper:
266
+ # S11: Copper keyword
267
+ if has_copper_kw:
268
+ expected = [GENERAL_301_CODE] + list(COPPER_CODES)
269
+ issues = []
270
+
271
+ if not has_301:
272
+ issues.append("Missing 99030133")
273
+ if not has_copper_tariff:
274
+ issues.append("Missing copper tariff (99037801/02)")
275
+ if has_aluminum_232:
276
+ issues.append("Should NOT have aluminum 232 when description says copper")
277
+
278
+ status = "PASS" if not issues else "FAIL"
279
+ return ValidationResult(
280
+ entry_number=entry_number,
281
+ description=description,
282
+ primary_hts=primary_hts,
283
+ additional_hts=additional_hts,
284
+ scenario_id="S11",
285
+ scenario_summary=SCENARIO_SUMMARIES["S11"],
286
+ status=status,
287
+ expected_hts=expected,
288
+ missing_hts=self._get_missing_codes([GENERAL_301_CODE], has_301, COPPER_CODES, has_copper_tariff),
289
+ unexpected_hts=list(ALUMINUM_232_CODES & additional_set),
290
+ issue="; ".join(issues) if issues else "Correct - dual AL/CU with copper keyword"
291
+ )
292
+
293
+ # S12: Aluminum keyword
294
+ if has_aluminum_kw:
295
+ expected = [GENERAL_301_CODE] + list(ALUMINUM_232_CODES)
296
+ issues = []
297
+
298
+ if not has_301:
299
+ issues.append("Missing 99030133")
300
+ if not has_aluminum_232:
301
+ issues.append("Missing aluminum 232 tariff (99038507/08)")
302
+ if has_copper_tariff:
303
+ issues.append("Should NOT have copper tariff when description says aluminum")
304
+
305
+ status = "PASS" if not issues else "FAIL"
306
+ return ValidationResult(
307
+ entry_number=entry_number,
308
+ description=description,
309
+ primary_hts=primary_hts,
310
+ additional_hts=additional_hts,
311
+ scenario_id="S12",
312
+ scenario_summary=SCENARIO_SUMMARIES["S12"],
313
+ status=status,
314
+ expected_hts=expected,
315
+ missing_hts=self._get_missing_codes([GENERAL_301_CODE], has_301, ALUMINUM_232_CODES, has_aluminum_232),
316
+ unexpected_hts=list(COPPER_CODES & additional_set),
317
+ issue="; ".join(issues) if issues else "Correct - dual AL/CU with aluminum keyword"
318
+ )
319
+
320
+ # Dual Steel + Aluminum
321
+ if in_steel and in_aluminum:
322
+ # S7a: Both keywords - flag for review
323
+ if has_metal_kw and has_aluminum_kw:
324
+ return ValidationResult(
325
+ entry_number=entry_number,
326
+ description=description,
327
+ primary_hts=primary_hts,
328
+ additional_hts=additional_hts,
329
+ scenario_id="S7a",
330
+ scenario_summary=SCENARIO_SUMMARIES["S7a"],
331
+ status="FLAG",
332
+ expected_hts=[],
333
+ missing_hts=[],
334
+ unexpected_hts=[],
335
+ issue="AMBIGUOUS: Description contains both steel and aluminum keywords - manual review required"
336
+ )
337
+
338
+ # S7: Steel keyword
339
+ if has_metal_kw:
340
+ expected = [GENERAL_301_CODE] + list(STEEL_232_CODES)
341
+ issues = []
342
+
343
+ if not has_301:
344
+ issues.append("Missing 99030133")
345
+ if not has_steel_232:
346
+ issues.append("Missing steel 232 tariff (99038190/91)")
347
+ if has_aluminum_232:
348
+ issues.append("Should NOT have aluminum 232 when description says steel")
349
+ if has_mismatch:
350
+ issues.append("Should NOT have 99030125 with correct steel classification")
351
+
352
+ status = "PASS" if not issues else "FAIL"
353
+ return ValidationResult(
354
+ entry_number=entry_number,
355
+ description=description,
356
+ primary_hts=primary_hts,
357
+ additional_hts=additional_hts,
358
+ scenario_id="S7",
359
+ scenario_summary=SCENARIO_SUMMARIES["S7"],
360
+ status=status,
361
+ expected_hts=expected,
362
+ missing_hts=self._get_missing_codes([GENERAL_301_CODE], has_301, STEEL_232_CODES, has_steel_232),
363
+ unexpected_hts=list(ALUMINUM_232_CODES & additional_set) + ([MISMATCH_CODE] if has_mismatch else []),
364
+ issue="; ".join(issues) if issues else "Correct - dual ST/AL with steel keyword"
365
+ )
366
+
367
+ # S7: Aluminum keyword
368
+ if has_aluminum_kw:
369
+ expected = [GENERAL_301_CODE] + list(ALUMINUM_232_CODES)
370
+ issues = []
371
+
372
+ if not has_301:
373
+ issues.append("Missing 99030133")
374
+ if not has_aluminum_232:
375
+ issues.append("Missing aluminum 232 tariff (99038507/08)")
376
+ if has_steel_232:
377
+ issues.append("Should NOT have steel 232 when description says aluminum")
378
+ if has_mismatch:
379
+ issues.append("Should NOT have 99030125 with correct aluminum classification")
380
+
381
+ status = "PASS" if not issues else "FAIL"
382
+ return ValidationResult(
383
+ entry_number=entry_number,
384
+ description=description,
385
+ primary_hts=primary_hts,
386
+ additional_hts=additional_hts,
387
+ scenario_id="S7",
388
+ scenario_summary=SCENARIO_SUMMARIES["S7"],
389
+ status=status,
390
+ expected_hts=expected,
391
+ missing_hts=self._get_missing_codes([GENERAL_301_CODE], has_301, ALUMINUM_232_CODES, has_aluminum_232),
392
+ unexpected_hts=list(STEEL_232_CODES & additional_set) + ([MISMATCH_CODE] if has_mismatch else []),
393
+ issue="; ".join(issues) if issues else "Correct - dual ST/AL with aluminum keyword"
394
+ )
395
+
396
+ # S8: Neither keyword
397
+ expected = [MISMATCH_CODE]
398
+ issues = []
399
+
400
+ if not has_mismatch:
401
+ issues.append("Missing 99030125")
402
+ if has_steel_232 or has_aluminum_232:
403
+ issues.append("Should NOT have 232 tariffs without steel/aluminum keyword")
404
+
405
+ status = "PASS" if not issues else "FAIL"
406
+ return ValidationResult(
407
+ entry_number=entry_number,
408
+ description=description,
409
+ primary_hts=primary_hts,
410
+ additional_hts=additional_hts,
411
+ scenario_id="S8",
412
+ scenario_summary=SCENARIO_SUMMARIES["S8"],
413
+ status=status,
414
+ expected_hts=expected,
415
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
416
+ unexpected_hts=self._get_unexpected_232(additional_set),
417
+ issue="; ".join(issues) if issues else "Correct - dual ST/AL with no keyword, has 99030125"
418
+ )
419
+
420
+ # Priority 3: Single list scenarios
421
+
422
+ # Steel scenarios (S1, S2, S3)
423
+ if in_steel and not in_aluminum and not in_copper:
424
+ if has_metal_kw:
425
+ # S1: Steel HTS + metal keyword + 232 tariff
426
+ if has_steel_232:
427
+ expected = [GENERAL_301_CODE]
428
+ issues = []
429
+
430
+ if not has_301:
431
+ issues.append("Missing 99030133")
432
+ if has_mismatch:
433
+ issues.append("Should NOT have 99030125 with correct steel classification")
434
+
435
+ status = "PASS" if not issues else "FAIL"
436
+ return ValidationResult(
437
+ entry_number=entry_number,
438
+ description=description,
439
+ primary_hts=primary_hts,
440
+ additional_hts=additional_hts,
441
+ scenario_id="S1",
442
+ scenario_summary=SCENARIO_SUMMARIES["S1"],
443
+ status=status,
444
+ expected_hts=[GENERAL_301_CODE] + list(STEEL_232_CODES),
445
+ missing_hts=[GENERAL_301_CODE] if not has_301 else [],
446
+ unexpected_hts=[MISMATCH_CODE] if has_mismatch else [],
447
+ issue="; ".join(issues) if issues else "Correct - steel HTS + keyword + 232"
448
+ )
449
+ else:
450
+ # Steel HTS + metal keyword but no 232 - should have 232
451
+ issues = ["Missing steel 232 tariff (99038190/91)"]
452
+ if not has_301:
453
+ issues.append("Missing 99030133")
454
+
455
+ return ValidationResult(
456
+ entry_number=entry_number,
457
+ description=description,
458
+ primary_hts=primary_hts,
459
+ additional_hts=additional_hts,
460
+ scenario_id="S1",
461
+ scenario_summary=SCENARIO_SUMMARIES["S1"],
462
+ status="FAIL",
463
+ expected_hts=[GENERAL_301_CODE] + list(STEEL_232_CODES),
464
+ missing_hts=[GENERAL_301_CODE] + list(STEEL_232_CODES) if not has_301 else list(STEEL_232_CODES),
465
+ unexpected_hts=[],
466
+ issue="; ".join(issues)
467
+ )
468
+ else:
469
+ # S3: Steel HTS but NO metal keyword
470
+ expected = [MISMATCH_CODE]
471
+ issues = []
472
+
473
+ if not has_mismatch:
474
+ issues.append("Missing 99030125 - steel HTS without metal description")
475
+
476
+ status = "FLAG" # Always flag for review
477
+ return ValidationResult(
478
+ entry_number=entry_number,
479
+ description=description,
480
+ primary_hts=primary_hts,
481
+ additional_hts=additional_hts,
482
+ scenario_id="S3",
483
+ scenario_summary=SCENARIO_SUMMARIES["S3"],
484
+ status=status,
485
+ expected_hts=expected,
486
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
487
+ unexpected_hts=[],
488
+ issue="HTS in steel list but description has no metal keywords - verify classification"
489
+ )
490
+
491
+ # S2: Metal keyword but NOT in steel list
492
+ if has_metal_kw and not in_steel:
493
+ expected = [MISMATCH_CODE]
494
+ issues = []
495
+
496
+ if not has_mismatch:
497
+ issues.append("Missing 99030125")
498
+ if has_steel_232:
499
+ issues.append("Should NOT have steel 232 tariff - HTS not in steel list")
500
+
501
+ status = "PASS" if not issues else "FAIL"
502
+ return ValidationResult(
503
+ entry_number=entry_number,
504
+ description=description,
505
+ primary_hts=primary_hts,
506
+ additional_hts=additional_hts,
507
+ scenario_id="S2",
508
+ scenario_summary=SCENARIO_SUMMARIES["S2"],
509
+ status=status,
510
+ expected_hts=expected,
511
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
512
+ unexpected_hts=list(STEEL_232_CODES & additional_set),
513
+ issue="; ".join(issues) if issues else "Correct - metal keyword with non-steel HTS"
514
+ )
515
+
516
+ # Aluminum scenarios (S4, S5, S6)
517
+ if in_aluminum and not in_steel and not in_copper:
518
+ if has_aluminum_kw:
519
+ # S4: Aluminum HTS + aluminum keyword + 232 tariff
520
+ if has_aluminum_232:
521
+ expected = [GENERAL_301_CODE]
522
+ issues = []
523
+
524
+ if not has_301:
525
+ issues.append("Missing 99030133")
526
+
527
+ status = "PASS" if not issues else "FAIL"
528
+ return ValidationResult(
529
+ entry_number=entry_number,
530
+ description=description,
531
+ primary_hts=primary_hts,
532
+ additional_hts=additional_hts,
533
+ scenario_id="S4",
534
+ scenario_summary=SCENARIO_SUMMARIES["S4"],
535
+ status=status,
536
+ expected_hts=[GENERAL_301_CODE] + list(ALUMINUM_232_CODES),
537
+ missing_hts=[GENERAL_301_CODE] if not has_301 else [],
538
+ unexpected_hts=[],
539
+ issue="; ".join(issues) if issues else "Correct - aluminum HTS + keyword + 232"
540
+ )
541
+ else:
542
+ # Aluminum HTS + keyword but no 232
543
+ issues = ["Missing aluminum 232 tariff (99038507/08)"]
544
+ if not has_301:
545
+ issues.append("Missing 99030133")
546
+
547
+ return ValidationResult(
548
+ entry_number=entry_number,
549
+ description=description,
550
+ primary_hts=primary_hts,
551
+ additional_hts=additional_hts,
552
+ scenario_id="S4",
553
+ scenario_summary=SCENARIO_SUMMARIES["S4"],
554
+ status="FAIL",
555
+ expected_hts=[GENERAL_301_CODE] + list(ALUMINUM_232_CODES),
556
+ missing_hts=[GENERAL_301_CODE] + list(ALUMINUM_232_CODES) if not has_301 else list(ALUMINUM_232_CODES),
557
+ unexpected_hts=[],
558
+ issue="; ".join(issues)
559
+ )
560
+ else:
561
+ # S6: Aluminum HTS but NO aluminum keyword
562
+ expected = [MISMATCH_CODE]
563
+ issues = []
564
+
565
+ if not has_mismatch:
566
+ issues.append("Missing 99030125 - aluminum HTS without aluminum description")
567
+
568
+ status = "FLAG"
569
+ return ValidationResult(
570
+ entry_number=entry_number,
571
+ description=description,
572
+ primary_hts=primary_hts,
573
+ additional_hts=additional_hts,
574
+ scenario_id="S6",
575
+ scenario_summary=SCENARIO_SUMMARIES["S6"],
576
+ status=status,
577
+ expected_hts=expected,
578
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
579
+ unexpected_hts=[],
580
+ issue="HTS in aluminum list but description has no aluminum keywords - verify classification"
581
+ )
582
+
583
+ # S5: Aluminum keyword but NOT in aluminum list
584
+ if has_aluminum_kw and not in_aluminum:
585
+ expected = [MISMATCH_CODE]
586
+ issues = []
587
+
588
+ if not has_mismatch:
589
+ issues.append("Missing 99030125")
590
+ if has_aluminum_232:
591
+ issues.append("Should NOT have aluminum 232 tariff - HTS not in aluminum list")
592
+
593
+ status = "PASS" if not issues else "FAIL"
594
+ return ValidationResult(
595
+ entry_number=entry_number,
596
+ description=description,
597
+ primary_hts=primary_hts,
598
+ additional_hts=additional_hts,
599
+ scenario_id="S5",
600
+ scenario_summary=SCENARIO_SUMMARIES["S5"],
601
+ status=status,
602
+ expected_hts=expected,
603
+ missing_hts=[MISMATCH_CODE] if not has_mismatch else [],
604
+ unexpected_hts=list(ALUMINUM_232_CODES & additional_set),
605
+ issue="; ".join(issues) if issues else "Correct - aluminum keyword with non-aluminum HTS"
606
+ )
607
+
608
+ # Copper scenarios (S9, S10, COPPER_OK)
609
+ if in_copper and not in_steel and not in_aluminum:
610
+ if has_copper_kw:
611
+ # COPPER_OK: Copper HTS + copper keyword
612
+ expected = list(COPPER_CODES)
613
+ issues = []
614
+
615
+ if not has_copper_tariff:
616
+ issues.append("Missing copper tariff (99037801/02)")
617
+
618
+ status = "PASS" if not issues else "FAIL"
619
+ return ValidationResult(
620
+ entry_number=entry_number,
621
+ description=description,
622
+ primary_hts=primary_hts,
623
+ additional_hts=additional_hts,
624
+ scenario_id="COPPER_OK",
625
+ scenario_summary=SCENARIO_SUMMARIES["COPPER_OK"],
626
+ status=status,
627
+ expected_hts=expected,
628
+ missing_hts=list(COPPER_CODES) if not has_copper_tariff else [],
629
+ unexpected_hts=[],
630
+ issue="; ".join(issues) if issues else "Correct - copper HTS + keyword"
631
+ )
632
+ else:
633
+ # S10: Copper HTS but NO copper keyword
634
+ status = "FLAG"
635
+ return ValidationResult(
636
+ entry_number=entry_number,
637
+ description=description,
638
+ primary_hts=primary_hts,
639
+ additional_hts=additional_hts,
640
+ scenario_id="S10",
641
+ scenario_summary=SCENARIO_SUMMARIES["S10"],
642
+ status=status,
643
+ expected_hts=[],
644
+ missing_hts=[],
645
+ unexpected_hts=[],
646
+ issue="HTS in copper list but description has no copper keywords - verify classification"
647
+ )
648
+
649
+ # S9: Copper keyword but NOT in copper list
650
+ if has_copper_kw and not in_copper:
651
+ status = "FLAG"
652
+ return ValidationResult(
653
+ entry_number=entry_number,
654
+ description=description,
655
+ primary_hts=primary_hts,
656
+ additional_hts=additional_hts,
657
+ scenario_id="S9",
658
+ scenario_summary=SCENARIO_SUMMARIES["S9"],
659
+ status=status,
660
+ expected_hts=[],
661
+ missing_hts=[],
662
+ unexpected_hts=[],
663
+ issue="Description mentions copper but HTS not in copper list - potential misclassification"
664
+ )
665
+
666
+ # No applicable scenario
667
+ return ValidationResult(
668
+ entry_number=entry_number,
669
+ description=description,
670
+ primary_hts=primary_hts,
671
+ additional_hts=additional_hts,
672
+ scenario_id="NONE",
673
+ scenario_summary=SCENARIO_SUMMARIES["NONE"],
674
+ status="PASS",
675
+ expected_hts=[],
676
+ missing_hts=[],
677
+ unexpected_hts=[],
678
+ issue="No metal-related validation required"
679
+ )
680
+
681
def _get_unexpected_232(self, additional_set: Set[str]) -> List[str]:
    """Return the 232 tariff codes that are present in ``additional_set``.

    The candidate codes are the union of ``STEEL_232_CODES``,
    ``ALUMINUM_232_CODES`` and ``COPPER_CODES``; the result is the part of
    that union which also appears in ``additional_set``.

    Args:
        additional_set: Additional HTS codes declared on the entry.

    Returns:
        The matching codes in ascending order. Sorting keeps the reported
        list reproducible: the iteration order of a set of strings is not
        stable from one interpreter run to the next.
    """
    all_232 = STEEL_232_CODES | ALUMINUM_232_CODES | COPPER_CODES
    return sorted(all_232 & additional_set)
685
+
686
+ def _get_missing_codes(self, fixed_codes: List[str], has_fixed: bool,
687
+ variable_codes: Set[str], has_variable: bool) -> List[str]:
688
+ """Get list of missing codes"""
689
+ missing = []
690
+ if not has_fixed:
691
+ missing.extend(fixed_codes)
692
+ if not has_variable:
693
+ missing.extend(list(variable_codes))
694
+ return missing
695
+
696
+
697
def _cell_to_text(value) -> str:
    """Render one spreadsheet cell as clean text.

    ``None`` and float NaN become an empty string, floats holding a whole
    number are rendered without the trailing ``.0``, and everything else is
    stringified and stripped of surrounding whitespace.
    """
    if value is None:
        return ""
    if isinstance(value, float):
        if value != value:  # NaN is the only float that is unequal to itself
            return ""
        if value.is_integer():
            # Numeric code columns that contain blanks are read as floats;
            # drop the ".0" so the code compares equal to its string form.
            return str(int(value))
    return str(value).strip()


def validate_dataframe(df, validator: "HTSValidator",
                       description_col: str = "Description",
                       tariff_col: str = "Tariff",
                       entry_col: str = "Entry Number",
                       additional_cols: List[str] = None) -> "List[ValidationResult]":
    """Validate every row of ``df`` and return the results in row order.

    Args:
        df: Table of entries; rows are read through ``df.iterrows()``.
        validator: Object whose ``validate_entry`` method is called once per
            row with ``entry_number``, ``description``, ``primary_hts`` and
            ``additional_hts`` keyword arguments.
        description_col: Column holding the goods description.
        tariff_col: Column holding the primary HTS code.
        entry_col: Column holding the entry number. When the column is
            absent or the cell is blank, ``Row_<index>`` is used instead.
        additional_cols: Columns holding additional HTS codes. Defaults to
            ``"Primary 1"`` through ``"Primary 6"``; columns missing from
            the table are skipped.

    Returns:
        One ``validate_entry`` result per row.
    """
    if additional_cols is None:
        additional_cols = ["Primary 1", "Primary 2", "Primary 3",
                           "Primary 4", "Primary 5", "Primary 6"]

    results = []

    for idx, row in df.iterrows():
        entry_number = _cell_to_text(row.get(entry_col)) or f"Row_{idx}"
        description = _cell_to_text(row.get(description_col))
        primary_hts = _cell_to_text(row.get(tariff_col))

        # Gather the additional HTS codes, ignoring blank cells and the
        # literal text "nan" left behind by earlier string conversions.
        additional_hts = []
        for col in additional_cols:
            if col not in row:
                continue
            code = _cell_to_text(row[col])
            if code and code.lower() != "nan":
                additional_hts.append(code)

        results.append(
            validator.validate_entry(
                entry_number=entry_number,
                description=description,
                primary_hts=primary_hts,
                additional_hts=additional_hts,
            )
        )

    return results
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ openpyxl