Spaces:
Running
Running
Upload 4 files
Browse files- HTS_list.py +205 -0
- app.py +873 -0
- hts_validator.py +731 -0
- requirements.txt +3 -0
HTS_list.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 99037801 for copper content; 99037802 for non-copper content or goods with no copper
|
| 2 |
+
# Primary HTS codes treated as copper goods.
# Every entry is an 8-digit integer literal (no dots, no quotes), so callers
# comparing against string HTS values must normalise one side first.
Copper_primary_HTS_list = [
    74061000, 74091990, 74130010,
    74062000, 74092100, 74130050,
    74071015, 74092900, 74130090,
    74071030, 74093110, 74151000,
    74071050, 74093150, 74152100,
    74072115, 74093190, 74152900,
    74072130, 74093910, 74153305,
    74072150, 74093950, 74153310,
    74072170, 74093990, 74153380,
    74072190, 74094000, 74153900,
    74072916, 74099010, 74181000,
    74072934, 74099050, 74182010,
    74072938, 74099090, 74182050,
    74072940, 74101100, 74192000,
    74072950, 74101200, 74198003,
    74081130, 74102130, 74198006,
    74081160, 74102160, 74198009,
    74081900, 74102200, 74198015,
    74082100, 74111010, 74198016,
    74082210, 74111050, 74198017,
    74082250, 74112110, 74198030,
    74082910, 74112150, 74198050,
    74082950, 74112200, 85444210,
    74091110, 74112910, 85444220,
    74091150, 74112950, 85444290,
    74091910, 74121000, 85444910,
    74091950, 74122000
]
|
| 31 |
+
|
| 32 |
+
# Steel primary HTS List
# Entries are integer literals of mixed length: most are 8 digits, some are
# 10 digits, and three (4029968, 4029970, 4029990) are only 7 digits.
# NOTE(review): the 7-digit values presumably lost a leading zero when written
# as ints -- confirm the lookup code pads or normalises before comparing.
Steel_primary_HTS_list = [
    84313100, 84314200, 84314910, 84314990, 84321000, 84329000, 85479000, 94032000,
    94059920, 94059940, 94062000, 94069001, 84181000, 84183000, 84184000, 84221100,
    84501100, 84502000, 84512100, 84512900, 85098020, 85166040, 9403999020, 4029968,
    82119120, 84291100, 85042300, 4029970, 82119125, 84291900, 85043300, 4029990,
    82119130, 84292000, 8504909634, 2106909998, 82119140, 84293000, 8504909638,
    2710193050, 82119150, 84294000, 8504909642, 2711120020, 82119180, 84295110,
    85142040, 2804290010, 82119220, 84295150, 85142060, 2804400000, 82119240,
    84295210, 85162900, 2901220000, 82119260, 84295250, 85166060, 2903421000,
    82119290, 84295910, 86011000, 29034310, 82119300, 84295950, 86012000, 29034410,
    82119410, 84312000, 86021000, 29034510, 82119450, 84313900, 86029000, 29034900,
    82119510, 84314100, 86031000, 29035110, 82119550, 84314340, 86039000, 29035990,
    82119590, 84314380, 86040000, 2903710100, 82151000, 84331100, 86050000,
    3004909244, 82152000, 84332000, 86071100, 32081000, 82159130, 84335100,
    8607190300, 32082000, 82159160, 84335900, 86071906, 32089000, 82159190,
    84339010, 86071912, 32091000, 82159901, 84339050, 86071915, 32099000,
    82159905, 8443160000, 86071990, 32139000, 82159910, 8454200010, 86072150,
    32141000, 82159915, 8454200060, 8607301010, 33030010, 82159920, 84553000,
    8607301050, 33030020, 82159922, 8455904000, 8607301090, 33030030, 82159924,
    8455908000, 86073050, 33043000, 82159926, 84571000, 86079100, 33049950,
    82159930, 84749000, 86079950, 33051000, 82159935, 84771030, 86090000, 33053000,
    82159940, 84771040, 87011001, 33059000, 82159945, 84771090, 8701210080,
    33069000, 82159950, 84779025, 8701220080, 33071010, 83021060, 8477908601,
    8701230080, 33071020, 83024130, 84798955, 8701240080, 33072000, 83024160,
    84798965, 8701290080, 33074900, 83024230, 84799045, 87013010, 33079000,
    83024960, 84799055, 87013050, 34013010, 83052000, 84799065, 87019110,
    34013050, 83071060, 84799075, 87019150, 34023190, 8401400000, 84799085,
    87019210, 34024990, 84031000, 84799095, 87019250, 34025011, 8406904000,
    8480490010, 87019310, 34025051, 84072100, 8480718045, 87019350, 34029010,
    84079010, 8480718060, 87019410, 34029030, 84079090, 8480799010, 87019450,
    34029050, 84082010, 8482105004, 87019510, 34031910, 84082090, 8482105008,
    87019550, 34031950, 84099150, 8482105012, 87021031, 34039900, 84099192,
    8482105016, 87021061, 34051000, 84099199, 8482105024, 87031010, 34052000,
    84099991, 8482105028, 87031050, 34054000, 84099992, 8482105032, 87032101,
    34059000, 84099999, 8482105036, 87051000, 35061050, 8410900000, 8482105052,
    87052000, 35069110, 84118180, 8482105056, 8705900010, 35069150, 84122100,
    8482105060, 8705900020, 35069900, 84122980, 8482105064, 87060030, 38085910,
    8412909070, 8482105068, 87084030, 38085940, 8412909075, 8482200064,
    87084060, 38086110, 84138100, 8482200067, 87089210, 38086150, 8413919055,
    8482200090, 87089250, 38086210, 8413919060, 84829905, 87089260, 38086250,
    8413919096, 84829915, 87089275, 38086910, 84143040, 84829925, 87089315,
    38086950, 84148016, 84829935, 87089330, 38089115, 84149030, 84829945,
    87089923, 38089125, 84149041, 84829965, 87089981, 38089130, 84151030,
    8483101010, 87100000, 38089150, 84151060, 8483101050, 87113000, 38089410,
    84151090, 84831050, 87115000, 38089450, 84158101, 84832040, 87116000,
    38099100, 84158201, 84832080, 87141000, 38101000, 84158300, 84833040,
    87161000, 38111900, 84159040, 84833080, 87163900, 38112100, 84159080,
    84834010, 87168010, 38140010, 8417900000, 8483405020, 87168050, 38140020,
    84182100, 84834090, 87169010, 38140050, 84182920, 84835060, 87169030,
    38200000, 84189940, 84835090, 94017100, 3824999397, 84198150, 84836040,
    94017900, 3827610000, 84212900, 84836080, 94031000, 3827620000, 8424100000,
    84839020, 94039910, 3827630000, 84248990, 84839030, 9403999010, 3827640000,
    84254200, 84839050, 9403999015, 3827650000, 84262000, 84839070, 9403999040,
    39252000, 84269900, 84839080, 9403999045, 39269010, 84271040, 85015340,
    9403999051, 76141010, 84271080, 85015360, 9403999061, 8202390040, 84272040,
    85015380, 95069100, 82034060, 84272080, 8501640110, 82055955, 84279000,
    8502310000, 82057000, 84283200, 85030035, 8207200070, 84283300, 85030045,
    8207306062, 84283900, 85030065, 8207306095, 84286000, 85030075, 82111000,
    84287000, 85030090, 82119110, 84289003, 85030095,
    73012010, 73012050, 73023000, 73072110, 73072150, 73072210, 73072250, 73072300,
    73072900, 73079110, 73079130, 73079150, 73079230, 73079290, 73079330, 73079360,
    73079390, 73079910, 73079930, 73079950, 73081000, 73082000, 73083010, 73083050,
    73084000, 73089030, 73089060, 73089070, 73089095, 73090000, 73101000, 73102100,
    73102900, 73110000, 73121005, 73121010, 73121020, 73121030, 73121050, 73121060,
    73121070, 73121080, 73121090, 73129000, 73130000, 73141210, 73141220, 73141230,
    73141260, 73141290, 73141410, 73141420, 73141430, 73141460, 73141490, 73141901,
    73142000, 73143110, 73143150, 73143900, 73144100, 73144200, 73144930, 73144960,
    73145000, 73151100, 73151200, 73151900, 73152010, 73152050, 73158100, 73158210,
    73158230, 73158250, 73158270, 73158910, 73158930, 73158950, 73159000, 73160000,
    73170010, 73170020, 73170055, 73170065, 73170075, 73181100, 73181200, 73181300,
    73181410, 73181450, 73181520, 73181540, 73181550, 73181560, 73181580, 73181600,
    73181900, 73182100, 73182200, 73182300, 73182400, 73182900, 73194020, 73194030,
    73194050, 73199010, 73199090, 73201030, 73201060, 73201090, 73202010, 73202050,
    73209010, 73209050, 73211110, 73211130, 73211160, 73211200, 73211900, 73218110,
    73218150, 73218210, 73218250, 73218900, 73219010, 73219020, 73219040, 73219050,
    73219060, 73221900, 73229000, 73231000, 73239300, 73239400, 73239910, 73239930,
    73239950, 73239970, 73239990, 73241000, 73242900, 73249000, 73259100, 73259910,
    73259950, 73261100, 73261900, 73262000, 73269010, 73269025, 73269035, 73269045,
    73269060, 73269086, 7317005501, 7317005502, 7317005508, 7317005511, 7317005518,
    7317005519, 7317005520, 7317005530, 7317005540, 7317005550, 7317005570,
    7317005590, 7317006530, 7216910010, 7302909000, 73071930, 73071990
]
|
| 115 |
+
|
| 116 |
+
# Aluminum primary HTS List
# Entries are integer literals of mixed length: 10-digit and 8-digit values,
# plus three 7-digit values (4029968, 4029970, 4029990).
# NOTE(review): the 7-digit values presumably lost a leading zero when written
# as ints -- confirm the lookup code pads or normalises before comparing.
Aluminum_primary_HTS_list = [
    6603908100, 8302103000, 8302106030, 8302106060, 8302106090,
    8302200000, 8302303010, 8302303060,
    8302413000, 8302416015, 8302416045, 8302416050, 8302416080,
    8302423010, 8302423015, 8302423065,
    8302496035, 8302496045, 8302496055, 8302496085,
    8302500000, 8302603000, 8302609000,
    8305100050, 8306300000, 8414596590,
    8415908025, 8415908045, 8415908085,
    8418998005, 8418998050, 8418998060,
    8419505000, 8419901000, 8422900640,
    8424909080, 8473302000, 8473305100,
    8479899599, 8479908500, 8479909596,
    8481909060, 8481909085, 8486900000,
    8487900080, 8503009520, 8508700000,
    8513902000, 8515902000, 8516905000, 8516908050,
    8517710000, 8517790000,
    8529907300, 8529909760,
    8536908585, 8538100000,
    8541900000, 8543908885,
    8547900020, 8547900030, 8547900040,
    8708103050, 87081060, 8708295160,
    8708806590, 8708996890, 8716805010,
    8807300060, 9013908000, 9031909195,
    9401999081, 94031000, 94032000,
    9403991040, 9403999010, 9403999015,
    9403999020, 9403999040, 9403999045,
    9405994020,
    9506114080, 9506514000, 9506516000,
    9506594040, 9506702090,
    9506910010, 9506910020, 9506910030,
    9506990510, 9506990520, 9506990530,
    9506991500, 9506992000, 9506992580,
    9506992800, 9506995500, 9506996080,
    9507302000, 9507304000, 9507306000,
    9507308000, 9507906000,
    9603908050,
    2203000060, 2203000090,
    4029968, 4029970, 4029990,
    2106909998, 2710193050,
    29034310, 29034510, 29034900, 29035110, 29035990,
    3004909244,
    32081000, 32082000, 32089000, 32091000, 32099000,
    32139000, 32141000,
    33030010, 33030020, 33030030,
    33043000, 33049950,
    33051000, 33053000, 33059000,
    33069000,
    33071010, 33071020, 33072000, 33074900, 33079000,
    34013010, 34013050,
    34023190, 34024990, 34025011, 34025051,
    34029010, 34029030, 34029050,
    34031910, 34031950, 34039900,
    34051000, 34052000, 34054000, 34059000,
    35061050, 35069110, 35069150, 35069900,
    3701300000,
    38085910, 38085940, 38086110, 38086150,
    38086210, 38086250, 38086910, 38086950,
    38089115, 38089125, 38089130, 38089150,
    38089410, 38089450,
    38099100, 38101000,
    38111900, 38112100,
    38140010, 38140020, 38140050,
    38200000, 3824999397,
    7308200035,
    8307906000, 8309900020, 8309900025,
    8412909070, 8412909075, 84148016, 84181000,
    8419501000, 84248990, 8443160000,
    84501100, 84512100,
    84672200, 84672900, 84678100, 84678950,
    8483405020, 8483905020,
    8501640110, 85022000, 8502310000,
    8503009546, 8503009570,
    85043120, 85043140, 85043160,
    85043300, 85043400,
    85049020, 85049041, 85049065, 85049075, 85049096,
    85441900, 85444290, 8544492000, 8544499000,
    8544602000, 8544606000,
    8716390040,
    94017900,
    76101000, 76109000, 76129010,
    7615102015, 7615102025, 7615103015, 7615103025,
    7615105020, 7615105040,
    7615107125, 7615107130, 7615107155, 7615107180,
    7615109100, 7615200000,
    7616109090, 7616991000, 7616995130,
    7616995140, 7616995190,
    7612100000, 7612905000, 7613000000, 76141010
]
|
app.py
ADDED
|
@@ -0,0 +1,873 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HTS Checker - Streamlit Application for HTS Tariff Auditing
|
| 3 |
+
Deployed on Hugging Face Spaces
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from io import BytesIO
|
| 9 |
+
import hashlib
|
| 10 |
+
from hts_validator import HTSValidator, validate_dataframe, SCENARIO_SUMMARIES
|
| 11 |
+
from HTS_list import Steel_primary_HTS_list, Aluminum_primary_HTS_list, Copper_primary_HTS_list
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Page configuration
# Sets the browser-tab title and switches the app to the wide layout.
# NOTE(review): page_icon is an empty string here -- possibly an emoji that
# was lost in transit; confirm the intended value.
st.set_page_config(
    page_title="HTS Checker - Tariff Audit Tool",
    page_icon="",
    layout="wide"
)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@st.cache_data
def load_and_validate_excel(file_content, file_name, keywords_hash):
    """Load the uploaded Excel report and normalise its HTS columns.

    Despite the name, no validation happens here: the function only reads the
    workbook and cleans the HTS columns. It is cached so that widget changes
    do not re-parse the file.

    Args:
        file_content: Raw workbook bytes, or any other input accepted by
            ``pd.read_excel`` (path, file-like object).
        file_name: Not read by the body; it only takes part in the cache key.
        keywords_hash: Not read by the body; it only takes part in the cache
            key, so a keyword change yields a fresh cache entry.

    Returns:
        The loaded DataFrame. Each HTS column that is present holds strings,
        with a trailing ".0" removed and missing cells replaced by "".
    """
    hts_columns = ["Tariff", "Primary 1", "Primary 2", "Primary 3",
                   "Primary 4", "Primary 5", "Primary 6"]

    # pandas has deprecated passing raw bytes to read_excel; a BytesIO wrapper
    # is the supported way to read from an in-memory byte string.
    if isinstance(file_content, (bytes, bytearray)):
        file_content = BytesIO(file_content)

    # Read the HTS columns as text so long codes are not turned into floats.
    df = pd.read_excel(file_content, dtype={col: str for col in hts_columns})

    for col in hts_columns:
        if col not in df.columns:
            continue
        # Blank out missing cells before the string cast so they never turn
        # into the literal text "nan"/"None".
        cleaned = df[col].fillna("").astype(str)
        # Drop the ".0" left behind when a code was stored as a float.
        cleaned = cleaned.str.replace(r'\.0$', '', regex=True)
        # Kept from the original clean-up: a cell whose text is exactly "nan"
        # is still treated as empty.
        df[col] = cleaned.replace('nan', '')

    return df
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@st.cache_data
def run_validation(df_hash, _df, _validator):
    """Validate every row of ``_df`` with ``_validator`` (cached).

    Args:
        df_hash: Hash of the dataframe; the only argument without a leading
            underscore.
        _df: DataFrame handed to ``validate_dataframe``.
        _validator: Validator handed to ``validate_dataframe``.

    Returns:
        Whatever ``validate_dataframe(_df, _validator)`` returns.

    NOTE(review): Streamlit leaves underscore-prefixed parameters out of the
    cache key, so a keyword change inside ``_validator`` presumably does not
    invalidate this cache on its own -- confirm the caller clears it.
    """
    return validate_dataframe(_df, _validator)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_df_hash(df):
    """Return an MD5 hex digest identifying a dataframe for caching.

    The digest covers the per-row hashes from ``pd.util.hash_pandas_object``
    (cell values and index) and the column labels. The labels are added
    explicitly because the row hashes do not depend on them: without this,
    two frames with identical values under different headers, or any two
    empty frames, would share one cache key.

    Args:
        df: DataFrame to fingerprint.

    Returns:
        32-character hexadecimal digest string.
    """
    digest = hashlib.md5()
    digest.update(pd.util.hash_pandas_object(df).values.tobytes())
    # Unit-separator join keeps ["ab", "c"] distinct from ["a", "bc"].
    digest.update("\x1f".join(map(str, df.columns)).encode("utf-8"))
    return digest.hexdigest()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def get_keywords_hash(keywords):
    """Return an MD5 hex digest of ``str(keywords)``.

    Used as a cache-invalidation token: any change to the textual
    representation of the keyword settings produces a different digest.
    """
    encoded = str(keywords).encode()
    return hashlib.md5(encoded).hexdigest()
|
| 62 |
+
|
| 63 |
+
# Initialize session state
# Each key is only written when it is absent, so values already stored in the
# session are never overwritten on a rerun.
_SESSION_DEFAULTS = {
    "keywords": {
        "metal": ["steel", "stainless steel", "carbon steel", "iron", "metal"],
        "aluminum": ["aluminum", "aluminium"],
        "copper": ["copper"],
        "zinc": ["zinc"],
        "plastics": ["plastic", "abs", "pu", "pvc", "polyester", "nylon"]
    },
    "export_cache": [],
    "validation_results": None,
    "original_df": None,
}

for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def get_validator():
    """Build an ``HTSValidator`` from the keyword lists in session state.

    Reads the "metal", "aluminum", "copper", "zinc" and "plastics" lists from
    ``st.session_state.keywords`` and passes each one to the matching
    ``*_keywords`` constructor argument.
    """
    active = st.session_state.keywords
    return HTSValidator(
        metal_keywords=active["metal"],
        aluminum_keywords=active["aluminum"],
        copper_keywords=active["copper"],
        zinc_keywords=active["zinc"],
        plastics_keywords=active["plastics"],
    )
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def color_status(val):
    """Return the CSS background rule for a status cell.

    "PASS", "FAIL" and "FLAG" map to a colour; any other value yields "".
    """
    palette = (
        ("PASS", "background-color: #90EE90"),  # Light green
        ("FAIL", "background-color: #FFB6C1"),  # Light red
        ("FLAG", "background-color: #FFFFE0"),  # Light yellow
    )
    # Compared with == in order, exactly like an if/elif chain would.
    for status, css in palette:
        if val == status:
            return css
    return ""
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def format_hts(hts_value):
    """Format an HTS value as a string, removing a trailing ".0".

    Args:
        hts_value: HTS code as a string or number; may be empty or missing.

    Returns:
        The code as text. Falsy values, ``None``, float NaN and the literal
        text "nan" all yield "", matching how the Excel loader blanks
        missing cells.
    """
    if hts_value is None:
        return ""
    # A float NaN is truthy, so the falsy check below would let it through
    # and it would be rendered as "nan". NaN is the only float != itself.
    if isinstance(hts_value, float) and hts_value != hts_value:
        return ""
    if not hts_value:
        return ""

    text = str(hts_value)
    if text == "nan":
        return ""
    # Remove .0 suffix if present (from float conversion)
    if text.endswith(".0"):
        text = text[:-2]
    return text
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# Column order of the frame built by results_to_dataframe. Declared once so
# that an empty result list still yields a frame with every column present.
_RESULT_COLUMNS = [
    "Entry Number",
    "Description",
    "Full Description",
    "Primary HTS",
    "Additional HTS",
    "Scenario",
    "Scenario Summary",
    "Status",
    "Expected HTS",
    "Missing HTS",
    "Unexpected HTS",
    "Issue",
]


def _join_hts(codes):
    """Format every non-empty HTS code and join them with ", "."""
    return ", ".join(format_hts(code) for code in (codes or ()) if code)


def results_to_dataframe(results):
    """Convert validation results to a DataFrame, one row per result.

    Args:
        results: Iterable of result objects exposing ``entry_number``,
            ``description``, ``primary_hts``, ``additional_hts``,
            ``scenario_id``, ``scenario_summary``, ``status``,
            ``expected_hts``, ``missing_hts``, ``unexpected_hts`` and
            ``issue``.

    Returns:
        DataFrame with the columns listed in ``_RESULT_COLUMNS``. The
        "Description" column is cut to 100 characters plus "..." when the
        full text is longer; "Full Description" keeps the whole text. An
        empty ``results`` gives an empty frame that still has all columns.
    """
    rows = []
    for r in results:
        # A missing description is treated as empty text so len() and
        # slicing below cannot fail on None.
        description = "" if r.description is None else r.description
        if len(description) > 100:
            short_description = description[:100] + "..."
        else:
            short_description = description

        rows.append({
            "Entry Number": r.entry_number,
            "Description": short_description,
            "Full Description": description,
            "Primary HTS": format_hts(r.primary_hts),
            "Additional HTS": _join_hts(r.additional_hts),
            "Scenario": r.scenario_id,
            "Scenario Summary": r.scenario_summary,
            "Status": r.status,
            "Expected HTS": _join_hts(r.expected_hts),
            "Missing HTS": _join_hts(r.missing_hts),
            "Unexpected HTS": _join_hts(r.unexpected_hts),
            "Issue": r.issue
        })
    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def export_to_excel(df, results_df=None):
    """Serialise a DataFrame to an in-memory Excel workbook.

    Without ``results_df`` the frame is written unchanged to a sheet named
    "Export". With ``results_df`` a copy of ``df`` is written to a sheet named
    "Audit Results"; the validation columns are appended positionally, but
    only when both frames have the same number of rows.

    Returns:
        BytesIO positioned at the start of the workbook bytes.
    """
    # (column name in the export, column name in results_df)
    validation_columns = (
        ("Scenario ID", "Scenario"),
        ("Scenario Summary", "Scenario Summary"),
        ("Status", "Status"),
        ("Expected HTS", "Expected HTS"),
        ("Missing HTS", "Missing HTS"),
        ("Unexpected HTS", "Unexpected HTS"),
        ("Issue Description", "Issue"),
    )

    if results_df is None:
        sheet_title = "Export"
        frame = df
    else:
        sheet_title = "Audit Results"
        # Work on a copy so the caller's frame is left untouched.
        frame = df.copy()
        if len(results_df) == len(frame):
            for export_name, result_name in validation_columns:
                frame[export_name] = results_df[result_name].values

    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine="openpyxl") as writer:
        frame.to_excel(writer, sheet_name=sheet_title, index=False)

    buffer.seek(0)
    return buffer
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
# Main app: page heading followed by the tab bar that hosts each workflow step.
st.title("HTS Checker - Tariff Audit Tool")
st.markdown("Audit primary HTS codes against additional tariffs and description keywords")

# Tab captions in display order; the unpacking below must list one name per caption.
tab_captions = [
    "Upload & Filter",
    "Validation Results",
    "Unique Combinations",
    "Keyword Management",
    "Export Selection",
    "HTS Reference",
]
tab1, tab2, tab2b, tab3, tab4, tab5 = st.tabs(tab_captions)
|
| 184 |
+
|
| 185 |
+
# Tab 1: Upload & Filter
# Loads the entry report, applies the two text filters, validates the whole
# file once per upload and publishes the filtered slice through session state
# for the other tabs.
with tab1:
    st.header("Upload Excel File")

    uploaded_file = st.file_uploader(
        "Upload your entry report Excel file",
        type=["xlsx", "xls"],
        help="Upload the customizable entry report from NetCHB"
    )

    if uploaded_file is not None:
        try:
            # Use cached loading function
            keywords_hash = get_keywords_hash(st.session_state.keywords)
            df = load_and_validate_excel(uploaded_file, uploaded_file.name, keywords_hash)

            st.session_state.original_df = df
            st.success(f"Loaded {len(df)} rows")

            # Display column mapping info
            with st.expander("Column Mapping"):
                st.markdown("""
                **Expected Columns:**
                - Column E: `Description` - Product description for keyword matching
                - Column F: `Tariff` - 10-digit Primary HTS code
                - Columns I-N: `Primary 1-6` - Additional HTS codes
                """)

                st.write("**Detected columns:**", df.columns.tolist())

            # Filter controls
            st.subheader("Filter Options")

            col1, col2 = st.columns(2)

            with col1:
                hts_filter = st.text_input(
                    "Filter by Primary HTS (partial match)",
                    placeholder="e.g., 7301 or 730120",
                    help="Enter partial HTS to filter entries"
                )

            with col2:
                desc_exclude = st.text_input(
                    "Exclude by description keyword",
                    placeholder="e.g., polyester",
                    help="Exclude entries containing this keyword in description"
                )

            # Apply filters
            filtered_df = df.copy()

            if hts_filter:
                # Fall back to the sixth column (Column F) when the header is missing.
                tariff_col = "Tariff" if "Tariff" in df.columns else df.columns[5]
                # regex=False: the input is a literal fragment, so "." or "("
                # must not be interpreted as (or fail as) a regular expression.
                filtered_df = filtered_df[
                    filtered_df[tariff_col].astype(str).str.contains(
                        hts_filter, regex=False, na=False
                    )
                ]

            if desc_exclude:
                # Fall back to the fifth column (Column E) when the header is missing.
                desc_col = "Description" if "Description" in df.columns else df.columns[4]
                filtered_df = filtered_df[
                    ~filtered_df[desc_col].astype(str).str.lower().str.contains(
                        desc_exclude.lower(), regex=False, na=False
                    )
                ]

            st.write(f"**Showing {len(filtered_df)} of {len(df)} entries**")

            # Display filtered data
            if len(filtered_df) > 0:
                # Show key columns
                display_cols = ["Entry Number", "Description", "Tariff",
                                "Primary 1", "Primary 2", "Primary 3",
                                "Primary 4", "Primary 5", "Primary 6"]
                display_cols = [c for c in display_cols if c in filtered_df.columns]

                st.dataframe(
                    filtered_df[display_cols].head(100),
                    use_container_width=True
                )

                if len(filtered_df) > 100:
                    st.info("Showing first 100 rows.")

                # Run validation ONCE on full dataset (cached), then filter results
                needs_validation = (
                    "cached_full_results" not in st.session_state
                    or st.session_state.get("cached_file_name") != uploaded_file.name
                )
                if needs_validation:
                    with st.spinner("Validating all entries (one-time)..."):
                        validator = get_validator()
                        full_results = validate_dataframe(df, validator)
                        full_results_df = results_to_dataframe(full_results)
                        st.session_state.cached_full_results = full_results_df
                        st.session_state.cached_file_name = uploaded_file.name

                # Filter cached results based on current filters.
                # NOTE(review): index labels of df are used as positions here,
                # which is only correct while df keeps a default 0..n-1 index.
                full_results_df = st.session_state.cached_full_results
                filtered_indices = filtered_df.index.tolist()
                filtered_results_df = full_results_df.iloc[filtered_indices].copy()

                st.session_state.validation_results = filtered_results_df
                st.session_state.filtered_df = filtered_df

                st.success(f"Showing {len(filtered_df)} entries. Go to 'Validation Results' tab to review.")

        except Exception as e:
            st.error(f"Error loading file: {str(e)}")
|
| 290 |
+
|
| 291 |
+
# Tab 2: Validation Results
def _add_results_to_export_cache(rows_df):
    """Append rows of rows_df to the export cache, skipping duplicates.

    A row counts as a duplicate when its (Entry Number, Primary HTS,
    Description) triple is already in the cache. Returns the number of rows
    actually added.
    """
    key_fields = ("Entry Number", "Primary HTS", "Description")
    # Build the lookup once instead of rescanning the cache for every row.
    seen = {
        tuple(cached.get(field, "") for field in key_fields)
        for cached in st.session_state.export_cache
    }
    added = 0
    for _, row in rows_df.iterrows():
        row_dict = row.to_dict()
        key = tuple(row_dict.get(field, "") for field in key_fields)
        if key not in seen:
            seen.add(key)
            st.session_state.export_cache.append(row_dict)
            added += 1
    return added


with tab2:
    st.header("Validation Results")

    if st.session_state.validation_results is None:
        st.info("Upload a file and run validation first.")
    else:
        # Results are already a DataFrame now (cached)
        results_df = st.session_state.validation_results.copy()

        # Summary statistics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            pass_count = len(results_df[results_df["Status"] == "PASS"])
            st.metric("PASS", pass_count)
        with col2:
            fail_count = len(results_df[results_df["Status"] == "FAIL"])
            st.metric("FAIL", fail_count)
        with col3:
            flag_count = len(results_df[results_df["Status"] == "FLAG"])
            st.metric("FLAG", flag_count)
        with col4:
            none_count = len(results_df[results_df["Scenario"] == "NONE"])
            st.metric("No Match", none_count)

        # Filter by status
        st.subheader("Filter Results")

        col1, col2 = st.columns(2)
        with col1:
            status_filter = st.multiselect(
                "Filter by Status",
                options=["PASS", "FAIL", "FLAG"],
                default=["FAIL", "FLAG"]
            )

        with col2:
            scenario_filter = st.multiselect(
                "Filter by Scenario",
                options=list(SCENARIO_SUMMARIES.keys()),
                default=[]
            )

        # Apply filters (an empty selection means "no restriction")
        display_df = results_df.copy()

        if status_filter:
            display_df = display_df[display_df["Status"].isin(status_filter)]

        if scenario_filter:
            display_df = display_df[display_df["Scenario"].isin(scenario_filter)]

        # Exclude "NONE" scenario by default
        show_none = st.checkbox("Show 'No Match' entries", value=False)
        if not show_none:
            display_df = display_df[display_df["Scenario"] != "NONE"]

        st.write(f"**Showing {len(display_df)} results**")

        # Display results table
        if len(display_df) > 0:
            # Select columns to display
            display_columns = [
                "Entry Number", "Description", "Primary HTS",
                "Additional HTS", "Scenario", "Status", "Issue"
            ]

            # Interactive filtering section
            st.markdown("**Interactive Filters:**")
            filter_col1, filter_col2, filter_col3 = st.columns(3)

            with filter_col1:
                hts_search = st.text_input(
                    "Filter by Primary HTS",
                    placeholder="e.g., 7301 or 8302",
                    key="results_hts_filter"
                )

            with filter_col2:
                desc_search = st.text_input(
                    "Filter by Description",
                    placeholder="e.g., steel, aluminum",
                    key="results_desc_filter"
                )

            with filter_col3:
                additional_hts_search = st.text_input(
                    "Filter by Additional HTS",
                    placeholder="e.g., 99038191",
                    key="results_additional_filter"
                )

            # Apply interactive filters. regex=False keeps the input literal so
            # characters such as "." or "(" neither over-match nor raise.
            interactive_df = display_df.copy()

            if hts_search:
                interactive_df = interactive_df[
                    interactive_df["Primary HTS"].astype(str).str.contains(
                        hts_search, case=False, regex=False, na=False
                    )
                ]

            if desc_search:
                # Search the untruncated text so matches beyond the 100-character
                # preview shown in the table are still found.
                desc_column = (
                    "Full Description"
                    if "Full Description" in interactive_df.columns
                    else "Description"
                )
                interactive_df = interactive_df[
                    interactive_df[desc_column].astype(str).str.contains(
                        desc_search, case=False, regex=False, na=False
                    )
                ]

            if additional_hts_search:
                interactive_df = interactive_df[
                    interactive_df["Additional HTS"].astype(str).str.contains(
                        additional_hts_search, case=False, regex=False, na=False
                    )
                ]

            st.write(f"**Filtered: {len(interactive_df)} of {len(display_df)} results**")

            # Store interactive filtered df for export
            st.session_state.interactive_filtered_df = interactive_df

            # Styler.applymap is deprecated in favour of Styler.map; use
            # whichever the installed pandas provides.
            styler = interactive_df[display_columns].style
            style_cells = styler.map if hasattr(styler, "map") else styler.applymap
            styled_df = style_cells(color_status, subset=["Status"])

            st.dataframe(styled_df, use_container_width=True, height=400)

            # Scenario legend
            with st.expander("Scenario Legend"):
                for scenario_id, summary in SCENARIO_SUMMARIES.items():
                    st.write(f"**{scenario_id}**: {summary}")

            # Bulk Export Actions
            st.subheader("Add to Export Cache")
            st.markdown("Use bulk actions to add **currently filtered** results to export cache")

            col1, col2, col3 = st.columns(3)

            with col1:
                if st.button("Add ALL Filtered to Cache", type="primary"):
                    added_count = _add_results_to_export_cache(interactive_df)
                    st.success(f"Added {added_count} entries to cache ({len(st.session_state.export_cache)} total)")

            with col2:
                if st.button("Add FAIL Only to Cache"):
                    fail_df = interactive_df[interactive_df["Status"] == "FAIL"]
                    added_count = _add_results_to_export_cache(fail_df)
                    st.success(f"Added {added_count} FAIL entries to cache")

            with col3:
                if st.button("Add FLAG Only to Cache"):
                    flag_df = interactive_df[interactive_df["Status"] == "FLAG"]
                    added_count = _add_results_to_export_cache(flag_df)
                    st.success(f"Added {added_count} FLAG entries to cache")

            # Add by scenario
            st.markdown("**Add by Scenario (from filtered results):**")
            scenario_cols = st.columns(4)
            available_scenarios = interactive_df["Scenario"].unique().tolist()

            for idx, scenario in enumerate(available_scenarios[:8]):  # Limit to 8 buttons
                # Buttons wrap across the four columns, left to right.
                with scenario_cols[idx % 4]:
                    scenario_df = interactive_df[interactive_df["Scenario"] == scenario]
                    scenario_count = len(scenario_df)
                    if st.button(f"{scenario} ({scenario_count})", key=f"add_scenario_{scenario}"):
                        added_count = _add_results_to_export_cache(scenario_df)
                        st.success(f"Added {added_count} {scenario} entries to cache")

            # Show cache status
            st.info(f"Current cache: {len(st.session_state.export_cache)} entries. Go to 'Export Selection' tab to download.")
|
| 489 |
+
|
| 490 |
+
# Tab 2b: Unique Combinations
def _add_unique_to_export_cache(rows_df):
    """Append unique-combination rows to the export cache, skipping duplicates.

    A row counts as a duplicate when its (Primary HTS, Full Description) pair
    is already in the cache. Returns the number of rows actually added.
    """
    key_fields = ("Primary HTS", "Full Description")
    # Build the lookup once instead of rescanning the cache for every row.
    seen = {
        tuple(cached.get(field, "") for field in key_fields)
        for cached in st.session_state.export_cache
    }
    added = 0
    for _, row in rows_df.iterrows():
        row_dict = row.to_dict()
        key = tuple(row_dict.get(field, "") for field in key_fields)
        if key not in seen:
            seen.add(key)
            st.session_state.export_cache.append(row_dict)
            added += 1
    return added


with tab2b:
    st.header("Unique HTS + Description Combinations")
    st.markdown("View unique combinations to avoid reviewing duplicates")

    if st.session_state.validation_results is None:
        st.info("Upload a file and run validation first.")
    else:
        results_df = st.session_state.validation_results.copy()

        # Filter by status first
        st.subheader("Filter Options")
        col1, col2 = st.columns(2)

        with col1:
            unique_status_filter = st.multiselect(
                "Filter by Status",
                options=["PASS", "FAIL", "FLAG"],
                default=["FAIL", "FLAG"],
                key="unique_status_filter"
            )

        with col2:
            unique_scenario_filter = st.multiselect(
                "Filter by Scenario",
                options=list(SCENARIO_SUMMARIES.keys()),
                default=[],
                key="unique_scenario_filter"
            )

        # Apply filters (an empty selection means "no restriction")
        filtered_df = results_df.copy()

        if unique_status_filter:
            filtered_df = filtered_df[filtered_df["Status"].isin(unique_status_filter)]

        if unique_scenario_filter:
            filtered_df = filtered_df[filtered_df["Scenario"].isin(unique_scenario_filter)]

        # Exclude NONE by default
        show_none_unique = st.checkbox("Show 'No Match' entries", value=False, key="show_none_unique")
        if not show_none_unique:
            filtered_df = filtered_df[filtered_df["Scenario"] != "NONE"]

        if len(filtered_df) > 0:
            # Group by Primary HTS + Description (use Full Description for grouping)
            # Aggregate to get unique combinations
            unique_df = filtered_df.groupby(
                ["Primary HTS", "Full Description"], as_index=False
            ).agg({
                "Entry Number": "count",  # Count occurrences
                "Additional HTS": "first",  # Take first (should be same for same HTS+desc)
                "Scenario": "first",
                "Scenario Summary": "first",
                "Status": "first",
                "Expected HTS": "first",
                "Missing HTS": "first",
                "Unexpected HTS": "first",
                "Issue": "first"
            }).rename(columns={"Entry Number": "Count"})

            # Sort by count descending to show most common first
            unique_df = unique_df.sort_values("Count", ascending=False)

            # Create shorter description for display. Slice the string form so
            # a non-string value cannot break the preview.
            unique_df["Description"] = unique_df["Full Description"].apply(
                lambda x: str(x)[:80] + "..." if len(str(x)) > 80 else x
            )

            st.write(f"**{len(unique_df)} unique combinations** (from {len(filtered_df)} total entries)")

            # Interactive filters for unique view
            st.markdown("**Search Filters:**")
            ucol1, ucol2 = st.columns(2)

            with ucol1:
                unique_hts_search = st.text_input(
                    "Filter by Primary HTS",
                    placeholder="e.g., 7301 or 8302",
                    key="unique_hts_search"
                )

            with ucol2:
                unique_desc_search = st.text_input(
                    "Filter by Description",
                    placeholder="e.g., steel, aluminum",
                    key="unique_desc_search"
                )

            # Apply search filters. regex=False keeps the input literal so
            # characters such as "." or "(" neither over-match nor raise.
            display_unique_df = unique_df.copy()

            if unique_hts_search:
                display_unique_df = display_unique_df[
                    display_unique_df["Primary HTS"].astype(str).str.contains(
                        unique_hts_search, case=False, regex=False, na=False
                    )
                ]

            if unique_desc_search:
                # Search the untruncated text so matches beyond the 80-character
                # preview shown in the table are still found.
                display_unique_df = display_unique_df[
                    display_unique_df["Full Description"].astype(str).str.contains(
                        unique_desc_search, case=False, regex=False, na=False
                    )
                ]

            st.write(f"**Showing {len(display_unique_df)} unique combinations**")

            # Display columns
            display_cols = [
                "Primary HTS", "Description", "Additional HTS",
                "Scenario", "Status", "Count", "Issue"
            ]

            # Styler.applymap is deprecated in favour of Styler.map; use
            # whichever the installed pandas provides.
            unique_styler = display_unique_df[display_cols].style
            style_cells = unique_styler.map if hasattr(unique_styler, "map") else unique_styler.applymap
            styled_unique = style_cells(color_status, subset=["Status"])

            st.dataframe(styled_unique, use_container_width=True, height=400)

            # Bulk export for unique combinations
            st.subheader("Add Unique Combinations to Cache")

            col1, col2 = st.columns(2)

            with col1:
                if st.button("Add ALL Unique to Cache", type="primary", key="add_all_unique"):
                    added_count = _add_unique_to_export_cache(display_unique_df)
                    st.success(f"Added {added_count} unique combinations to cache")

            with col2:
                if st.button("Add FAIL/FLAG Unique to Cache", key="add_fail_flag_unique"):
                    fail_flag_df = display_unique_df[display_unique_df["Status"].isin(["FAIL", "FLAG"])]
                    added_count = _add_unique_to_export_cache(fail_flag_df)
                    st.success(f"Added {added_count} FAIL/FLAG combinations to cache")

            st.info(f"Current cache: {len(st.session_state.export_cache)} entries")
        else:
            st.info("No results matching the selected filters.")
|
| 641 |
+
|
| 642 |
+
# Tab 3: Keyword Management
# One text area per keyword category; saving or resetting drops the cached
# validation results so the next run re-validates the file.
with tab3:
    st.header("Keyword Management")
    st.markdown("Edit keyword lists used for validation. Changes apply immediately.")

    # (category key, heading word, text-area height), grouped by layout column.
    left_fields = (("metal", "Metal", 150), ("aluminum", "Aluminum", 100), ("copper", "Copper", 100))
    right_fields = (("zinc", "Zinc", 100), ("plastics", "Plastics", 150))

    keyword_inputs = {}
    col1, col2 = st.columns(2)

    for column, fields in ((col1, left_fields), (col2, right_fields)):
        with column:
            for category, title, box_height in fields:
                st.subheader(f"{title} Keywords")
                keyword_inputs[category] = st.text_area(
                    f"{title} keywords (one per line)",
                    value="\n".join(st.session_state.keywords[category]),
                    height=box_height,
                    key=f"{category}_input"
                )

    # Session entries holding the cached validation run.
    cached_entries = ("cached_full_results", "cached_file_name")

    col1, col2 = st.columns(2)

    with col1:
        if st.button("Save Keywords", type="primary"):
            for category, raw_text in keyword_inputs.items():
                # One keyword per line; blank lines are dropped.
                st.session_state.keywords[category] = [
                    line.strip() for line in raw_text.split("\n") if line.strip()
                ]
            # Clear cached results to force re-validation
            for entry in cached_entries:
                if entry in st.session_state:
                    del st.session_state[entry]
            st.success("Keywords saved! Re-upload file or refresh to apply changes.")

    with col2:
        if st.button("Reset to Defaults"):
            st.session_state.keywords = {
                "metal": ["steel", "stainless steel", "carbon steel", "iron", "metal"],
                "aluminum": ["aluminum", "aluminium"],
                "copper": ["copper"],
                "zinc": ["zinc"],
                "plastics": ["plastic", "abs", "pu", "pvc", "polyester", "nylon"]
            }
            # Clear cached results
            for entry in cached_entries:
                if entry in st.session_state:
                    del st.session_state[entry]
            st.success("Keywords reset to defaults!")
            st.rerun()
|
| 733 |
+
|
| 734 |
+
# Tab 4: Export Selection
# Offers two downloads: the hand-picked export cache, and the original rows
# merged with their validation results for the chosen statuses.
with tab4:
    st.header("Export Selection")

    if not st.session_state.export_cache:
        st.info("No entries in export cache. Select entries from Validation Results tab.")
    else:
        st.write(f"**{len(st.session_state.export_cache)} entries in cache**")

        # Display cache contents
        cache_df = pd.DataFrame(st.session_state.export_cache)
        st.dataframe(cache_df, use_container_width=True)

        col1, col2, col3 = st.columns(3)

        with col1:
            if st.button("Clear Cache"):
                st.session_state.export_cache = []
                st.success("Cache cleared!")
                st.rerun()

        with col2:
            # Export cached entries only
            if st.button("Export Cache to Excel"):
                excel_data = export_to_excel(cache_df)
                st.download_button(
                    label="Download Excel (Cache Only)",
                    data=excel_data,
                    file_name="hts_audit_cache.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )

    # Export full results with original data
    st.subheader("Export Full Results")

    if st.session_state.validation_results is not None and st.session_state.original_df is not None:
        # validation_results is already a DataFrame now
        results_df = st.session_state.validation_results.copy()

        # Status filter for export
        export_status = st.multiselect(
            "Export entries with status:",
            options=["PASS", "FAIL", "FLAG"],
            default=["FAIL", "FLAG"],
            key="export_status_filter"
        )

        # Create filtered export
        if export_status:
            filtered_results = results_df[results_df["Status"].isin(export_status)]

            # Prefer the filtered upload; fall back to the full file when no
            # filtered frame has been stored (missing or None).
            source_df = st.session_state.get("filtered_df")
            if source_df is None:
                source_df = st.session_state.original_df

            # The results keep the index labels of the rows they were computed
            # from, so select by label. Positional .iloc would pick the wrong
            # rows (or run out of bounds) once the upload has been filtered.
            export_original = source_df.loc[filtered_results.index].copy()

            st.write(f"**{len(filtered_results)} entries will be exported**")

            if st.button("Generate Full Export", type="primary"):
                excel_data = export_to_excel(export_original, filtered_results)
                st.download_button(
                    label="Download Full Excel Report",
                    data=excel_data,
                    file_name="hts_audit_full_report.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
    else:
        st.info("Run validation first to enable full export.")
|
| 803 |
+
|
| 804 |
+
# Tab 5: HTS Reference
# Shows the three primary-HTS reference lists side by side, optionally
# narrowed by a substring search, followed by their pairwise overlaps.
with tab5:
    st.header("HTS Reference Lists")
    st.markdown("Reference lists of Steel, Aluminum, and Copper HTS codes used for validation")

    # Search filter
    hts_search = st.text_input(
        "Search HTS code",
        placeholder="Enter HTS to search across all lists",
        key="hts_reference_search"
    )

    reference_lists = (
        ("Steel", Steel_primary_HTS_list),
        ("Aluminum", Aluminum_primary_HTS_list),
        ("Copper", Copper_primary_HTS_list),
    )

    # Codes per metal as text, kept unfiltered for the overlap analysis below.
    code_sets = {}

    for column, (metal, codes) in zip(st.columns(3), reference_lists):
        with column:
            # The heading always reports the size of the complete list.
            st.subheader(f"{metal} HTS ({len(codes)})")
            codes_as_text = [str(code) for code in codes]
            code_sets[metal] = set(codes_as_text)
            if hts_search:
                visible_codes = [code for code in codes_as_text if hts_search in code]
            else:
                visible_codes = codes_as_text
            st.dataframe(
                pd.DataFrame({f"{metal} HTS": visible_codes}),
                use_container_width=True,
                height=400,
            )

    # Show overlap info
    st.subheader("HTS Overlap Analysis")

    overlap_pairs = (("Steel", "Aluminum"), ("Aluminum", "Copper"), ("Steel", "Copper"))

    for column, (first, second) in zip(st.columns(3), overlap_pairs):
        shared_codes = code_sets[first] & code_sets[second]
        with column:
            st.metric(f"{first} & {second} Overlap", len(shared_codes))
            if shared_codes:
                with st.expander("View overlapping HTS"):
                    st.write(sorted(shared_codes))

# Footer: horizontal rule followed by the version line.
for footer_line in ("---", "HTS Checker v1.0 - Tariff Audit Tool"):
    st.markdown(footer_line)
|
hts_validator.py
ADDED
|
@@ -0,0 +1,731 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HTS Validator - Core validation logic for HTS tariff auditing
|
| 3 |
+
Validates primary HTS codes against additional HTS and description keywords
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
from typing import Dict, List, Optional, Tuple, Set
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
+
from HTS_list import Steel_primary_HTS_list, Aluminum_primary_HTS_list, Copper_primary_HTS_list
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Key Additional HTS codes
# Each group below is compared against the normalized "additional HTS" codes
# found on an entry (dots removed, 8-digit strings).
STEEL_232_CODES = {"99038190", "99038191"}  # steel 232 tariff lines
ALUMINUM_232_CODES = {"99038507", "99038508"}  # aluminum 232 tariff lines
COPPER_CODES = {"99037801", "99037802"}  # copper tariff lines
GENERAL_301_CODE = "99030133"  # expected alongside a correctly applied metal tariff
MISMATCH_CODE = "99030125"  # expected when HTS and description do not agree

# Scenario summaries
# Maps a scenario id (as stored in ValidationResult.scenario_id) to the
# human-readable description of the rule that was applied.
SCENARIO_SUMMARIES = {
    "S1": "Steel HTS + 232 tariff applied - verify 99030133 present, no 99030125",
    "S2": "Metal keyword but NOT steel HTS - should apply 99030125, no 232 tariffs",
    "S3": "Steel HTS but NO metal keyword - description mismatch, should apply 99030125",
    "S4": "Aluminum HTS + 232 tariff applied - verify 99030133 present",
    "S5": "Aluminum keyword but NOT aluminum HTS - should apply 99030125, no 232 tariffs",
    "S6": "Aluminum HTS but NO aluminum keyword - description mismatch, should apply 99030125",
    "S7": "Dual Steel+Aluminum HTS - matches keyword, apply corresponding 232",
    "S7a": "Dual Steel+Aluminum HTS + BOTH keywords - flag for manual review",
    "S8": "Dual Steel+Aluminum HTS + NO keywords - should apply 99030125",
    "S9": "Copper keyword but NOT copper HTS - potential misclassification",
    "S10": "Copper HTS but NO copper keyword - description mismatch",
    "S11": "Dual Aluminum+Copper HTS + copper keyword - apply 99030133 + copper tariffs",
    "S12": "Dual Aluminum+Copper HTS + aluminum keyword - apply 99030133 + aluminum 232",
    "S13": "Zinc keyword - should ONLY apply 99030125, no 232 tariffs allowed",
    "S14": "Plastics keyword + metal HTS - override, should ONLY apply 99030125",
    "COPPER_OK": "Copper HTS + copper keyword - verify copper tariffs applied",
    "NONE": "No applicable scenario - entry does not match any validation rules",
}
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class ValidationResult:
    """Result of validating a single entry.

    One instance is produced per audited entry; it records the inputs that
    were checked, which scenario rule was applied, and what was found.
    """
    entry_number: str  # identifier of the audited entry
    description: str  # goods description that was searched for keywords
    primary_hts: str  # primary HTS code with dots removed
    additional_hts: List[str]  # normalized additional HTS codes on the entry
    scenario_id: str  # key into SCENARIO_SUMMARIES (e.g. "S1", "NONE")
    scenario_summary: str  # human-readable text for scenario_id
    status: str  # PASS, FAIL, FLAG
    expected_hts: List[str]  # codes the scenario expects to be applied
    missing_hts: List[str]  # expected codes that are not on the entry
    unexpected_hts: List[str]  # codes on the entry that the scenario forbids
    issue: str  # explanation of the outcome ("; "-joined when several)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class HTSValidator:
|
| 58 |
+
"""Validates HTS codes against descriptions and additional tariffs"""
|
| 59 |
+
|
| 60 |
+
def __init__(self,
             metal_keywords: Optional[List[str]] = None,
             aluminum_keywords: Optional[List[str]] = None,
             copper_keywords: Optional[List[str]] = None,
             zinc_keywords: Optional[List[str]] = None,
             plastics_keywords: Optional[List[str]] = None):
    """Set up the keyword lists and the primary-HTS lookup sets.

    Every keyword argument is optional; a missing or empty list falls back
    to the built-in defaults shown below.
    """
    # attribute name -> (caller-supplied list, built-in fallback)
    keyword_config = {
        "metal_keywords": (
            metal_keywords,
            ["steel", "stainless steel", "carbon steel", "iron", "metal"],
        ),
        "aluminum_keywords": (aluminum_keywords, ["aluminum", "aluminium"]),
        "copper_keywords": (copper_keywords, ["copper"]),
        "zinc_keywords": (zinc_keywords, ["zinc"]),
        "plastics_keywords": (
            plastics_keywords,
            ["plastic", "abs", "pu", "pvc", "polyester", "nylon"],
        ),
    }
    for attribute, (supplied, fallback) in keyword_config.items():
        setattr(self, attribute, supplied or fallback)

    # The HTS lists are stored as sets of strings so they can be compared
    # against string codes read from the data.
    to_set = self._convert_hts_list
    self.steel_hts_set = to_set(Steel_primary_HTS_list)
    self.aluminum_hts_set = to_set(Aluminum_primary_HTS_list)
    self.copper_hts_set = to_set(Copper_primary_HTS_list)
|
| 89 |
+
|
| 90 |
+
def _convert_hts_list(self, hts_list: List) -> Set[str]:
|
| 91 |
+
"""Convert HTS list to set of strings"""
|
| 92 |
+
return {str(hts) for hts in hts_list}
|
| 93 |
+
|
| 94 |
+
def _hts_matches_list(self, primary_hts: str, hts_set: Set[str]) -> bool:
|
| 95 |
+
"""Check if primary HTS matches any entry in HTS set using prefix matching"""
|
| 96 |
+
primary_str = str(primary_hts).replace(".", "").strip()
|
| 97 |
+
|
| 98 |
+
for list_hts in hts_set:
|
| 99 |
+
list_hts_str = str(list_hts).replace(".", "").strip()
|
| 100 |
+
# Prefix match: if list entry is 8 digits, match any 10-digit starting with it
|
| 101 |
+
if len(list_hts_str) <= len(primary_str):
|
| 102 |
+
if primary_str.startswith(list_hts_str):
|
| 103 |
+
return True
|
| 104 |
+
else:
|
| 105 |
+
# List entry is longer, check if primary starts with it
|
| 106 |
+
if list_hts_str.startswith(primary_str):
|
| 107 |
+
return True
|
| 108 |
+
return False
|
| 109 |
+
|
| 110 |
+
def _contains_keywords(self, text: str, keywords: List[str]) -> bool:
|
| 111 |
+
"""Check if text contains any of the keywords (case-insensitive, word boundary)"""
|
| 112 |
+
if not text:
|
| 113 |
+
return False
|
| 114 |
+
text_lower = text.lower()
|
| 115 |
+
for kw in keywords:
|
| 116 |
+
# Use word boundary matching to avoid partial matches (e.g., "pu" in "punch")
|
| 117 |
+
# \b matches word boundaries
|
| 118 |
+
pattern = r'\b' + re.escape(kw.lower()) + r'\b'
|
| 119 |
+
if re.search(pattern, text_lower):
|
| 120 |
+
return True
|
| 121 |
+
return False
|
| 122 |
+
|
| 123 |
+
def _get_additional_hts_set(self, additional_hts: List[str]) -> Set[str]:
|
| 124 |
+
"""Convert additional HTS list to normalized set"""
|
| 125 |
+
result = set()
|
| 126 |
+
for hts in additional_hts:
|
| 127 |
+
if hts:
|
| 128 |
+
# Remove decimal points and convert to string
|
| 129 |
+
normalized = str(hts).replace(".", "").strip()
|
| 130 |
+
# Remove trailing .0 from floats
|
| 131 |
+
if normalized.endswith("0") and len(normalized) > 8:
|
| 132 |
+
# Check if it's a float representation
|
| 133 |
+
try:
|
| 134 |
+
float_val = float(hts)
|
| 135 |
+
normalized = str(int(float_val))
|
| 136 |
+
except (ValueError, TypeError):
|
| 137 |
+
pass
|
| 138 |
+
result.add(normalized)
|
| 139 |
+
return result
|
| 140 |
+
|
| 141 |
+
def _check_hts_present(self, hts_code: str, additional_set: Set[str]) -> bool:
|
| 142 |
+
"""Check if an HTS code is present in additional HTS set"""
|
| 143 |
+
return hts_code in additional_set
|
| 144 |
+
|
| 145 |
+
def _check_any_hts_present(self, hts_codes: Set[str], additional_set: Set[str]) -> bool:
|
| 146 |
+
"""Check if any of the HTS codes are present"""
|
| 147 |
+
return bool(hts_codes & additional_set)
|
| 148 |
+
|
| 149 |
+
def validate_entry(self, entry_number: str, description: str,
                   primary_hts: str, additional_hts: List[str]) -> ValidationResult:
    """Validate a single entry against all scenarios.

    Normalizes the inputs, derives the facts the rules need (list
    membership of the primary HTS, keywords in the description, tariff
    codes present on the entry) and hands them to
    ``_apply_validation_rules``.

    Args:
        entry_number: Identifier of the entry, passed through to the result.
        description: Goods description; falsy values are treated as "".
        primary_hts: Primary HTS code, dotted or plain; falsy values are
            treated as "".
        additional_hts: Raw additional HTS values applied on the entry.

    Returns:
        The ValidationResult of the first scenario that applies.
    """
    # Normalize data
    primary_str = str(primary_hts).strip() if primary_hts else ""
    # A numeric column read as float renders "73063050" as "73063050.0";
    # merely deleting the dot would report the code as "730630500".
    float_rendering = re.fullmatch(r"(\d{8,})\.0+", primary_str)
    if float_rendering:
        primary_str = float_rendering.group(1)
    else:
        primary_str = primary_str.replace(".", "")
    desc = str(description) if description else ""
    additional_set = self._get_additional_hts_set(additional_hts)

    # Check which HTS lists the primary belongs to
    in_steel = self._hts_matches_list(primary_str, self.steel_hts_set)
    in_aluminum = self._hts_matches_list(primary_str, self.aluminum_hts_set)
    in_copper = self._hts_matches_list(primary_str, self.copper_hts_set)

    # Check description keywords
    has_metal_kw = self._contains_keywords(desc, self.metal_keywords)
    has_aluminum_kw = self._contains_keywords(desc, self.aluminum_keywords)
    has_copper_kw = self._contains_keywords(desc, self.copper_keywords)
    has_zinc_kw = self._contains_keywords(desc, self.zinc_keywords)
    has_plastics_kw = self._contains_keywords(desc, self.plastics_keywords)

    # Check which additional HTS are applied
    has_steel_232 = self._check_any_hts_present(STEEL_232_CODES, additional_set)
    has_aluminum_232 = self._check_any_hts_present(ALUMINUM_232_CODES, additional_set)
    has_copper_tariff = self._check_any_hts_present(COPPER_CODES, additional_set)
    has_301 = self._check_hts_present(GENERAL_301_CODE, additional_set)
    has_mismatch = self._check_hts_present(MISMATCH_CODE, additional_set)

    # Apply validation rules in priority order
    return self._apply_validation_rules(
        entry_number=entry_number,
        description=desc,
        primary_hts=primary_str,
        # Sorted so the reported list is the same on every run; iteration
        # order of a set of strings is not stable across processes.
        additional_hts=sorted(additional_set),
        in_steel=in_steel,
        in_aluminum=in_aluminum,
        in_copper=in_copper,
        has_metal_kw=has_metal_kw,
        has_aluminum_kw=has_aluminum_kw,
        has_copper_kw=has_copper_kw,
        has_zinc_kw=has_zinc_kw,
        has_plastics_kw=has_plastics_kw,
        has_steel_232=has_steel_232,
        has_aluminum_232=has_aluminum_232,
        has_copper_tariff=has_copper_tariff,
        has_301=has_301,
        has_mismatch=has_mismatch,
        additional_set=additional_set,
    )
|
| 198 |
+
|
| 199 |
+
def _apply_validation_rules(self, entry_number: str, description: str,
                            primary_hts: str, additional_hts: List[str],
                            in_steel: bool, in_aluminum: bool, in_copper: bool,
                            has_metal_kw: bool, has_aluminum_kw: bool,
                            has_copper_kw: bool, has_zinc_kw: bool,
                            has_plastics_kw: bool, has_steel_232: bool,
                            has_aluminum_232: bool, has_copper_tariff: bool,
                            has_301: bool, has_mismatch: bool,
                            additional_set: Set[str]) -> ValidationResult:
    """Apply all validation rules and return the result of the first match.

    Rules are tried in this priority order:

    1. Overrides: zinc keyword (S13), plastics keyword on a steel/aluminum
       HTS (S14).
    2. Dual-list HTS: aluminum+copper (S11, S12), steel+aluminum
       (S7a, S7, S8).
    3. Single-list HTS and keyword-only cases: steel (S1, S3, S2),
       aluminum (S4, S6, S5), copper (COPPER_OK, S10, S9).
    4. Otherwise scenario "NONE" with status PASS.

    Args:
        entry_number, description, primary_hts, additional_hts: Passed
            through unchanged into the result.
        in_steel, in_aluminum, in_copper: Whether the primary HTS matched
            the respective primary-HTS list.
        has_*_kw: Whether the description contains the respective keywords.
        has_steel_232, has_aluminum_232, has_copper_tariff, has_301,
        has_mismatch: Whether the respective tariff codes are applied.
        additional_set: Normalized additional HTS codes on the entry.

    Returns:
        A ValidationResult whose status is PASS, FAIL or FLAG. Code lists
        derived from code sets are emitted in sorted order so reports are
        identical from run to run.
    """
    all_metal_codes = STEEL_232_CODES | ALUMINUM_232_CODES | COPPER_CODES
    mismatch_missing = [] if has_mismatch else [MISMATCH_CODE]
    mismatch_unexpected = [MISMATCH_CODE] if has_mismatch else []

    # Checks shared by several scenarios, as (failed, message) pairs.
    need_301 = (not has_301, "Missing 99030133")
    need_mismatch = (not has_mismatch, "Missing 99030125")

    def applied(codes):
        """Return the codes from *codes* that are on the entry, sorted."""
        return sorted(codes & additional_set)

    def problems(*checks):
        """Return the messages of the failed (failed, message) checks, in order."""
        return [message for failed, message in checks if failed]

    def missing_301_and(codes, has_codes):
        """Return 99030133 if absent, plus all of *codes* if none is applied."""
        missing = [] if has_301 else [GENERAL_301_CODE]
        if not has_codes:
            missing.extend(sorted(codes))
        return missing

    def build(scenario_id, status, issue, expected=(), missing=(), unexpected=()):
        """Create the result for *scenario_id* with the entry data filled in."""
        return ValidationResult(
            entry_number=entry_number,
            description=description,
            primary_hts=primary_hts,
            additional_hts=additional_hts,
            scenario_id=scenario_id,
            scenario_summary=SCENARIO_SUMMARIES[scenario_id],
            status=status,
            expected_hts=list(expected),
            missing_hts=list(missing),
            unexpected_hts=list(unexpected),
            issue=issue,
        )

    def graded(scenario_id, found, ok_message, expected, missing, unexpected=()):
        """Create a PASS result when *found* is empty, else a FAIL result."""
        if found:
            return build(scenario_id, "FAIL", "; ".join(found),
                         expected, missing, unexpected)
        return build(scenario_id, "PASS", ok_message, expected, missing, unexpected)

    # ---- Priority 1: Special overrides (zinc, plastics) ----

    # S13: Zinc keyword - only 99030125, no 232 tariffs
    if has_zinc_kw:
        return graded(
            "S13",
            problems(
                need_mismatch,
                (has_steel_232 or has_aluminum_232 or has_copper_tariff,
                 "Should NOT have 232/copper tariffs with zinc"),
            ),
            "Correct - zinc with only 99030125",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
            unexpected=applied(all_metal_codes),
        )

    # S14: Plastics keyword + metal HTS - only 99030125
    if has_plastics_kw and (in_steel or in_aluminum):
        return graded(
            "S14",
            problems(
                need_mismatch,
                (has_steel_232 or has_aluminum_232,
                 "Should NOT have 232 tariffs with plastics material"),
            ),
            "Correct - plastics with only 99030125",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
            # NOTE(review): copper codes are listed as unexpected here but
            # do not cause a FAIL - confirm which of the two is intended.
            unexpected=applied(all_metal_codes),
        )

    # ---- Priority 2: Dual list scenarios ----

    # Dual Aluminum + Copper
    if in_aluminum and in_copper:
        # S11: Copper keyword
        if has_copper_kw:
            return graded(
                "S11",
                problems(
                    need_301,
                    (not has_copper_tariff, "Missing copper tariff (99037801/02)"),
                    (has_aluminum_232,
                     "Should NOT have aluminum 232 when description says copper"),
                ),
                "Correct - dual AL/CU with copper keyword",
                expected=[GENERAL_301_CODE] + sorted(COPPER_CODES),
                missing=missing_301_and(COPPER_CODES, has_copper_tariff),
                unexpected=applied(ALUMINUM_232_CODES),
            )

        # S12: Aluminum keyword
        if has_aluminum_kw:
            return graded(
                "S12",
                problems(
                    need_301,
                    (not has_aluminum_232, "Missing aluminum 232 tariff (99038507/08)"),
                    (has_copper_tariff,
                     "Should NOT have copper tariff when description says aluminum"),
                ),
                "Correct - dual AL/CU with aluminum keyword",
                expected=[GENERAL_301_CODE] + sorted(ALUMINUM_232_CODES),
                missing=missing_301_and(ALUMINUM_232_CODES, has_aluminum_232),
                unexpected=applied(COPPER_CODES),
            )
        # NOTE(review): a dual AL/CU HTS with neither keyword falls through
        # to the rules below and may end as "NONE"/PASS - confirm intended.

    # Dual Steel + Aluminum
    if in_steel and in_aluminum:
        # S7a: Both keywords - flag for review
        if has_metal_kw and has_aluminum_kw:
            return build(
                "S7a", "FLAG",
                "AMBIGUOUS: Description contains both steel and aluminum keywords - manual review required",
            )

        # S7: Steel keyword
        if has_metal_kw:
            return graded(
                "S7",
                problems(
                    need_301,
                    (not has_steel_232, "Missing steel 232 tariff (99038190/91)"),
                    (has_aluminum_232,
                     "Should NOT have aluminum 232 when description says steel"),
                    (has_mismatch,
                     "Should NOT have 99030125 with correct steel classification"),
                ),
                "Correct - dual ST/AL with steel keyword",
                expected=[GENERAL_301_CODE] + sorted(STEEL_232_CODES),
                missing=missing_301_and(STEEL_232_CODES, has_steel_232),
                unexpected=applied(ALUMINUM_232_CODES) + mismatch_unexpected,
            )

        # S7: Aluminum keyword
        if has_aluminum_kw:
            return graded(
                "S7",
                problems(
                    need_301,
                    (not has_aluminum_232, "Missing aluminum 232 tariff (99038507/08)"),
                    (has_steel_232,
                     "Should NOT have steel 232 when description says aluminum"),
                    (has_mismatch,
                     "Should NOT have 99030125 with correct aluminum classification"),
                ),
                "Correct - dual ST/AL with aluminum keyword",
                expected=[GENERAL_301_CODE] + sorted(ALUMINUM_232_CODES),
                missing=missing_301_and(ALUMINUM_232_CODES, has_aluminum_232),
                unexpected=applied(STEEL_232_CODES) + mismatch_unexpected,
            )

        # S8: Neither keyword
        return graded(
            "S8",
            problems(
                need_mismatch,
                (has_steel_232 or has_aluminum_232,
                 "Should NOT have 232 tariffs without steel/aluminum keyword"),
            ),
            "Correct - dual ST/AL with no keyword, has 99030125",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
            unexpected=applied(all_metal_codes),
        )

    # ---- Priority 3: Single list scenarios ----
    # NOTE(review): an HTS on both the steel and copper lists (but not the
    # aluminum list) matches none of the single-list branches below.

    # Steel scenarios (S1, S2, S3)
    if in_steel and not in_aluminum and not in_copper:
        steel_expected = [GENERAL_301_CODE] + sorted(STEEL_232_CODES)
        if has_metal_kw:
            if has_steel_232:
                # S1: Steel HTS + metal keyword + 232 tariff
                return graded(
                    "S1",
                    problems(
                        need_301,
                        (has_mismatch,
                         "Should NOT have 99030125 with correct steel classification"),
                    ),
                    "Correct - steel HTS + keyword + 232",
                    expected=steel_expected,
                    missing=missing_301_and(STEEL_232_CODES, True),
                    unexpected=mismatch_unexpected,
                )
            # Steel HTS + metal keyword but no 232 - should have 232
            return build(
                "S1", "FAIL",
                "; ".join(problems(
                    (True, "Missing steel 232 tariff (99038190/91)"),
                    need_301,
                )),
                expected=steel_expected,
                missing=missing_301_and(STEEL_232_CODES, False),
            )
        # S3: Steel HTS but NO metal keyword - always flagged for review
        return build(
            "S3", "FLAG",
            "HTS in steel list but description has no metal keywords - verify classification",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
        )

    # S2: Metal keyword but NOT in steel list
    if has_metal_kw and not in_steel:
        return graded(
            "S2",
            problems(
                need_mismatch,
                (has_steel_232,
                 "Should NOT have steel 232 tariff - HTS not in steel list"),
            ),
            "Correct - metal keyword with non-steel HTS",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
            unexpected=applied(STEEL_232_CODES),
        )

    # Aluminum scenarios (S4, S5, S6)
    if in_aluminum and not in_steel and not in_copper:
        aluminum_expected = [GENERAL_301_CODE] + sorted(ALUMINUM_232_CODES)
        if has_aluminum_kw:
            if has_aluminum_232:
                # S4: Aluminum HTS + aluminum keyword + 232 tariff
                # NOTE(review): unlike S1, a present 99030125 is not
                # reported here - confirm whether that is intended.
                return graded(
                    "S4",
                    problems(need_301),
                    "Correct - aluminum HTS + keyword + 232",
                    expected=aluminum_expected,
                    missing=missing_301_and(ALUMINUM_232_CODES, True),
                )
            # Aluminum HTS + keyword but no 232
            return build(
                "S4", "FAIL",
                "; ".join(problems(
                    (True, "Missing aluminum 232 tariff (99038507/08)"),
                    need_301,
                )),
                expected=aluminum_expected,
                missing=missing_301_and(ALUMINUM_232_CODES, False),
            )
        # S6: Aluminum HTS but NO aluminum keyword - always flagged
        return build(
            "S6", "FLAG",
            "HTS in aluminum list but description has no aluminum keywords - verify classification",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
        )

    # S5: Aluminum keyword but NOT in aluminum list
    if has_aluminum_kw and not in_aluminum:
        return graded(
            "S5",
            problems(
                need_mismatch,
                (has_aluminum_232,
                 "Should NOT have aluminum 232 tariff - HTS not in aluminum list"),
            ),
            "Correct - aluminum keyword with non-aluminum HTS",
            expected=[MISMATCH_CODE],
            missing=mismatch_missing,
            unexpected=applied(ALUMINUM_232_CODES),
        )

    # Copper scenarios (S9, S10, COPPER_OK)
    if in_copper and not in_steel and not in_aluminum:
        if has_copper_kw:
            # COPPER_OK: Copper HTS + copper keyword
            return graded(
                "COPPER_OK",
                problems(
                    (not has_copper_tariff, "Missing copper tariff (99037801/02)"),
                ),
                "Correct - copper HTS + keyword",
                expected=sorted(COPPER_CODES),
                missing=[] if has_copper_tariff else sorted(COPPER_CODES),
            )
        # S10: Copper HTS but NO copper keyword
        return build(
            "S10", "FLAG",
            "HTS in copper list but description has no copper keywords - verify classification",
        )

    # S9: Copper keyword but NOT in copper list
    if has_copper_kw and not in_copper:
        return build(
            "S9", "FLAG",
            "Description mentions copper but HTS not in copper list - potential misclassification",
        )

    # No applicable scenario
    return build("NONE", "PASS", "No metal-related validation required")
|
| 680 |
+
|
| 681 |
+
def _get_unexpected_232(self, additional_set: Set[str]) -> List[str]:
    """Return the 232 tariff codes from ``additional_set`` that shouldn't be present.

    Args:
        additional_set: Additional HTS codes declared on the entry.

    Returns:
        Every code in ``additional_set`` that also appears in
        ``STEEL_232_CODES``, ``ALUMINUM_232_CODES`` or ``COPPER_CODES``,
        in ascending order.
    """
    all_232 = STEEL_232_CODES | ALUMINUM_232_CODES | COPPER_CODES
    # sorted() instead of list(): set iteration order is arbitrary, so a
    # plain list() would report the same codes in a different order from
    # one run to the next.
    return sorted(all_232 & additional_set)
|
| 685 |
+
|
| 686 |
+
def _get_missing_codes(self, fixed_codes: List[str], has_fixed: bool,
|
| 687 |
+
variable_codes: Set[str], has_variable: bool) -> List[str]:
|
| 688 |
+
"""Get list of missing codes"""
|
| 689 |
+
missing = []
|
| 690 |
+
if not has_fixed:
|
| 691 |
+
missing.extend(fixed_codes)
|
| 692 |
+
if not has_variable:
|
| 693 |
+
missing.extend(list(variable_codes))
|
| 694 |
+
return missing
|
| 695 |
+
|
| 696 |
+
|
| 697 |
+
def validate_dataframe(df, validator: HTSValidator,
                       description_col: str = "Description",
                       tariff_col: str = "Tariff",
                       entry_col: str = "Entry Number",
                       additional_cols: List[str] = None) -> List[ValidationResult]:
    """Validate all entries in a DataFrame.

    Each row is turned into one ``validator.validate_entry(...)`` call and
    the results are returned in row order.

    Args:
        df: DataFrame with one entry per row.
        validator: Object whose ``validate_entry`` method is called with the
            keyword arguments ``entry_number``, ``description``,
            ``primary_hts`` and ``additional_hts``.
        description_col: Column holding the goods description.
        tariff_col: Column holding the primary HTS code.
        entry_col: Column holding the entry identifier. Rows where it is
            absent or empty are identified as ``Row_<index label>``.
        additional_cols: Columns holding additional HTS codes. Defaults to
            ``"Primary 1"`` through ``"Primary 6"``. Columns not present in
            the frame are ignored.

    Returns:
        One ``validate_entry`` result per row.
    """
    # Local import keeps the block self-contained; a caller passing a
    # DataFrame necessarily has pandas installed.
    import pandas as pd

    def cell_text(value) -> str:
        """Render one cell as text; a missing cell becomes ''."""
        # None / NaN / pd.NA / NaT would otherwise stringify to "None",
        # "nan", "<NA>" ... and be validated as if they were real data.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        # pandas upcasts an integer column to float as soon as it contains
        # a missing value, so a code like 99037801 arrives as 99037801.0.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return str(value)

    if additional_cols is None:
        additional_cols = ["Primary 1", "Primary 2", "Primary 3",
                           "Primary 4", "Primary 5", "Primary 6"]

    results = []

    for idx, row in df.iterrows():
        entry_number = cell_text(row.get(entry_col)) or f"Row_{idx}"
        description = cell_text(row.get(description_col))
        primary_hts = cell_text(row.get(tariff_col))

        # Get additional HTS codes. The literal "nan" check is kept for
        # frames whose cells were already converted to strings upstream.
        candidates = (cell_text(row.get(col)) for col in additional_cols)
        additional_hts = [
            text for text in candidates
            if text.strip() and text.lower() != "nan"
        ]

        results.append(
            validator.validate_entry(
                entry_number=entry_number,
                description=description,
                primary_hts=primary_hts,
                additional_hts=additional_hts,
            )
        )

    return results
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
openpyxl
|