HaLim committed on
Commit
ef24926
·
1 Parent(s): 24c313b

Add data processing for kit hierarchy

Browse files
notebook/analyze_Realdata.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebook/data_preprocess.ipynb CHANGED
@@ -1,829 +1 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "f68ac913",
6
- "metadata": {},
7
- "source": [
8
- "# Data preprocessing: kit composition and line types"
9
- ]
10
- },
11
- {
12
- "cell_type": "markdown",
13
- "id": "251abe9b",
14
- "metadata": {},
15
- "source": [
16
- "## Import pandas and load the kit composition CSV"
17
- ]
18
- },
19
- {
20
- "cell_type": "code",
21
- "execution_count": 78,
22
- "id": "2709a4af",
23
- "metadata": {},
24
- "outputs": [
25
- {
26
- "name": "stdout",
27
- "output_type": "stream",
28
- "text": [
29
- "/Users/halimjun/Coding_local/SD_roster_real/notebook\n",
30
- "<class 'pandas.core.frame.DataFrame'>\n",
31
- "RangeIndex: 2602 entries, 0 to 2601\n",
32
- "Data columns (total 13 columns):\n",
33
- " # Column Non-Null Count Dtype \n",
34
- "--- ------ -------------- ----- \n",
35
- " 0 # 2602 non-null int64 \n",
36
- " 1 Master Kit 2602 non-null object \n",
37
- " 2 Master Kit Description 2602 non-null object \n",
38
- " 3 Sub kit 1347 non-null object \n",
39
- " 4 Sub kit description 1347 non-null object \n",
40
- " 5 Prepack 1028 non-null object \n",
41
- " 6 Prepack Description 1028 non-null object \n",
42
- " 7 Item Number 2286 non-null float64\n",
43
- " 8 Component number 2602 non-null object \n",
44
- " 9 Object description 2602 non-null object \n",
45
- " 10 Comp. Qty (CUn) 2602 non-null int64 \n",
46
- " 11 Component unit 2279 non-null object \n",
47
- " 12 Batch management 1118 non-null object \n",
48
- "dtypes: float64(1), int64(2), object(10)\n",
49
- "memory usage: 264.4+ KB\n"
50
- ]
51
- },
52
- {
53
- "data": {
54
- "text/html": [
55
- "<div>\n",
56
- "<style scoped>\n",
57
- " .dataframe tbody tr th:only-of-type {\n",
58
- " vertical-align: middle;\n",
59
- " }\n",
60
- "\n",
61
- " .dataframe tbody tr th {\n",
62
- " vertical-align: top;\n",
63
- " }\n",
64
- "\n",
65
- " .dataframe thead th {\n",
66
- " text-align: right;\n",
67
- " }\n",
68
- "</style>\n",
69
- "<table border=\"1\" class=\"dataframe\">\n",
70
- " <thead>\n",
71
- " <tr style=\"text-align: right;\">\n",
72
- " <th></th>\n",
73
- " <th>#</th>\n",
74
- " <th>Master Kit</th>\n",
75
- " <th>Master Kit Description</th>\n",
76
- " <th>Sub kit</th>\n",
77
- " <th>Sub kit description</th>\n",
78
- " <th>Prepack</th>\n",
79
- " <th>Prepack Description</th>\n",
80
- " <th>Item Number</th>\n",
81
- " <th>Component number</th>\n",
82
- " <th>Object description</th>\n",
83
- " <th>Comp. Qty (CUn)</th>\n",
84
- " <th>Component unit</th>\n",
85
- " <th>Batch management</th>\n",
86
- " </tr>\n",
87
- " </thead>\n",
88
- " <tbody>\n",
89
- " <tr>\n",
90
- " <th>30</th>\n",
91
- " <td>31</td>\n",
92
- " <td>S9901040</td>\n",
93
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
94
- " <td>S9991041</td>\n",
95
- " <td>IEHK 2024,Basic Medicine&amp;Renewabl SUB1/3</td>\n",
96
- " <td>S9991044</td>\n",
97
- " <td>PP 1/2 IEHK 2024,Basic Medicine&amp;Renewabl</td>\n",
98
- " <td>20.0</td>\n",
99
- " <td>S0512025</td>\n",
100
- " <td>Bandage,gauze,8cmx4m,roll</td>\n",
101
- " <td>50</td>\n",
102
- " <td>EA</td>\n",
103
- " <td>X</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>32</th>\n",
107
- " <td>33</td>\n",
108
- " <td>S9901040</td>\n",
109
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
110
- " <td>S9991041</td>\n",
111
- " <td>IEHK 2024,Basic Medicine&amp;Renewabl SUB1/3</td>\n",
112
- " <td>S9991044</td>\n",
113
- " <td>PP 1/2 IEHK 2024,Basic Medicine&amp;Renewabl</td>\n",
114
- " <td>40.0</td>\n",
115
- " <td>S1504008</td>\n",
116
- " <td>Omeprazole 20mg capsules (e/c) PAC/10x10</td>\n",
117
- " <td>5</td>\n",
118
- " <td>PAC</td>\n",
119
- " <td>X</td>\n",
120
- " </tr>\n",
121
- " <tr>\n",
122
- " <th>33</th>\n",
123
- " <td>34</td>\n",
124
- " <td>S9901040</td>\n",
125
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
126
- " <td>S9991041</td>\n",
127
- " <td>IEHK 2024,Basic Medicine&amp;Renewabl SUB1/3</td>\n",
128
- " <td>S9991044</td>\n",
129
- " <td>PP 1/2 IEHK 2024,Basic Medicine&amp;Renewabl</td>\n",
130
- " <td>50.0</td>\n",
131
- " <td>S1555370</td>\n",
132
- " <td>Albendazole 400mg chewable tabs/PAC-100</td>\n",
133
- " <td>2</td>\n",
134
- " <td>PAC</td>\n",
135
- " <td>X</td>\n",
136
- " </tr>\n",
137
- " </tbody>\n",
138
- "</table>\n",
139
- "</div>"
140
- ],
141
- "text/plain": [
142
- " # Master Kit Master Kit Description Sub kit \\\n",
143
- "30 31 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT S9991041 \n",
144
- "32 33 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT S9991041 \n",
145
- "33 34 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT S9991041 \n",
146
- "\n",
147
- " Sub kit description Prepack \\\n",
148
- "30 IEHK 2024,Basic Medicine&Renewabl SUB1/3 S9991044 \n",
149
- "32 IEHK 2024,Basic Medicine&Renewabl SUB1/3 S9991044 \n",
150
- "33 IEHK 2024,Basic Medicine&Renewabl SUB1/3 S9991044 \n",
151
- "\n",
152
- " Prepack Description Item Number Component number \\\n",
153
- "30 PP 1/2 IEHK 2024,Basic Medicine&Renewabl 20.0 S0512025 \n",
154
- "32 PP 1/2 IEHK 2024,Basic Medicine&Renewabl 40.0 S1504008 \n",
155
- "33 PP 1/2 IEHK 2024,Basic Medicine&Renewabl 50.0 S1555370 \n",
156
- "\n",
157
- " Object description Comp. Qty (CUn) Component unit \\\n",
158
- "30 Bandage,gauze,8cmx4m,roll 50 EA \n",
159
- "32 Omeprazole 20mg capsules (e/c) PAC/10x10 5 PAC \n",
160
- "33 Albendazole 400mg chewable tabs/PAC-100 2 PAC \n",
161
- "\n",
162
- " Batch management \n",
163
- "30 X \n",
164
- "32 X \n",
165
- "33 X "
166
- ]
167
- },
168
- "execution_count": 78,
169
- "metadata": {},
170
- "output_type": "execute_result"
171
- }
172
- ],
173
- "source": [
174
- "import pandas as pd\n",
175
- "!pwd\n",
176
- "df = pd.read_csv('../data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv')\n",
177
- "df.columns\n",
178
- "df.info()\n",
179
- "df.describe()\n",
180
- "df.isnull().sum()\n",
181
- "df.dropna(inplace=True)\n",
182
- "df.head(3)\n",
183
- "# Target columns: kit name | kit level (master, subkit, prepack) | line type. Line type rule: a standalone master kit (one that appears only by itself) -> \"long line\", any other master kit -> null, subkit -> \"long line\", prepack -> \"mini load\".\n"
184
- ]
185
- },
186
- {
187
- "cell_type": "code",
188
- "execution_count": 79,
189
- "id": "75349f36",
190
- "metadata": {},
191
- "outputs": [
192
- {
193
- "name": "stderr",
194
- "output_type": "stream",
195
- "text": [
196
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:2: SettingWithCopyWarning: \n",
197
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
198
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
199
- "\n",
200
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
201
- " master[\"kit_type\"] = \"master\"\n",
202
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:3: SettingWithCopyWarning: \n",
203
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
204
- "\n",
205
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
206
- " master.rename(columns={\"Master Kit\": \"kit_name\", \"Master Kit Description\": \"kit_description\"}, inplace=True)\n",
207
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:6: SettingWithCopyWarning: \n",
208
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
209
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
210
- "\n",
211
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
212
- " subkit[\"kit_type\"] = \"subkit\"\n",
213
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:7: SettingWithCopyWarning: \n",
214
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
215
- "\n",
216
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
217
- " subkit.rename(columns={\"Sub kit\": \"kit_name\", \"Sub kit Description\": \"kit_description\"}, inplace=True)\n",
218
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:11: SettingWithCopyWarning: \n",
219
- "A value is trying to be set on a copy of a slice from a DataFrame.\n",
220
- "Try using .loc[row_indexer,col_indexer] = value instead\n",
221
- "\n",
222
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
223
- " prepack[\"kit_type\"] = \"prepack\"\n",
224
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:12: SettingWithCopyWarning: \n",
225
- "A value is trying to be set on a copy of a slice from a DataFrame\n",
226
- "\n",
227
- "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
228
- " prepack.rename(columns={\"Prepack\": \"kit_name\", \"Prepack Description\": \"kit_description\"}, inplace=True)\n"
229
- ]
230
- },
231
- {
232
- "data": {
233
- "text/html": [
234
- "<div>\n",
235
- "<style scoped>\n",
236
- " .dataframe tbody tr th:only-of-type {\n",
237
- " vertical-align: middle;\n",
238
- " }\n",
239
- "\n",
240
- " .dataframe tbody tr th {\n",
241
- " vertical-align: top;\n",
242
- " }\n",
243
- "\n",
244
- " .dataframe thead th {\n",
245
- " text-align: right;\n",
246
- " }\n",
247
- "</style>\n",
248
- "<table border=\"1\" class=\"dataframe\">\n",
249
- " <thead>\n",
250
- " <tr style=\"text-align: right;\">\n",
251
- " <th></th>\n",
252
- " <th>kit_name</th>\n",
253
- " <th>kit_description</th>\n",
254
- " <th>kit_type</th>\n",
255
- " </tr>\n",
256
- " </thead>\n",
257
- " <tbody>\n",
258
- " <tr>\n",
259
- " <th>30</th>\n",
260
- " <td>S9901040</td>\n",
261
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
262
- " <td>master</td>\n",
263
- " </tr>\n",
264
- " <tr>\n",
265
- " <th>32</th>\n",
266
- " <td>S9901040</td>\n",
267
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
268
- " <td>master</td>\n",
269
- " </tr>\n",
270
- " <tr>\n",
271
- " <th>33</th>\n",
272
- " <td>S9901040</td>\n",
273
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
274
- " <td>master</td>\n",
275
- " </tr>\n",
276
- " <tr>\n",
277
- " <th>36</th>\n",
278
- " <td>S9901040</td>\n",
279
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
280
- " <td>master</td>\n",
281
- " </tr>\n",
282
- " <tr>\n",
283
- " <th>37</th>\n",
284
- " <td>S9901040</td>\n",
285
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
286
- " <td>master</td>\n",
287
- " </tr>\n",
288
- " </tbody>\n",
289
- "</table>\n",
290
- "</div>"
291
- ],
292
- "text/plain": [
293
- " kit_name kit_description kit_type\n",
294
- "30 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
295
- "32 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
296
- "33 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
297
- "36 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
298
- "37 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master"
299
- ]
300
- },
301
- "execution_count": 79,
302
- "metadata": {},
303
- "output_type": "execute_result"
304
- }
305
- ],
306
- "source": [
307
- "\n",
308
- "\n",
309
- "master = df[[\"Master Kit\", \"Master Kit Description\"]]\n",
310
- "master[\"kit_type\"] = \"master\"\n",
311
- "master.rename(columns={\"Master Kit\": \"kit_name\", \"Master Kit Description\": \"kit_description\"}, inplace=True)\n",
312
- "\n",
313
- "subkit = df[[\"Sub kit\", \"Sub kit description\"]]\n",
314
- "subkit[\"kit_type\"] = \"subkit\"\n",
315
- "subkit.rename(columns={\"Sub kit\": \"kit_name\", \"Sub kit Description\": \"kit_description\"}, inplace=True)\n",
316
- "subkit.columns = [\"kit_name\", \"kit_description\", \"kit_type\"]\n",
317
- "\n",
318
- "prepack = df[[\"Prepack\", \"Prepack Description\"]]\n",
319
- "prepack[\"kit_type\"] = \"prepack\"\n",
320
- "prepack.rename(columns={\"Prepack\": \"kit_name\", \"Prepack Description\": \"kit_description\"}, inplace=True)\n",
321
- "\n",
322
- "\n",
323
- "cleaned_df = pd.concat([master, subkit, prepack])\n",
324
- "cleaned_df.head()"
325
- ]
326
- },
327
- {
328
- "cell_type": "code",
329
- "execution_count": 80,
330
- "id": "2e6e197d",
331
- "metadata": {},
332
- "outputs": [],
333
- "source": [
334
- "\n",
335
- "cleaned_df[['kit_name','kit_description','kit_type']].drop_duplicates()\n",
336
- "tmp = cleaned_df.groupby('kit_name').count()['kit_type'].reset_index()\n",
337
- "standalone_masterkit_list = tmp.loc[tmp['kit_type']==1,'kit_name']\n",
338
- "standalone_masterkit_list\n",
339
- "\n",
340
- "cleaned_df.loc[cleaned_df['kit_name'].isin(standalone_masterkit_list),'line_type'] = 'long line'\n",
341
- "cleaned_df.loc[cleaned_df['kit_type']=='prepack','line_type'] = 'mini load'\n",
342
- "cleaned_df.loc[cleaned_df['kit_type']=='subkit','line_type'] = 'long line'"
343
- ]
344
- },
345
- {
346
- "cell_type": "code",
347
- "execution_count": 81,
348
- "id": "9ff00aa6",
349
- "metadata": {},
350
- "outputs": [],
351
- "source": [
352
- "cleaned_df.loc[cleaned_df['line_type']=='mini load', 'line_id'] = 7\n",
353
- "cleaned_df.loc[cleaned_df['line_type']=='long line', 'line_id'] = 6\n",
354
- "cleaned_df.to_csv('../data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type_and_id.csv', index=False)"
355
- ]
356
- },
357
- {
358
- "cell_type": "code",
359
- "execution_count": 83,
360
- "id": "3d8d547c",
361
- "metadata": {},
362
- "outputs": [
363
- {
364
- "data": {
365
- "text/html": [
366
- "<div>\n",
367
- "<style scoped>\n",
368
- " .dataframe tbody tr th:only-of-type {\n",
369
- " vertical-align: middle;\n",
370
- " }\n",
371
- "\n",
372
- " .dataframe tbody tr th {\n",
373
- " vertical-align: top;\n",
374
- " }\n",
375
- "\n",
376
- " .dataframe thead th {\n",
377
- " text-align: right;\n",
378
- " }\n",
379
- "</style>\n",
380
- "<table border=\"1\" class=\"dataframe\">\n",
381
- " <thead>\n",
382
- " <tr style=\"text-align: right;\">\n",
383
- " <th></th>\n",
384
- " <th>Master Kit</th>\n",
385
- " <th>Sub kit</th>\n",
386
- " <th>Prepack</th>\n",
387
- " </tr>\n",
388
- " </thead>\n",
389
- " <tbody>\n",
390
- " <tr>\n",
391
- " <th>30</th>\n",
392
- " <td>S9901040</td>\n",
393
- " <td>S9991041</td>\n",
394
- " <td>S9991044</td>\n",
395
- " </tr>\n",
396
- " <tr>\n",
397
- " <th>36</th>\n",
398
- " <td>S9901040</td>\n",
399
- " <td>S9991042</td>\n",
400
- " <td>S9991045</td>\n",
401
- " </tr>\n",
402
- " <tr>\n",
403
- " <th>216</th>\n",
404
- " <td>S9902219</td>\n",
405
- " <td>S9999898</td>\n",
406
- " <td>S9999941</td>\n",
407
- " </tr>\n",
408
- " <tr>\n",
409
- " <th>225</th>\n",
410
- " <td>S9902219</td>\n",
411
- " <td>S9999898</td>\n",
412
- " <td>S9999942</td>\n",
413
- " </tr>\n",
414
- " <tr>\n",
415
- " <th>232</th>\n",
416
- " <td>S9902219</td>\n",
417
- " <td>S9999898</td>\n",
418
- " <td>S9999940</td>\n",
419
- " </tr>\n",
420
- " <tr>\n",
421
- " <th>418</th>\n",
422
- " <td>S9902450</td>\n",
423
- " <td>S9992451</td>\n",
424
- " <td>S9992450</td>\n",
425
- " </tr>\n",
426
- " <tr>\n",
427
- " <th>508</th>\n",
428
- " <td>S9902470</td>\n",
429
- " <td>S9992470</td>\n",
430
- " <td>S9992414</td>\n",
431
- " </tr>\n",
432
- " <tr>\n",
433
- " <th>514</th>\n",
434
- " <td>S9902470</td>\n",
435
- " <td>S9992470</td>\n",
436
- " <td>S9992415</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>574</th>\n",
440
- " <td>S9902480</td>\n",
441
- " <td>S9992483</td>\n",
442
- " <td>S9992480</td>\n",
443
- " </tr>\n",
444
- " <tr>\n",
445
- " <th>673</th>\n",
446
- " <td>S9903003</td>\n",
447
- " <td>S9999144</td>\n",
448
- " <td>S9999149</td>\n",
449
- " </tr>\n",
450
- " <tr>\n",
451
- " <th>676</th>\n",
452
- " <td>S9903003</td>\n",
453
- " <td>S9999145</td>\n",
454
- " <td>S9999143</td>\n",
455
- " </tr>\n",
456
- " <tr>\n",
457
- " <th>684</th>\n",
458
- " <td>S9903003</td>\n",
459
- " <td>S9999145</td>\n",
460
- " <td>S9999138</td>\n",
461
- " </tr>\n",
462
- " <tr>\n",
463
- " <th>692</th>\n",
464
- " <td>S9903003</td>\n",
465
- " <td>S9999145</td>\n",
466
- " <td>S9999137</td>\n",
467
- " </tr>\n",
468
- " <tr>\n",
469
- " <th>743</th>\n",
470
- " <td>S9906706</td>\n",
471
- " <td>S9999701</td>\n",
472
- " <td>S9999703</td>\n",
473
- " </tr>\n",
474
- " <tr>\n",
475
- " <th>752</th>\n",
476
- " <td>S9906706</td>\n",
477
- " <td>S9999701</td>\n",
478
- " <td>S9999704</td>\n",
479
- " </tr>\n",
480
- " <tr>\n",
481
- " <th>759</th>\n",
482
- " <td>S9906706</td>\n",
483
- " <td>S9999701</td>\n",
484
- " <td>S9999705</td>\n",
485
- " </tr>\n",
486
- " <tr>\n",
487
- " <th>762</th>\n",
488
- " <td>S9906706</td>\n",
489
- " <td>S9999702</td>\n",
490
- " <td>S9999299</td>\n",
491
- " </tr>\n",
492
- " <tr>\n",
493
- " <th>807</th>\n",
494
- " <td>S9906708</td>\n",
495
- " <td>S9999721</td>\n",
496
- " <td>S9999728</td>\n",
497
- " </tr>\n",
498
- " <tr>\n",
499
- " <th>814</th>\n",
500
- " <td>S9906708</td>\n",
501
- " <td>S9999726</td>\n",
502
- " <td>S9999729</td>\n",
503
- " </tr>\n",
504
- " <tr>\n",
505
- " <th>853</th>\n",
506
- " <td>S9906710</td>\n",
507
- " <td>S9996710</td>\n",
508
- " <td>S9986712</td>\n",
509
- " </tr>\n",
510
- " <tr>\n",
511
- " <th>858</th>\n",
512
- " <td>S9906710</td>\n",
513
- " <td>S9996710</td>\n",
514
- " <td>S9986711</td>\n",
515
- " </tr>\n",
516
- " <tr>\n",
517
- " <th>859</th>\n",
518
- " <td>S9906710</td>\n",
519
- " <td>S9996711</td>\n",
520
- " <td>S9986711</td>\n",
521
- " </tr>\n",
522
- " <tr>\n",
523
- " <th>862</th>\n",
524
- " <td>S9906710</td>\n",
525
- " <td>S9996711</td>\n",
526
- " <td>S9986713</td>\n",
527
- " </tr>\n",
528
- " <tr>\n",
529
- " <th>864</th>\n",
530
- " <td>S9906710</td>\n",
531
- " <td>S9996711</td>\n",
532
- " <td>S9986714</td>\n",
533
- " </tr>\n",
534
- " <tr>\n",
535
- " <th>871</th>\n",
536
- " <td>S9906710</td>\n",
537
- " <td>S9996711</td>\n",
538
- " <td>S9986715</td>\n",
539
- " </tr>\n",
540
- " <tr>\n",
541
- " <th>906</th>\n",
542
- " <td>S9906712</td>\n",
543
- " <td>S9999491</td>\n",
544
- " <td>S9999494</td>\n",
545
- " </tr>\n",
546
- " <tr>\n",
547
- " <th>917</th>\n",
548
- " <td>S9906712</td>\n",
549
- " <td>S9999491</td>\n",
550
- " <td>S9999495</td>\n",
551
- " </tr>\n",
552
- " <tr>\n",
553
- " <th>923</th>\n",
554
- " <td>S9906712</td>\n",
555
- " <td>S9999491</td>\n",
556
- " <td>S9999496</td>\n",
557
- " </tr>\n",
558
- " <tr>\n",
559
- " <th>968</th>\n",
560
- " <td>S9906713</td>\n",
561
- " <td>S9999506</td>\n",
562
- " <td>S9999503</td>\n",
563
- " </tr>\n",
564
- " <tr>\n",
565
- " <th>973</th>\n",
566
- " <td>S9906713</td>\n",
567
- " <td>S9999507</td>\n",
568
- " <td>S9999504</td>\n",
569
- " </tr>\n",
570
- " <tr>\n",
571
- " <th>1135</th>\n",
572
- " <td>S9906729</td>\n",
573
- " <td>S9996722</td>\n",
574
- " <td>S9996726</td>\n",
575
- " </tr>\n",
576
- " <tr>\n",
577
- " <th>1252</th>\n",
578
- " <td>S9906733</td>\n",
579
- " <td>S9993731</td>\n",
580
- " <td>S9993736</td>\n",
581
- " </tr>\n",
582
- " <tr>\n",
583
- " <th>1258</th>\n",
584
- " <td>S9906733</td>\n",
585
- " <td>S9993731</td>\n",
586
- " <td>S9993737</td>\n",
587
- " </tr>\n",
588
- " <tr>\n",
589
- " <th>1266</th>\n",
590
- " <td>S9906733</td>\n",
591
- " <td>S9993732</td>\n",
592
- " <td>S9993739</td>\n",
593
- " </tr>\n",
594
- " <tr>\n",
595
- " <th>1268</th>\n",
596
- " <td>S9906733</td>\n",
597
- " <td>S9993733</td>\n",
598
- " <td>S9993738</td>\n",
599
- " </tr>\n",
600
- " <tr>\n",
601
- " <th>1279</th>\n",
602
- " <td>S9906733</td>\n",
603
- " <td>S9993734</td>\n",
604
- " <td>S9993730</td>\n",
605
- " </tr>\n",
606
- " <tr>\n",
607
- " <th>1315</th>\n",
608
- " <td>S9906734</td>\n",
609
- " <td>S9994732</td>\n",
610
- " <td>S9994735</td>\n",
611
- " </tr>\n",
612
- " <tr>\n",
613
- " <th>1325</th>\n",
614
- " <td>S9906734</td>\n",
615
- " <td>S9994732</td>\n",
616
- " <td>S9994736</td>\n",
617
- " </tr>\n",
618
- " <tr>\n",
619
- " <th>1330</th>\n",
620
- " <td>S9906734</td>\n",
621
- " <td>S9994733</td>\n",
622
- " <td>S9994737</td>\n",
623
- " </tr>\n",
624
- " <tr>\n",
625
- " <th>1338</th>\n",
626
- " <td>S9906734</td>\n",
627
- " <td>S9994738</td>\n",
628
- " <td>S9994738</td>\n",
629
- " </tr>\n",
630
- " <tr>\n",
631
- " <th>1431</th>\n",
632
- " <td>S9906737</td>\n",
633
- " <td>S9997731</td>\n",
634
- " <td>S9997739</td>\n",
635
- " </tr>\n",
636
- " <tr>\n",
637
- " <th>1434</th>\n",
638
- " <td>S9906737</td>\n",
639
- " <td>S9997731</td>\n",
640
- " <td>S9997730</td>\n",
641
- " </tr>\n",
642
- " <tr>\n",
643
- " <th>1487</th>\n",
644
- " <td>S9906753</td>\n",
645
- " <td>S9996752</td>\n",
646
- " <td>S9996751</td>\n",
647
- " </tr>\n",
648
- " <tr>\n",
649
- " <th>1557</th>\n",
650
- " <td>S9906791</td>\n",
651
- " <td>S9999751</td>\n",
652
- " <td>S9999691</td>\n",
653
- " </tr>\n",
654
- " <tr>\n",
655
- " <th>1570</th>\n",
656
- " <td>S9906791</td>\n",
657
- " <td>S9999751</td>\n",
658
- " <td>S9999692</td>\n",
659
- " </tr>\n",
660
- " <tr>\n",
661
- " <th>1581</th>\n",
662
- " <td>S9906791</td>\n",
663
- " <td>S9999752</td>\n",
664
- " <td>S9999693</td>\n",
665
- " </tr>\n",
666
- " <tr>\n",
667
- " <th>1584</th>\n",
668
- " <td>S9906791</td>\n",
669
- " <td>S9999753</td>\n",
670
- " <td>S9999694</td>\n",
671
- " </tr>\n",
672
- " <tr>\n",
673
- " <th>1652</th>\n",
674
- " <td>S9906800</td>\n",
675
- " <td>S9986721</td>\n",
676
- " <td>S9986716</td>\n",
677
- " </tr>\n",
678
- " <tr>\n",
679
- " <th>1655</th>\n",
680
- " <td>S9906800</td>\n",
681
- " <td>S9986722</td>\n",
682
- " <td>S9986716</td>\n",
683
- " </tr>\n",
684
- " <tr>\n",
685
- " <th>1658</th>\n",
686
- " <td>S9906800</td>\n",
687
- " <td>S9986722</td>\n",
688
- " <td>S9986717</td>\n",
689
- " </tr>\n",
690
- " <tr>\n",
691
- " <th>1666</th>\n",
692
- " <td>S9906800</td>\n",
693
- " <td>S9986722</td>\n",
694
- " <td>S9986710</td>\n",
695
- " </tr>\n",
696
- " <tr>\n",
697
- " <th>1767</th>\n",
698
- " <td>S9908300</td>\n",
699
- " <td>S9999995</td>\n",
700
- " <td>S9999944</td>\n",
701
- " </tr>\n",
702
- " <tr>\n",
703
- " <th>1773</th>\n",
704
- " <td>S9908300</td>\n",
705
- " <td>S9999995</td>\n",
706
- " <td>S9999945</td>\n",
707
- " </tr>\n",
708
- " <tr>\n",
709
- " <th>1834</th>\n",
710
- " <td>S9908302</td>\n",
711
- " <td>S9999883</td>\n",
712
- " <td>S9999938</td>\n",
713
- " </tr>\n",
714
- " <tr>\n",
715
- " <th>1838</th>\n",
716
- " <td>S9908302</td>\n",
717
- " <td>S9999884</td>\n",
718
- " <td>S9999939</td>\n",
719
- " </tr>\n",
720
- " <tr>\n",
721
- " <th>1846</th>\n",
722
- " <td>S9908302</td>\n",
723
- " <td>S9999884</td>\n",
724
- " <td>S9999299</td>\n",
725
- " </tr>\n",
726
- " </tbody>\n",
727
- "</table>\n",
728
- "</div>"
729
- ],
730
- "text/plain": [
731
- " Master Kit Sub kit Prepack\n",
732
- "30 S9901040 S9991041 S9991044\n",
733
- "36 S9901040 S9991042 S9991045\n",
734
- "216 S9902219 S9999898 S9999941\n",
735
- "225 S9902219 S9999898 S9999942\n",
736
- "232 S9902219 S9999898 S9999940\n",
737
- "418 S9902450 S9992451 S9992450\n",
738
- "508 S9902470 S9992470 S9992414\n",
739
- "514 S9902470 S9992470 S9992415\n",
740
- "574 S9902480 S9992483 S9992480\n",
741
- "673 S9903003 S9999144 S9999149\n",
742
- "676 S9903003 S9999145 S9999143\n",
743
- "684 S9903003 S9999145 S9999138\n",
744
- "692 S9903003 S9999145 S9999137\n",
745
- "743 S9906706 S9999701 S9999703\n",
746
- "752 S9906706 S9999701 S9999704\n",
747
- "759 S9906706 S9999701 S9999705\n",
748
- "762 S9906706 S9999702 S9999299\n",
749
- "807 S9906708 S9999721 S9999728\n",
750
- "814 S9906708 S9999726 S9999729\n",
751
- "853 S9906710 S9996710 S9986712\n",
752
- "858 S9906710 S9996710 S9986711\n",
753
- "859 S9906710 S9996711 S9986711\n",
754
- "862 S9906710 S9996711 S9986713\n",
755
- "864 S9906710 S9996711 S9986714\n",
756
- "871 S9906710 S9996711 S9986715\n",
757
- "906 S9906712 S9999491 S9999494\n",
758
- "917 S9906712 S9999491 S9999495\n",
759
- "923 S9906712 S9999491 S9999496\n",
760
- "968 S9906713 S9999506 S9999503\n",
761
- "973 S9906713 S9999507 S9999504\n",
762
- "1135 S9906729 S9996722 S9996726\n",
763
- "1252 S9906733 S9993731 S9993736\n",
764
- "1258 S9906733 S9993731 S9993737\n",
765
- "1266 S9906733 S9993732 S9993739\n",
766
- "1268 S9906733 S9993733 S9993738\n",
767
- "1279 S9906733 S9993734 S9993730\n",
768
- "1315 S9906734 S9994732 S9994735\n",
769
- "1325 S9906734 S9994732 S9994736\n",
770
- "1330 S9906734 S9994733 S9994737\n",
771
- "1338 S9906734 S9994738 S9994738\n",
772
- "1431 S9906737 S9997731 S9997739\n",
773
- "1434 S9906737 S9997731 S9997730\n",
774
- "1487 S9906753 S9996752 S9996751\n",
775
- "1557 S9906791 S9999751 S9999691\n",
776
- "1570 S9906791 S9999751 S9999692\n",
777
- "1581 S9906791 S9999752 S9999693\n",
778
- "1584 S9906791 S9999753 S9999694\n",
779
- "1652 S9906800 S9986721 S9986716\n",
780
- "1655 S9906800 S9986722 S9986716\n",
781
- "1658 S9906800 S9986722 S9986717\n",
782
- "1666 S9906800 S9986722 S9986710\n",
783
- "1767 S9908300 S9999995 S9999944\n",
784
- "1773 S9908300 S9999995 S9999945\n",
785
- "1834 S9908302 S9999883 S9999938\n",
786
- "1838 S9908302 S9999884 S9999939\n",
787
- "1846 S9908302 S9999884 S9999299"
788
- ]
789
- },
790
- "execution_count": 83,
791
- "metadata": {},
792
- "output_type": "execute_result"
793
- }
794
- ],
795
- "source": [
796
- "df[[\"Master Kit\", \"Sub kit\", \"Prepack\"]].drop_duplicates()"
797
- ]
798
- },
799
- {
800
- "cell_type": "code",
801
- "execution_count": null,
802
- "id": "76aa422c",
803
- "metadata": {},
804
- "outputs": [],
805
- "source": []
806
- }
807
- ],
808
- "metadata": {
809
- "kernelspec": {
810
- "display_name": "clean_env_cpd",
811
- "language": "python",
812
- "name": "clean_env_cpd"
813
- },
814
- "language_info": {
815
- "codemirror_mode": {
816
- "name": "ipython",
817
- "version": 3
818
- },
819
- "file_extension": ".py",
820
- "mimetype": "text/x-python",
821
- "name": "python",
822
- "nbconvert_exporter": "python",
823
- "pygments_lexer": "ipython3",
824
- "version": "3.10.0"
825
- }
826
- },
827
- "nbformat": 4,
828
- "nbformat_minor": 5
829
- }
 
1
+