HaLim commited on
Commit
24c313b
·
1 Parent(s): 26ebf77

Preprocessing for kit hierarchy data

Browse files
Files changed (1) hide show
  1. notebook/data_preprocess.ipynb +404 -140
notebook/data_preprocess.ipynb CHANGED
@@ -18,7 +18,7 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": 67,
22
  "id": "2709a4af",
23
  "metadata": {},
24
  "outputs": [
@@ -165,7 +165,7 @@
165
  "33 X "
166
  ]
167
  },
168
- "execution_count": 67,
169
  "metadata": {},
170
  "output_type": "execute_result"
171
  }
@@ -185,7 +185,7 @@
185
  },
186
  {
187
  "cell_type": "code",
188
- "execution_count": 68,
189
  "id": "75349f36",
190
  "metadata": {},
191
  "outputs": [
@@ -193,35 +193,35 @@
193
  "name": "stderr",
194
  "output_type": "stream",
195
  "text": [
196
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/237579764.py:2: SettingWithCopyWarning: \n",
197
  "A value is trying to be set on a copy of a slice from a DataFrame.\n",
198
  "Try using .loc[row_indexer,col_indexer] = value instead\n",
199
  "\n",
200
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
201
  " master[\"kit_type\"] = \"master\"\n",
202
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/237579764.py:3: SettingWithCopyWarning: \n",
203
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
204
  "\n",
205
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
206
  " master.rename(columns={\"Master Kit\": \"kit_name\", \"Master Kit Description\": \"kit_description\"}, inplace=True)\n",
207
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/237579764.py:6: SettingWithCopyWarning: \n",
208
  "A value is trying to be set on a copy of a slice from a DataFrame.\n",
209
  "Try using .loc[row_indexer,col_indexer] = value instead\n",
210
  "\n",
211
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
212
  " subkit[\"kit_type\"] = \"subkit\"\n",
213
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/237579764.py:7: SettingWithCopyWarning: \n",
214
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
215
  "\n",
216
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
217
  " subkit.rename(columns={\"Sub kit\": \"kit_name\", \"Sub kit Description\": \"kit_description\"}, inplace=True)\n",
218
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/237579764.py:11: SettingWithCopyWarning: \n",
219
  "A value is trying to be set on a copy of a slice from a DataFrame.\n",
220
  "Try using .loc[row_indexer,col_indexer] = value instead\n",
221
  "\n",
222
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
223
  " prepack[\"kit_type\"] = \"prepack\"\n",
224
- "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/237579764.py:12: SettingWithCopyWarning: \n",
225
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
226
  "\n",
227
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -285,65 +285,20 @@
285
  " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
286
  " <td>master</td>\n",
287
  " </tr>\n",
288
- " <tr>\n",
289
- " <th>...</th>\n",
290
- " <td>...</td>\n",
291
- " <td>...</td>\n",
292
- " <td>...</td>\n",
293
- " </tr>\n",
294
- " <tr>\n",
295
- " <th>1840</th>\n",
296
- " <td>S9999939</td>\n",
297
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
298
- " <td>prepack</td>\n",
299
- " </tr>\n",
300
- " <tr>\n",
301
- " <th>1841</th>\n",
302
- " <td>S9999939</td>\n",
303
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
304
- " <td>prepack</td>\n",
305
- " </tr>\n",
306
- " <tr>\n",
307
- " <th>1842</th>\n",
308
- " <td>S9999939</td>\n",
309
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
310
- " <td>prepack</td>\n",
311
- " </tr>\n",
312
- " <tr>\n",
313
- " <th>1843</th>\n",
314
- " <td>S9999939</td>\n",
315
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
316
- " <td>prepack</td>\n",
317
- " </tr>\n",
318
- " <tr>\n",
319
- " <th>1846</th>\n",
320
- " <td>S9999299</td>\n",
321
- " <td>BAG for 50 x S0322010 CH12,ster,disp</td>\n",
322
- " <td>prepack</td>\n",
323
- " </tr>\n",
324
  " </tbody>\n",
325
  "</table>\n",
326
- "<p>675 rows × 3 columns</p>\n",
327
  "</div>"
328
  ],
329
  "text/plain": [
330
- " kit_name kit_description kit_type\n",
331
- "30 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
332
- "32 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
333
- "33 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
334
- "36 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
335
- "37 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
336
- "... ... ... ...\n",
337
- "1840 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack\n",
338
- "1841 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack\n",
339
- "1842 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack\n",
340
- "1843 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack\n",
341
- "1846 S9999299 BAG for 50 x S0322010 CH12,ster,disp prepack\n",
342
- "\n",
343
- "[675 rows x 3 columns]"
344
  ]
345
  },
346
- "execution_count": 68,
347
  "metadata": {},
348
  "output_type": "execute_result"
349
  }
@@ -365,15 +320,13 @@
365
  "prepack.rename(columns={\"Prepack\": \"kit_name\", \"Prepack Description\": \"kit_description\"}, inplace=True)\n",
366
  "\n",
367
  "\n",
368
- "df = pd.concat([master, subkit, prepack])\n",
369
- "df.head()\n",
370
- "cleaned_df = df.copy()\n",
371
- "cleaned_df"
372
  ]
373
  },
374
  {
375
  "cell_type": "code",
376
- "execution_count": 69,
377
  "id": "2e6e197d",
378
  "metadata": {},
379
  "outputs": [],
@@ -386,15 +339,26 @@
386
  "\n",
387
  "cleaned_df.loc[cleaned_df['kit_name'].isin(standalone_masterkit_list),'line_type'] = 'long line'\n",
388
  "cleaned_df.loc[cleaned_df['kit_type']=='prepack','line_type'] = 'mini load'\n",
389
- "cleaned_df.loc[cleaned_df['kit_type']=='subkit','line_type'] = 'long line'\n",
390
- "cleaned_df.to_csv('../data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv', index=False)"
391
  ]
392
  },
393
  {
394
  "cell_type": "code",
395
- "execution_count": 70,
396
  "id": "9ff00aa6",
397
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
398
  "outputs": [
399
  {
400
  "data": {
@@ -417,125 +381,425 @@
417
  " <thead>\n",
418
  " <tr style=\"text-align: right;\">\n",
419
  " <th></th>\n",
420
- " <th>kit_name</th>\n",
421
- " <th>kit_description</th>\n",
422
- " <th>kit_type</th>\n",
423
- " <th>line_type</th>\n",
424
  " </tr>\n",
425
  " </thead>\n",
426
  " <tbody>\n",
427
  " <tr>\n",
428
  " <th>30</th>\n",
429
  " <td>S9901040</td>\n",
430
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
431
- " <td>master</td>\n",
432
- " <td>NaN</td>\n",
433
  " </tr>\n",
434
  " <tr>\n",
435
- " <th>32</th>\n",
436
  " <td>S9901040</td>\n",
437
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
438
- " <td>master</td>\n",
439
- " <td>NaN</td>\n",
440
  " </tr>\n",
441
  " <tr>\n",
442
- " <th>33</th>\n",
443
- " <td>S9901040</td>\n",
444
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
445
- " <td>master</td>\n",
446
- " <td>NaN</td>\n",
447
  " </tr>\n",
448
  " <tr>\n",
449
- " <th>36</th>\n",
450
- " <td>S9901040</td>\n",
451
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
452
- " <td>master</td>\n",
453
- " <td>NaN</td>\n",
454
  " </tr>\n",
455
  " <tr>\n",
456
- " <th>37</th>\n",
457
- " <td>S9901040</td>\n",
458
- " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
459
- " <td>master</td>\n",
460
- " <td>NaN</td>\n",
461
  " </tr>\n",
462
  " <tr>\n",
463
- " <th>...</th>\n",
464
- " <td>...</td>\n",
465
- " <td>...</td>\n",
466
- " <td>...</td>\n",
467
- " <td>...</td>\n",
468
  " </tr>\n",
469
  " <tr>\n",
470
- " <th>1840</th>\n",
471
- " <td>S9999939</td>\n",
472
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
473
- " <td>prepack</td>\n",
474
- " <td>mini load</td>\n",
475
  " </tr>\n",
476
  " <tr>\n",
477
- " <th>1841</th>\n",
478
- " <td>S9999939</td>\n",
479
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
480
- " <td>prepack</td>\n",
481
- " <td>mini load</td>\n",
482
  " </tr>\n",
483
  " <tr>\n",
484
- " <th>1842</th>\n",
485
- " <td>S9999939</td>\n",
486
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
487
- " <td>prepack</td>\n",
488
- " <td>mini load</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  " </tr>\n",
490
  " <tr>\n",
491
- " <th>1843</th>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  " <td>S9999939</td>\n",
493
- " <td>Prepack 2/2 Obstetric 3-renew. S9908302</td>\n",
494
- " <td>prepack</td>\n",
495
- " <td>mini load</td>\n",
496
  " </tr>\n",
497
  " <tr>\n",
498
  " <th>1846</th>\n",
 
 
499
  " <td>S9999299</td>\n",
500
- " <td>BAG for 50 x S0322010 CH12,ster,disp</td>\n",
501
- " <td>prepack</td>\n",
502
- " <td>mini load</td>\n",
503
  " </tr>\n",
504
  " </tbody>\n",
505
  "</table>\n",
506
- "<p>675 rows × 4 columns</p>\n",
507
  "</div>"
508
  ],
509
  "text/plain": [
510
- " kit_name kit_description kit_type line_type\n",
511
- "30 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master NaN\n",
512
- "32 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master NaN\n",
513
- "33 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master NaN\n",
514
- "36 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master NaN\n",
515
- "37 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master NaN\n",
516
- "... ... ... ... ...\n",
517
- "1840 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack mini load\n",
518
- "1841 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack mini load\n",
519
- "1842 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack mini load\n",
520
- "1843 S9999939 Prepack 2/2 Obstetric 3-renew. S9908302 prepack mini load\n",
521
- "1846 S9999299 BAG for 50 x S0322010 CH12,ster,disp prepack mini load\n",
522
- "\n",
523
- "[675 rows x 4 columns]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  ]
525
  },
526
- "execution_count": 70,
527
  "metadata": {},
528
  "output_type": "execute_result"
529
  }
530
  ],
531
  "source": [
532
- "cleaned_df"
533
  ]
534
  },
535
  {
536
  "cell_type": "code",
537
  "execution_count": null,
538
- "id": "3d8d547c",
539
  "metadata": {},
540
  "outputs": [],
541
  "source": []
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 78,
22
  "id": "2709a4af",
23
  "metadata": {},
24
  "outputs": [
 
165
  "33 X "
166
  ]
167
  },
168
+ "execution_count": 78,
169
  "metadata": {},
170
  "output_type": "execute_result"
171
  }
 
185
  },
186
  {
187
  "cell_type": "code",
188
+ "execution_count": 79,
189
  "id": "75349f36",
190
  "metadata": {},
191
  "outputs": [
 
193
  "name": "stderr",
194
  "output_type": "stream",
195
  "text": [
196
+ "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:2: SettingWithCopyWarning: \n",
197
  "A value is trying to be set on a copy of a slice from a DataFrame.\n",
198
  "Try using .loc[row_indexer,col_indexer] = value instead\n",
199
  "\n",
200
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
201
  " master[\"kit_type\"] = \"master\"\n",
202
+ "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:3: SettingWithCopyWarning: \n",
203
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
204
  "\n",
205
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
206
  " master.rename(columns={\"Master Kit\": \"kit_name\", \"Master Kit Description\": \"kit_description\"}, inplace=True)\n",
207
+ "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:6: SettingWithCopyWarning: \n",
208
  "A value is trying to be set on a copy of a slice from a DataFrame.\n",
209
  "Try using .loc[row_indexer,col_indexer] = value instead\n",
210
  "\n",
211
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
212
  " subkit[\"kit_type\"] = \"subkit\"\n",
213
+ "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:7: SettingWithCopyWarning: \n",
214
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
215
  "\n",
216
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
217
  " subkit.rename(columns={\"Sub kit\": \"kit_name\", \"Sub kit Description\": \"kit_description\"}, inplace=True)\n",
218
+ "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:11: SettingWithCopyWarning: \n",
219
  "A value is trying to be set on a copy of a slice from a DataFrame.\n",
220
  "Try using .loc[row_indexer,col_indexer] = value instead\n",
221
  "\n",
222
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
223
  " prepack[\"kit_type\"] = \"prepack\"\n",
224
+ "/var/folders/t7/ldqv1xt97rs4jyjvhxf4shgw0000gn/T/ipykernel_12116/2577623884.py:12: SettingWithCopyWarning: \n",
225
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
226
  "\n",
227
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 
285
  " <td>IEHK 2024,Basic Medicine&amp;Renewable UNIT</td>\n",
286
  " <td>master</td>\n",
287
  " </tr>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  " </tbody>\n",
289
  "</table>\n",
 
290
  "</div>"
291
  ],
292
  "text/plain": [
293
+ " kit_name kit_description kit_type\n",
294
+ "30 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
295
+ "32 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
296
+ "33 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
297
+ "36 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master\n",
298
+ "37 S9901040 IEHK 2024,Basic Medicine&Renewable UNIT master"
 
 
 
 
 
 
 
 
299
  ]
300
  },
301
+ "execution_count": 79,
302
  "metadata": {},
303
  "output_type": "execute_result"
304
  }
 
320
  "prepack.rename(columns={\"Prepack\": \"kit_name\", \"Prepack Description\": \"kit_description\"}, inplace=True)\n",
321
  "\n",
322
  "\n",
323
+ "cleaned_df = pd.concat([master, subkit, prepack])\n",
324
+ "cleaned_df.head()"
 
 
325
  ]
326
  },
327
  {
328
  "cell_type": "code",
329
+ "execution_count": 80,
330
  "id": "2e6e197d",
331
  "metadata": {},
332
  "outputs": [],
 
339
  "\n",
340
  "cleaned_df.loc[cleaned_df['kit_name'].isin(standalone_masterkit_list),'line_type'] = 'long line'\n",
341
  "cleaned_df.loc[cleaned_df['kit_type']=='prepack','line_type'] = 'mini load'\n",
342
+ "cleaned_df.loc[cleaned_df['kit_type']=='subkit','line_type'] = 'long line'"
 
343
  ]
344
  },
345
  {
346
  "cell_type": "code",
347
+ "execution_count": 81,
348
  "id": "9ff00aa6",
349
  "metadata": {},
350
+ "outputs": [],
351
+ "source": [
352
+ "cleaned_df.loc[cleaned_df['line_type']=='mini load', 'line_id'] = 7\n",
353
+ "cleaned_df.loc[cleaned_df['line_type']=='long line', 'line_id'] = 6\n",
354
+ "cleaned_df.to_csv('../data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type_and_id.csv', index=False)"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 83,
360
+ "id": "3d8d547c",
361
+ "metadata": {},
362
  "outputs": [
363
  {
364
  "data": {
 
381
  " <thead>\n",
382
  " <tr style=\"text-align: right;\">\n",
383
  " <th></th>\n",
384
+ " <th>Master Kit</th>\n",
385
+ " <th>Sub kit</th>\n",
386
+ " <th>Prepack</th>\n",
 
387
  " </tr>\n",
388
  " </thead>\n",
389
  " <tbody>\n",
390
  " <tr>\n",
391
  " <th>30</th>\n",
392
  " <td>S9901040</td>\n",
393
+ " <td>S9991041</td>\n",
394
+ " <td>S9991044</td>\n",
 
395
  " </tr>\n",
396
  " <tr>\n",
397
+ " <th>36</th>\n",
398
  " <td>S9901040</td>\n",
399
+ " <td>S9991042</td>\n",
400
+ " <td>S9991045</td>\n",
 
401
  " </tr>\n",
402
  " <tr>\n",
403
+ " <th>216</th>\n",
404
+ " <td>S9902219</td>\n",
405
+ " <td>S9999898</td>\n",
406
+ " <td>S9999941</td>\n",
 
407
  " </tr>\n",
408
  " <tr>\n",
409
+ " <th>225</th>\n",
410
+ " <td>S9902219</td>\n",
411
+ " <td>S9999898</td>\n",
412
+ " <td>S9999942</td>\n",
 
413
  " </tr>\n",
414
  " <tr>\n",
415
+ " <th>232</th>\n",
416
+ " <td>S9902219</td>\n",
417
+ " <td>S9999898</td>\n",
418
+ " <td>S9999940</td>\n",
 
419
  " </tr>\n",
420
  " <tr>\n",
421
+ " <th>418</th>\n",
422
+ " <td>S9902450</td>\n",
423
+ " <td>S9992451</td>\n",
424
+ " <td>S9992450</td>\n",
 
425
  " </tr>\n",
426
  " <tr>\n",
427
+ " <th>508</th>\n",
428
+ " <td>S9902470</td>\n",
429
+ " <td>S9992470</td>\n",
430
+ " <td>S9992414</td>\n",
 
431
  " </tr>\n",
432
  " <tr>\n",
433
+ " <th>514</th>\n",
434
+ " <td>S9902470</td>\n",
435
+ " <td>S9992470</td>\n",
436
+ " <td>S9992415</td>\n",
 
437
  " </tr>\n",
438
  " <tr>\n",
439
+ " <th>574</th>\n",
440
+ " <td>S9902480</td>\n",
441
+ " <td>S9992483</td>\n",
442
+ " <td>S9992480</td>\n",
443
+ " </tr>\n",
444
+ " <tr>\n",
445
+ " <th>673</th>\n",
446
+ " <td>S9903003</td>\n",
447
+ " <td>S9999144</td>\n",
448
+ " <td>S9999149</td>\n",
449
+ " </tr>\n",
450
+ " <tr>\n",
451
+ " <th>676</th>\n",
452
+ " <td>S9903003</td>\n",
453
+ " <td>S9999145</td>\n",
454
+ " <td>S9999143</td>\n",
455
+ " </tr>\n",
456
+ " <tr>\n",
457
+ " <th>684</th>\n",
458
+ " <td>S9903003</td>\n",
459
+ " <td>S9999145</td>\n",
460
+ " <td>S9999138</td>\n",
461
+ " </tr>\n",
462
+ " <tr>\n",
463
+ " <th>692</th>\n",
464
+ " <td>S9903003</td>\n",
465
+ " <td>S9999145</td>\n",
466
+ " <td>S9999137</td>\n",
467
+ " </tr>\n",
468
+ " <tr>\n",
469
+ " <th>743</th>\n",
470
+ " <td>S9906706</td>\n",
471
+ " <td>S9999701</td>\n",
472
+ " <td>S9999703</td>\n",
473
+ " </tr>\n",
474
+ " <tr>\n",
475
+ " <th>752</th>\n",
476
+ " <td>S9906706</td>\n",
477
+ " <td>S9999701</td>\n",
478
+ " <td>S9999704</td>\n",
479
+ " </tr>\n",
480
+ " <tr>\n",
481
+ " <th>759</th>\n",
482
+ " <td>S9906706</td>\n",
483
+ " <td>S9999701</td>\n",
484
+ " <td>S9999705</td>\n",
485
+ " </tr>\n",
486
+ " <tr>\n",
487
+ " <th>762</th>\n",
488
+ " <td>S9906706</td>\n",
489
+ " <td>S9999702</td>\n",
490
+ " <td>S9999299</td>\n",
491
+ " </tr>\n",
492
+ " <tr>\n",
493
+ " <th>807</th>\n",
494
+ " <td>S9906708</td>\n",
495
+ " <td>S9999721</td>\n",
496
+ " <td>S9999728</td>\n",
497
  " </tr>\n",
498
  " <tr>\n",
499
+ " <th>814</th>\n",
500
+ " <td>S9906708</td>\n",
501
+ " <td>S9999726</td>\n",
502
+ " <td>S9999729</td>\n",
503
+ " </tr>\n",
504
+ " <tr>\n",
505
+ " <th>853</th>\n",
506
+ " <td>S9906710</td>\n",
507
+ " <td>S9996710</td>\n",
508
+ " <td>S9986712</td>\n",
509
+ " </tr>\n",
510
+ " <tr>\n",
511
+ " <th>858</th>\n",
512
+ " <td>S9906710</td>\n",
513
+ " <td>S9996710</td>\n",
514
+ " <td>S9986711</td>\n",
515
+ " </tr>\n",
516
+ " <tr>\n",
517
+ " <th>859</th>\n",
518
+ " <td>S9906710</td>\n",
519
+ " <td>S9996711</td>\n",
520
+ " <td>S9986711</td>\n",
521
+ " </tr>\n",
522
+ " <tr>\n",
523
+ " <th>862</th>\n",
524
+ " <td>S9906710</td>\n",
525
+ " <td>S9996711</td>\n",
526
+ " <td>S9986713</td>\n",
527
+ " </tr>\n",
528
+ " <tr>\n",
529
+ " <th>864</th>\n",
530
+ " <td>S9906710</td>\n",
531
+ " <td>S9996711</td>\n",
532
+ " <td>S9986714</td>\n",
533
+ " </tr>\n",
534
+ " <tr>\n",
535
+ " <th>871</th>\n",
536
+ " <td>S9906710</td>\n",
537
+ " <td>S9996711</td>\n",
538
+ " <td>S9986715</td>\n",
539
+ " </tr>\n",
540
+ " <tr>\n",
541
+ " <th>906</th>\n",
542
+ " <td>S9906712</td>\n",
543
+ " <td>S9999491</td>\n",
544
+ " <td>S9999494</td>\n",
545
+ " </tr>\n",
546
+ " <tr>\n",
547
+ " <th>917</th>\n",
548
+ " <td>S9906712</td>\n",
549
+ " <td>S9999491</td>\n",
550
+ " <td>S9999495</td>\n",
551
+ " </tr>\n",
552
+ " <tr>\n",
553
+ " <th>923</th>\n",
554
+ " <td>S9906712</td>\n",
555
+ " <td>S9999491</td>\n",
556
+ " <td>S9999496</td>\n",
557
+ " </tr>\n",
558
+ " <tr>\n",
559
+ " <th>968</th>\n",
560
+ " <td>S9906713</td>\n",
561
+ " <td>S9999506</td>\n",
562
+ " <td>S9999503</td>\n",
563
+ " </tr>\n",
564
+ " <tr>\n",
565
+ " <th>973</th>\n",
566
+ " <td>S9906713</td>\n",
567
+ " <td>S9999507</td>\n",
568
+ " <td>S9999504</td>\n",
569
+ " </tr>\n",
570
+ " <tr>\n",
571
+ " <th>1135</th>\n",
572
+ " <td>S9906729</td>\n",
573
+ " <td>S9996722</td>\n",
574
+ " <td>S9996726</td>\n",
575
+ " </tr>\n",
576
+ " <tr>\n",
577
+ " <th>1252</th>\n",
578
+ " <td>S9906733</td>\n",
579
+ " <td>S9993731</td>\n",
580
+ " <td>S9993736</td>\n",
581
+ " </tr>\n",
582
+ " <tr>\n",
583
+ " <th>1258</th>\n",
584
+ " <td>S9906733</td>\n",
585
+ " <td>S9993731</td>\n",
586
+ " <td>S9993737</td>\n",
587
+ " </tr>\n",
588
+ " <tr>\n",
589
+ " <th>1266</th>\n",
590
+ " <td>S9906733</td>\n",
591
+ " <td>S9993732</td>\n",
592
+ " <td>S9993739</td>\n",
593
+ " </tr>\n",
594
+ " <tr>\n",
595
+ " <th>1268</th>\n",
596
+ " <td>S9906733</td>\n",
597
+ " <td>S9993733</td>\n",
598
+ " <td>S9993738</td>\n",
599
+ " </tr>\n",
600
+ " <tr>\n",
601
+ " <th>1279</th>\n",
602
+ " <td>S9906733</td>\n",
603
+ " <td>S9993734</td>\n",
604
+ " <td>S9993730</td>\n",
605
+ " </tr>\n",
606
+ " <tr>\n",
607
+ " <th>1315</th>\n",
608
+ " <td>S9906734</td>\n",
609
+ " <td>S9994732</td>\n",
610
+ " <td>S9994735</td>\n",
611
+ " </tr>\n",
612
+ " <tr>\n",
613
+ " <th>1325</th>\n",
614
+ " <td>S9906734</td>\n",
615
+ " <td>S9994732</td>\n",
616
+ " <td>S9994736</td>\n",
617
+ " </tr>\n",
618
+ " <tr>\n",
619
+ " <th>1330</th>\n",
620
+ " <td>S9906734</td>\n",
621
+ " <td>S9994733</td>\n",
622
+ " <td>S9994737</td>\n",
623
+ " </tr>\n",
624
+ " <tr>\n",
625
+ " <th>1338</th>\n",
626
+ " <td>S9906734</td>\n",
627
+ " <td>S9994738</td>\n",
628
+ " <td>S9994738</td>\n",
629
+ " </tr>\n",
630
+ " <tr>\n",
631
+ " <th>1431</th>\n",
632
+ " <td>S9906737</td>\n",
633
+ " <td>S9997731</td>\n",
634
+ " <td>S9997739</td>\n",
635
+ " </tr>\n",
636
+ " <tr>\n",
637
+ " <th>1434</th>\n",
638
+ " <td>S9906737</td>\n",
639
+ " <td>S9997731</td>\n",
640
+ " <td>S9997730</td>\n",
641
+ " </tr>\n",
642
+ " <tr>\n",
643
+ " <th>1487</th>\n",
644
+ " <td>S9906753</td>\n",
645
+ " <td>S9996752</td>\n",
646
+ " <td>S9996751</td>\n",
647
+ " </tr>\n",
648
+ " <tr>\n",
649
+ " <th>1557</th>\n",
650
+ " <td>S9906791</td>\n",
651
+ " <td>S9999751</td>\n",
652
+ " <td>S9999691</td>\n",
653
+ " </tr>\n",
654
+ " <tr>\n",
655
+ " <th>1570</th>\n",
656
+ " <td>S9906791</td>\n",
657
+ " <td>S9999751</td>\n",
658
+ " <td>S9999692</td>\n",
659
+ " </tr>\n",
660
+ " <tr>\n",
661
+ " <th>1581</th>\n",
662
+ " <td>S9906791</td>\n",
663
+ " <td>S9999752</td>\n",
664
+ " <td>S9999693</td>\n",
665
+ " </tr>\n",
666
+ " <tr>\n",
667
+ " <th>1584</th>\n",
668
+ " <td>S9906791</td>\n",
669
+ " <td>S9999753</td>\n",
670
+ " <td>S9999694</td>\n",
671
+ " </tr>\n",
672
+ " <tr>\n",
673
+ " <th>1652</th>\n",
674
+ " <td>S9906800</td>\n",
675
+ " <td>S9986721</td>\n",
676
+ " <td>S9986716</td>\n",
677
+ " </tr>\n",
678
+ " <tr>\n",
679
+ " <th>1655</th>\n",
680
+ " <td>S9906800</td>\n",
681
+ " <td>S9986722</td>\n",
682
+ " <td>S9986716</td>\n",
683
+ " </tr>\n",
684
+ " <tr>\n",
685
+ " <th>1658</th>\n",
686
+ " <td>S9906800</td>\n",
687
+ " <td>S9986722</td>\n",
688
+ " <td>S9986717</td>\n",
689
+ " </tr>\n",
690
+ " <tr>\n",
691
+ " <th>1666</th>\n",
692
+ " <td>S9906800</td>\n",
693
+ " <td>S9986722</td>\n",
694
+ " <td>S9986710</td>\n",
695
+ " </tr>\n",
696
+ " <tr>\n",
697
+ " <th>1767</th>\n",
698
+ " <td>S9908300</td>\n",
699
+ " <td>S9999995</td>\n",
700
+ " <td>S9999944</td>\n",
701
+ " </tr>\n",
702
+ " <tr>\n",
703
+ " <th>1773</th>\n",
704
+ " <td>S9908300</td>\n",
705
+ " <td>S9999995</td>\n",
706
+ " <td>S9999945</td>\n",
707
+ " </tr>\n",
708
+ " <tr>\n",
709
+ " <th>1834</th>\n",
710
+ " <td>S9908302</td>\n",
711
+ " <td>S9999883</td>\n",
712
+ " <td>S9999938</td>\n",
713
+ " </tr>\n",
714
+ " <tr>\n",
715
+ " <th>1838</th>\n",
716
+ " <td>S9908302</td>\n",
717
+ " <td>S9999884</td>\n",
718
  " <td>S9999939</td>\n",
 
 
 
719
  " </tr>\n",
720
  " <tr>\n",
721
  " <th>1846</th>\n",
722
+ " <td>S9908302</td>\n",
723
+ " <td>S9999884</td>\n",
724
  " <td>S9999299</td>\n",
 
 
 
725
  " </tr>\n",
726
  " </tbody>\n",
727
  "</table>\n",
 
728
  "</div>"
729
  ],
730
  "text/plain": [
731
+ " Master Kit Sub kit Prepack\n",
732
+ "30 S9901040 S9991041 S9991044\n",
733
+ "36 S9901040 S9991042 S9991045\n",
734
+ "216 S9902219 S9999898 S9999941\n",
735
+ "225 S9902219 S9999898 S9999942\n",
736
+ "232 S9902219 S9999898 S9999940\n",
737
+ "418 S9902450 S9992451 S9992450\n",
738
+ "508 S9902470 S9992470 S9992414\n",
739
+ "514 S9902470 S9992470 S9992415\n",
740
+ "574 S9902480 S9992483 S9992480\n",
741
+ "673 S9903003 S9999144 S9999149\n",
742
+ "676 S9903003 S9999145 S9999143\n",
743
+ "684 S9903003 S9999145 S9999138\n",
744
+ "692 S9903003 S9999145 S9999137\n",
745
+ "743 S9906706 S9999701 S9999703\n",
746
+ "752 S9906706 S9999701 S9999704\n",
747
+ "759 S9906706 S9999701 S9999705\n",
748
+ "762 S9906706 S9999702 S9999299\n",
749
+ "807 S9906708 S9999721 S9999728\n",
750
+ "814 S9906708 S9999726 S9999729\n",
751
+ "853 S9906710 S9996710 S9986712\n",
752
+ "858 S9906710 S9996710 S9986711\n",
753
+ "859 S9906710 S9996711 S9986711\n",
754
+ "862 S9906710 S9996711 S9986713\n",
755
+ "864 S9906710 S9996711 S9986714\n",
756
+ "871 S9906710 S9996711 S9986715\n",
757
+ "906 S9906712 S9999491 S9999494\n",
758
+ "917 S9906712 S9999491 S9999495\n",
759
+ "923 S9906712 S9999491 S9999496\n",
760
+ "968 S9906713 S9999506 S9999503\n",
761
+ "973 S9906713 S9999507 S9999504\n",
762
+ "1135 S9906729 S9996722 S9996726\n",
763
+ "1252 S9906733 S9993731 S9993736\n",
764
+ "1258 S9906733 S9993731 S9993737\n",
765
+ "1266 S9906733 S9993732 S9993739\n",
766
+ "1268 S9906733 S9993733 S9993738\n",
767
+ "1279 S9906733 S9993734 S9993730\n",
768
+ "1315 S9906734 S9994732 S9994735\n",
769
+ "1325 S9906734 S9994732 S9994736\n",
770
+ "1330 S9906734 S9994733 S9994737\n",
771
+ "1338 S9906734 S9994738 S9994738\n",
772
+ "1431 S9906737 S9997731 S9997739\n",
773
+ "1434 S9906737 S9997731 S9997730\n",
774
+ "1487 S9906753 S9996752 S9996751\n",
775
+ "1557 S9906791 S9999751 S9999691\n",
776
+ "1570 S9906791 S9999751 S9999692\n",
777
+ "1581 S9906791 S9999752 S9999693\n",
778
+ "1584 S9906791 S9999753 S9999694\n",
779
+ "1652 S9906800 S9986721 S9986716\n",
780
+ "1655 S9906800 S9986722 S9986716\n",
781
+ "1658 S9906800 S9986722 S9986717\n",
782
+ "1666 S9906800 S9986722 S9986710\n",
783
+ "1767 S9908300 S9999995 S9999944\n",
784
+ "1773 S9908300 S9999995 S9999945\n",
785
+ "1834 S9908302 S9999883 S9999938\n",
786
+ "1838 S9908302 S9999884 S9999939\n",
787
+ "1846 S9908302 S9999884 S9999299"
788
  ]
789
  },
790
+ "execution_count": 83,
791
  "metadata": {},
792
  "output_type": "execute_result"
793
  }
794
  ],
795
  "source": [
796
+ "df[[\"Master Kit\", \"Sub kit\", \"Prepack\"]].drop_duplicates()"
797
  ]
798
  },
799
  {
800
  "cell_type": "code",
801
  "execution_count": null,
802
+ "id": "76aa422c",
803
  "metadata": {},
804
  "outputs": [],
805
  "source": []