{ "cells": [ { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import json\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "need = Medicine_Name\n", "Category\n", "Batch_No\n", "Stock_Quantity\n", "Monthly_Sales\n", "Expiry_Date\n", "Purchase_Date\n", "Supplier" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "#I generate data using faker library " ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "data = pd.read_csv(\"data/data.csv\")" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Medicine_Name 12\n", "Category 7\n", "Batch_No 9576\n", "Stock_Quantity 491\n", "Monthly_Sales 196\n", "Expiry_Date 1347\n", "Purchase_Date 731\n", "Supplier 6\n", "dtype: int64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.nunique()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(10000, 8)" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 10000 entries, 0 to 9999\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Medicine_Name 10000 non-null object\n", " 1 Category 10000 non-null object\n", " 2 Batch_No 10000 non-null object\n", " 3 Stock_Quantity 10000 non-null int64 \n", " 4 Monthly_Sales 10000 non-null int64 \n", " 5 Expiry_Date 10000 non-null object\n", " 6 Purchase_Date 10000 non-null object\n", " 7 Supplier 10000 non-null object\n", "dtypes: int64(2), object(6)\n", "memory usage: 625.1+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "import datetime\n", "data[\"Expiry_Date\"] = pd.to_datetime(data[\"Expiry_Date\"])\n", "data[\"Purchase_Date\"] = pd.to_datetime(data[\"Purchase_Date\"])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 10000 entries, 0 to 9999\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Medicine_Name 10000 non-null object \n", " 1 Category 10000 non-null object \n", " 2 Batch_No 10000 non-null object \n", " 3 Stock_Quantity 10000 non-null int64 \n", " 4 Monthly_Sales 10000 non-null int64 \n", " 5 Expiry_Date 10000 non-null datetime64[ns]\n", " 6 Purchase_Date 10000 non-null datetime64[ns]\n", " 7 Supplier 10000 non-null object \n", "dtypes: datetime64[ns](2), int64(2), object(4)\n", "memory usage: 625.1+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "cat_col = [col for col in data.columns if data[col].dtype ==\"object\"]\n", "num_col = [col for col in data.columns if data[col].dtype !=\"object\"]" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Medicine_Name', 'Category', 'Batch_No', 'Supplier']" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat_col" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Stock_Quantity', 'Monthly_Sales', 'Expiry_Date', 'Purchase_Date']" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "num_col" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 10000 entries, 0 to 9999\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Medicine_Name 10000 non-null object \n", " 1 Category 10000 non-null object \n", " 2 Batch_No 10000 non-null object \n", " 3 Stock_Quantity 10000 non-null int64 \n", " 4 Monthly_Sales 10000 non-null int64 \n", " 5 Expiry_Date 10000 non-null datetime64[ns]\n", " 6 Purchase_Date 10000 non-null datetime64[ns]\n", " 7 Supplier 10000 non-null object \n", "dtypes: datetime64[ns](2), int64(2), object(4)\n", "memory usage: 625.1+ KB\n" ] } ], "source": [ "data.info()" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Medicine_Name 0\n", "Category 0\n", "Batch_No 0\n", "Stock_Quantity 0\n", "Monthly_Sales 0\n", "Expiry_Date 0\n", "Purchase_Date 0\n", "Supplier 0\n", "dtype: int64" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isna().sum()" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(Timestamp('2024-06-22 00:00:00'), Timestamp('2028-06-02 00:00:00'))" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[\"Expiry_Date\"].min(), data[\"Expiry_Date\"].max()" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "today = pd.Timestamp.today()\n", "\n", "expired = data[data[\"Expiry_Date\"] < today]\n" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "soon_expiring = data[\n", " (data[\"Expiry_Date\"] >= today) &\n", " (data[\"Expiry_Date\"] <= today + pd.Timedelta(days=30))\n", "]" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Medicine_NameCategoryBatch_NoStock_QuantityMonthly_SalesExpiry_DatePurchase_DateSupplier
54Cough SyrupCardiacCOU-8314499182026-01-122024-07-24Pfizer Inc.
92MetforminGastroMET-6823162362026-01-222024-08-29Dr. Reddy's
105ParacetamolDiabetesPAR-52652961752026-01-242024-12-18Abbott India
159InsulinCardiacINS-6815159512026-01-152024-10-05Pfizer Inc.
183CetirizineGastroCET-57974331102026-01-072024-06-02Dr. Reddy's
...........................
9909IbuprofenAnalgesicIBU-80104951512026-01-192024-11-05Abbott India
9917AtorvastatinAntibioticATO-6197377902026-01-142025-02-01Abbott India
9946Vitamin CSupplementVIT-90503521262026-01-242024-09-22Dr. Reddy's
9952AmoxicillinAnalgesicAMO-8366280142026-01-232024-10-31Lupin Ltd.
9994Vitamin CAntihistamineVIT-52273541932026-01-082024-03-24Cipla Ltd.
\n", "

325 rows × 8 columns

\n", "
" ], "text/plain": [ " Medicine_Name Category Batch_No Stock_Quantity Monthly_Sales \\\n", "54 Cough Syrup Cardiac COU-8314 499 18 \n", "92 Metformin Gastro MET-6823 162 36 \n", "105 Paracetamol Diabetes PAR-5265 296 175 \n", "159 Insulin Cardiac INS-6815 159 51 \n", "183 Cetirizine Gastro CET-5797 433 110 \n", "... ... ... ... ... ... \n", "9909 Ibuprofen Analgesic IBU-8010 495 151 \n", "9917 Atorvastatin Antibiotic ATO-6197 377 90 \n", "9946 Vitamin C Supplement VIT-9050 352 126 \n", "9952 Amoxicillin Analgesic AMO-8366 280 14 \n", "9994 Vitamin C Antihistamine VIT-5227 354 193 \n", "\n", " Expiry_Date Purchase_Date Supplier \n", "54 2026-01-12 2024-07-24 Pfizer Inc. \n", "92 2026-01-22 2024-08-29 Dr. Reddy's \n", "105 2026-01-24 2024-12-18 Abbott India \n", "159 2026-01-15 2024-10-05 Pfizer Inc. \n", "183 2026-01-07 2024-06-02 Dr. Reddy's \n", "... ... ... ... \n", "9909 2026-01-19 2024-11-05 Abbott India \n", "9917 2026-01-14 2025-02-01 Abbott India \n", "9946 2026-01-24 2024-09-22 Dr. Reddy's \n", "9952 2026-01-23 2024-10-31 Lupin Ltd. \n", "9994 2026-01-08 2024-03-24 Cipla Ltd. \n", "\n", "[325 rows x 8 columns]" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "soon_expiring" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Medicine_Name', 'Category', 'Batch_No', 'Stock_Quantity',\n", " 'Monthly_Sales', 'Expiry_Date', 'Purchase_Date', 'Supplier'],\n", " dtype='object')" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.columns" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2024-11-25\n", "1 2023-12-27\n", "2 2025-09-02\n", "3 2025-06-22\n", "4 2025-02-01\n", "Name: Purchase_Date, dtype: datetime64[ns]" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.Purchase_Date.head()" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "data['purchase_year'] = data.Purchase_Date.dt.year\n", "data['purchase_month'] = data.Purchase_Date.dt.month\n", "data['purchase_date'] = data.Purchase_Date.dt.day" ] }, { "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "data['formated_purchase_date'] = data[\"Purchase_Date\"]" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "data.drop(['formated_purchase_date'],axis=1,inplace=True)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "data.rename(columns={\"Purchase_Date\":\"formated_purchase_date\"},inplace=True)" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 10000 entries, 0 to 9999\n", "Data columns (total 11 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 Medicine_Name 10000 non-null object \n", " 1 Category 10000 non-null object \n", " 2 Batch_No 10000 non-null object \n", " 3 Stock_Quantity 10000 non-null int64 \n", " 4 Monthly_Sales 10000 non-null int64 \n", " 5 Expiry_Date 10000 non-null datetime64[ns]\n", " 6 formated_purchase_date 10000 non-null datetime64[ns]\n", " 7 Supplier 10000 non-null object \n", " 8 purchase_year 10000 non-null int32 \n", " 9 purchase_month 10000 non-null int32 \n", " 10 purchase_date 10000 non-null int32 \n", "dtypes: datetime64[ns](2), int32(3), int64(2), object(4)\n", "memory usage: 742.3+ KB\n" ] } ], "source": [ "data.info()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 2 }