nisharg nargund commited on
Commit
934e96c
·
1 Parent(s): 14ee83f

Upload 4 files

Browse files
medicine-recommend.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-05-26T15:21:29.887162Z","iopub.execute_input":"2022-05-26T15:21:29.887653Z","iopub.status.idle":"2022-05-26T15:21:29.923894Z","shell.execute_reply.started":"2022-05-26T15:21:29.887562Z","shell.execute_reply":"2022-05-26T15:21:29.923084Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"medicines = pd.read_csv('/kaggle/input/medicine/medicine.csv')","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:22:10.369768Z","iopub.execute_input":"2022-05-26T15:22:10.370195Z","iopub.status.idle":"2022-05-26T15:22:10.423042Z","shell.execute_reply.started":"2022-05-26T15:22:10.370167Z","shell.execute_reply":"2022-05-26T15:22:10.422102Z"},"trusted":true},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"medicines.head()","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:22:17.328665Z","iopub.execute_input":"2022-05-26T15:22:17.329081Z","iopub.status.idle":"2022-05-26T15:22:17.352018Z","shell.execute_reply.started":"2022-05-26T15:22:17.329047Z","shell.execute_reply":"2022-05-26T15:22:17.350989Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"medicines.shape","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:22:24.309304Z","iopub.execute_input":"2022-05-26T15:22:24.310198Z","iopub.status.idle":"2022-05-26T15:22:24.316390Z","shell.execute_reply.started":"2022-05-26T15:22:24.310152Z","shell.execute_reply":"2022-05-26T15:22:24.315341Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"medicines.isnull().sum()","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:22:30.316922Z","iopub.execute_input":"2022-05-26T15:22:30.317290Z","iopub.status.idle":"2022-05-26T15:22:30.330892Z","shell.execute_reply.started":"2022-05-26T15:22:30.317261Z","shell.execute_reply":"2022-05-26T15:22:30.329902Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"medicines.dropna(inplace=True)","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:22:36.541469Z","iopub.execute_input":"2022-05-26T15:22:36.541860Z","iopub.status.idle":"2022-05-26T15:22:36.558756Z","shell.execute_reply.started":"2022-05-26T15:22:36.541830Z","shell.execute_reply":"2022-05-26T15:22:36.557372Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"medicines.duplicated().sum()","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:27:45.781384Z","iopub.execute_input":"2022-05-26T15:27:45.781805Z","iopub.status.idle":"2022-05-26T15:27:45.801395Z","shell.execute_reply.started":"2022-05-26T15:27:45.781772Z","shell.execute_reply":"2022-05-26T15:27:45.800506Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"medicines['Description']","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:27:52.263107Z","iopub.execute_input":"2022-05-26T15:27:52.263507Z","iopub.status.idle":"2022-05-26T15:27:52.271658Z","shell.execute_reply.started":"2022-05-26T15:27:52.263462Z","shell.execute_reply":"2022-05-26T15:27:52.270787Z"},"trusted":true},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"medicines['Description'].apply(lambda x:x.split())","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:27:59.453961Z","iopub.execute_input":"2022-05-26T15:27:59.454448Z","iopub.status.idle":"2022-05-26T15:27:59.484759Z","shell.execute_reply.started":"2022-05-26T15:27:59.454413Z","shell.execute_reply":"2022-05-26T15:27:59.483653Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"medicines['Reason'] = medicines['Reason'].apply(lambda x:x.split())\nmedicines['Description'] = medicines['Description'].apply(lambda x:x.split())","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:05.937209Z","iopub.execute_input":"2022-05-26T15:28:05.937681Z","iopub.status.idle":"2022-05-26T15:28:05.968340Z","shell.execute_reply.started":"2022-05-26T15:28:05.937648Z","shell.execute_reply":"2022-05-26T15:28:05.967586Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"medicines['Description'] = medicines['Description'].apply(lambda x:[i.replace(\" \",\"\") for i in x])","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:11.781739Z","iopub.execute_input":"2022-05-26T15:28:11.782091Z","iopub.status.idle":"2022-05-26T15:28:11.919044Z","shell.execute_reply.started":"2022-05-26T15:28:11.782064Z","shell.execute_reply":"2022-05-26T15:28:11.918271Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"medicines['Description'] = medicines['Description'].apply(lambda x:[i.replace(\" \",\"\") for i in x])","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:17.590328Z","iopub.execute_input":"2022-05-26T15:28:17.590729Z","iopub.status.idle":"2022-05-26T15:28:17.626568Z","shell.execute_reply.started":"2022-05-26T15:28:17.590696Z","shell.execute_reply":"2022-05-26T15:28:17.625869Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"medicines['tags'] = medicines['Description'] + medicines['Reason'] ","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:23.479888Z","iopub.execute_input":"2022-05-26T15:28:23.480397Z","iopub.status.idle":"2022-05-26T15:28:23.492088Z","shell.execute_reply.started":"2022-05-26T15:28:23.480365Z","shell.execute_reply":"2022-05-26T15:28:23.491297Z"},"trusted":true},"execution_count":13,"outputs":[]},{"cell_type":"code","source":"new_df = medicines[['index','Drug_Name','tags']]","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:29.352375Z","iopub.execute_input":"2022-05-26T15:28:29.352803Z","iopub.status.idle":"2022-05-26T15:28:29.360610Z","shell.execute_reply.started":"2022-05-26T15:28:29.352771Z","shell.execute_reply":"2022-05-26T15:28:29.359860Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"new_df","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:34.877949Z","iopub.execute_input":"2022-05-26T15:28:34.878349Z","iopub.status.idle":"2022-05-26T15:28:34.895458Z","shell.execute_reply.started":"2022-05-26T15:28:34.878317Z","shell.execute_reply":"2022-05-26T15:28:34.894661Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"new_df['tags'].apply(lambda x:\" \".join(x))","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:40.536242Z","iopub.execute_input":"2022-05-26T15:28:40.536622Z","iopub.status.idle":"2022-05-26T15:28:40.555686Z","shell.execute_reply.started":"2022-05-26T15:28:40.536592Z","shell.execute_reply":"2022-05-26T15:28:40.554737Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"new_df","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:45.841331Z","iopub.execute_input":"2022-05-26T15:28:45.841708Z","iopub.status.idle":"2022-05-26T15:28:45.860869Z","shell.execute_reply.started":"2022-05-26T15:28:45.841677Z","shell.execute_reply":"2022-05-26T15:28:45.859984Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"new_df['tags'] = new_df['tags'].apply(lambda x:\" \".join(x))","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:53.401240Z","iopub.execute_input":"2022-05-26T15:28:53.401750Z","iopub.status.idle":"2022-05-26T15:28:53.416558Z","shell.execute_reply.started":"2022-05-26T15:28:53.401710Z","shell.execute_reply":"2022-05-26T15:28:53.415509Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"new_df","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:28:59.343330Z","iopub.execute_input":"2022-05-26T15:28:59.343874Z","iopub.status.idle":"2022-05-26T15:28:59.357037Z","shell.execute_reply.started":"2022-05-26T15:28:59.343830Z","shell.execute_reply":"2022-05-26T15:28:59.356307Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"new_df['tags'] = new_df['tags'].apply(lambda x:x.lower())","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:29:03.967097Z","iopub.execute_input":"2022-05-26T15:29:03.967503Z","iopub.status.idle":"2022-05-26T15:29:03.980230Z","shell.execute_reply.started":"2022-05-26T15:29:03.967459Z","shell.execute_reply":"2022-05-26T15:29:03.979538Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"new_df","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:29:09.899763Z","iopub.execute_input":"2022-05-26T15:29:09.900261Z","iopub.status.idle":"2022-05-26T15:29:09.913513Z","shell.execute_reply.started":"2022-05-26T15:29:09.900231Z","shell.execute_reply":"2022-05-26T15:29:09.912614Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"code","source":"!pip install nltk","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:29:15.033480Z","iopub.execute_input":"2022-05-26T15:29:15.034557Z","iopub.status.idle":"2022-05-26T15:29:32.260361Z","shell.execute_reply.started":"2022-05-26T15:29:15.034474Z","shell.execute_reply":"2022-05-26T15:29:32.259461Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"code","source":"import nltk","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:29:32.262070Z","iopub.execute_input":"2022-05-26T15:29:32.262390Z","iopub.status.idle":"2022-05-26T15:29:33.396808Z","shell.execute_reply.started":"2022-05-26T15:29:32.262358Z","shell.execute_reply":"2022-05-26T15:29:33.395776Z"},"trusted":true},"execution_count":23,"outputs":[]},{"cell_type":"code","source":"from nltk.stem.porter import PorterStemmer\nps = PorterStemmer()","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:29:44.930510Z","iopub.execute_input":"2022-05-26T15:29:44.930999Z","iopub.status.idle":"2022-05-26T15:29:44.935856Z","shell.execute_reply.started":"2022-05-26T15:29:44.930950Z","shell.execute_reply":"2022-05-26T15:29:44.934888Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"code","source":"!pip install -U scikit-learn scipy matplotlib","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:29:57.554233Z","iopub.execute_input":"2022-05-26T15:29:57.554659Z","iopub.status.idle":"2022-05-26T15:30:14.886756Z","shell.execute_reply.started":"2022-05-26T15:29:57.554624Z","shell.execute_reply":"2022-05-26T15:30:14.885564Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"from sklearn.feature_extraction.text import CountVectorizer\ncv = CountVectorizer(stop_words='english',max_features=5000)","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:30:19.262540Z","iopub.execute_input":"2022-05-26T15:30:19.262947Z","iopub.status.idle":"2022-05-26T15:30:19.267598Z","shell.execute_reply.started":"2022-05-26T15:30:19.262907Z","shell.execute_reply":"2022-05-26T15:30:19.266709Z"},"trusted":true},"execution_count":27,"outputs":[]},{"cell_type":"code","source":"def stem(text):\n y = []\n\n for i in text.split():\n y.append(ps.stem(i))\n\n return \" \".join(y) ","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:30:34.110440Z","iopub.execute_input":"2022-05-26T15:30:34.110883Z","iopub.status.idle":"2022-05-26T15:30:34.115964Z","shell.execute_reply.started":"2022-05-26T15:30:34.110851Z","shell.execute_reply":"2022-05-26T15:30:34.115054Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"new_df['tags'] = new_df['tags'].apply(stem)","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:30:40.637709Z","iopub.execute_input":"2022-05-26T15:30:40.638132Z","iopub.status.idle":"2022-05-26T15:30:43.759883Z","shell.execute_reply.started":"2022-05-26T15:30:40.638098Z","shell.execute_reply":"2022-05-26T15:30:43.758904Z"},"trusted":true},"execution_count":29,"outputs":[]},{"cell_type":"code","source":"cv.fit_transform(new_df['tags']).toarray().shape","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:30:47.868441Z","iopub.execute_input":"2022-05-26T15:30:47.868994Z","iopub.status.idle":"2022-05-26T15:30:48.096978Z","shell.execute_reply.started":"2022-05-26T15:30:47.868951Z","shell.execute_reply":"2022-05-26T15:30:48.096104Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"vectors = cv.fit_transform(new_df['tags']).toarray()","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:30:54.494898Z","iopub.execute_input":"2022-05-26T15:30:54.495313Z","iopub.status.idle":"2022-05-26T15:30:54.713078Z","shell.execute_reply.started":"2022-05-26T15:30:54.495280Z","shell.execute_reply":"2022-05-26T15:30:54.712369Z"},"trusted":true},"execution_count":31,"outputs":[]},{"cell_type":"code","source":"cv.get_feature_names()","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:30:59.892872Z","iopub.execute_input":"2022-05-26T15:30:59.893280Z","iopub.status.idle":"2022-05-26T15:30:59.916357Z","shell.execute_reply.started":"2022-05-26T15:30:59.893247Z","shell.execute_reply":"2022-05-26T15:30:59.915644Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"from sklearn.metrics.pairwise import cosine_similarity","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:31:06.363737Z","iopub.execute_input":"2022-05-26T15:31:06.364175Z","iopub.status.idle":"2022-05-26T15:31:06.368475Z","shell.execute_reply.started":"2022-05-26T15:31:06.364142Z","shell.execute_reply":"2022-05-26T15:31:06.367582Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"cosine_similarity(vectors)","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:31:12.307555Z","iopub.execute_input":"2022-05-26T15:31:12.307965Z","iopub.status.idle":"2022-05-26T15:31:13.947035Z","shell.execute_reply.started":"2022-05-26T15:31:12.307931Z","shell.execute_reply":"2022-05-26T15:31:13.946057Z"},"trusted":true},"execution_count":34,"outputs":[]},{"cell_type":"code","source":"similarity = cosine_similarity(vectors)","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:31:20.837383Z","iopub.execute_input":"2022-05-26T15:31:20.837817Z","iopub.status.idle":"2022-05-26T15:31:22.665373Z","shell.execute_reply.started":"2022-05-26T15:31:20.837784Z","shell.execute_reply":"2022-05-26T15:31:22.663586Z"},"trusted":true},"execution_count":35,"outputs":[]},{"cell_type":"code","source":"similarity[1]","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:31:25.761176Z","iopub.execute_input":"2022-05-26T15:31:25.762261Z","iopub.status.idle":"2022-05-26T15:31:25.771667Z","shell.execute_reply.started":"2022-05-26T15:31:25.762222Z","shell.execute_reply":"2022-05-26T15:31:25.770795Z"},"trusted":true},"execution_count":36,"outputs":[]},{"cell_type":"code","source":"def recommend(medicine):\n medicine_index = new_df[new_df['Drug_Name'] == medicine].index[0]\n distances = similarity[medicine_index]\n medicines_list = sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:6]\n \n for i in medicines_list:\n print(new_df.iloc[i[0]].Drug_Name)\n ","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:31:40.158323Z","iopub.execute_input":"2022-05-26T15:31:40.159068Z","iopub.status.idle":"2022-05-26T15:31:40.166757Z","shell.execute_reply.started":"2022-05-26T15:31:40.159020Z","shell.execute_reply":"2022-05-26T15:31:40.165868Z"},"trusted":true},"execution_count":37,"outputs":[]},{"cell_type":"code","source":"recommend(\"Paracetamol 125mg Syrup 60mlParacetamol 500mg Tablet 10'S\")","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:31:57.565934Z","iopub.execute_input":"2022-05-26T15:31:57.566701Z","iopub.status.idle":"2022-05-26T15:31:57.589837Z","shell.execute_reply.started":"2022-05-26T15:31:57.566659Z","shell.execute_reply":"2022-05-26T15:31:57.588991Z"},"trusted":true},"execution_count":38,"outputs":[]},{"cell_type":"code","source":"import pickle","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:32:35.446511Z","iopub.execute_input":"2022-05-26T15:32:35.447060Z","iopub.status.idle":"2022-05-26T15:32:35.451544Z","shell.execute_reply.started":"2022-05-26T15:32:35.447016Z","shell.execute_reply":"2022-05-26T15:32:35.450612Z"},"trusted":true},"execution_count":39,"outputs":[]},{"cell_type":"code","source":"pickle.dump(new_df.to_dict(),open('medicine_dict.pkl','wb'))\npickle.dump(similarity,open('similarity.pkl','wb'))","metadata":{"execution":{"iopub.status.busy":"2022-05-26T15:32:53.661648Z","iopub.execute_input":"2022-05-26T15:32:53.662097Z","iopub.status.idle":"2022-05-26T15:32:56.474199Z","shell.execute_reply.started":"2022-05-26T15:32:53.662065Z","shell.execute_reply":"2022-05-26T15:32:56.473238Z"},"trusted":true},"execution_count":40,"outputs":[]}]}
medicine.csv ADDED
The diff for this file is too large to render. See raw diff
 
medicine_dict.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdeb69897fd2f95b2d148ed1e82c3e5d265d2f3da407f3f8229195af99ed107c
3
+ size 1313894
similarity.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf82c866d523bd59cbd9b3adb6cf01562a2a5ffebe5b326724b4c21e360380d
3
+ size 755827363