{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 1.Required packages" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from scipy.sparse import csr_matrix\n", "from sklearn.neighbors import NearestNeighbors\n", "import pickle" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2.Load data" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\USER\\AppData\\Local\\Temp\\ipykernel_8204\\1552887261.py:1: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n", " book_df=pd.read_csv(\"books.csv\")\n" ] } ], "source": [ "book_df=pd.read_csv(\"books.csv\")\n", "users_df = pd.read_csv(\"Users.csv\")\n", "rating_df = pd.read_csv(\"Ratings.csv\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3.Basic info" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(271360, 8)\n", "(278858, 3)\n", "(1149780, 3)\n" ] } ], "source": [ "print(book_df.shape)\n", "print(users_df.shape)\n", "print(rating_df.shape)\n" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 271360 entries, 0 to 271359\n", "Data columns (total 8 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 ISBN 271360 non-null object\n", " 1 Book-Title 271360 non-null object\n", " 2 Book-Author 271358 non-null object\n", " 3 Year-Of-Publication 271360 non-null object\n", " 4 Publisher 271358 non-null object\n", " 5 Image-URL-S 271360 non-null object\n", " 6 Image-URL-M 271360 non-null object\n", " 7 Image-URL-L 271357 non-null object\n", "dtypes: 
object(8)\n", "memory usage: 16.6+ MB\n", "None\n", "====================================================\n", "\n", "RangeIndex: 278858 entries, 0 to 278857\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 User-ID 278858 non-null int64 \n", " 1 Location 278858 non-null object \n", " 2 Age 168096 non-null float64\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 6.4+ MB\n", "None\n", "====================================================\n", "\n", "RangeIndex: 1149780 entries, 0 to 1149779\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 User-ID 1149780 non-null int64 \n", " 1 ISBN 1149780 non-null object\n", " 2 Book-Rating 1149780 non-null int64 \n", "dtypes: int64(2), object(1)\n", "memory usage: 26.3+ MB\n", "None\n", "====================================================\n" ] } ], "source": [ "print(book_df.info())\n", "print(\"====================================================\")\n", "print(users_df.info())\n", "print(\"====================================================\")\n", "print(rating_df.info())\n", "print(\"====================================================\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.Preprocess (EDA)" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "book_df.drop([\"Image-URL-S\",\"Image-URL-M\",\"Image-URL-L\"],axis=1,inplace=True)" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISBNBook-TitleBook-AuthorYear-Of-PublicationPublisher
00195153448Classical MythologyMark P. O. Morford2002Oxford University Press
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canada
20060973129Decision in NormandyCarlo D'Este1991HarperPerennial
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Giroux
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Company
\n", "
" ], "text/plain": [ " ISBN Book-Title \\\n", "0 0195153448 Classical Mythology \n", "1 0002005018 Clara Callan \n", "2 0060973129 Decision in Normandy \n", "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", "4 0393045218 The Mummies of Urumchi \n", "\n", " Book-Author Year-Of-Publication Publisher \n", "0 Mark P. O. Morford 2002 Oxford University Press \n", "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", "2 Carlo D'Este 1991 HarperPerennial \n", "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", "4 E. J. W. Barber 1999 W. W. Norton & Company " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_df.head()" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDLocationAge
01nyc, new york, usaNaN
12stockton, california, usa18.0
23moscow, yukon territory, russiaNaN
34porto, v.n.gaia, portugal17.0
45farnborough, hants, united kingdomNaN
\n", "
" ], "text/plain": [ " User-ID Location Age\n", "0 1 nyc, new york, usa NaN\n", "1 2 stockton, california, usa 18.0\n", "2 3 moscow, yukon territory, russia NaN\n", "3 4 porto, v.n.gaia, portugal 17.0\n", "4 5 farnborough, hants, united kingdom NaN" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users_df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.1 checking for null values" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "User-ID 0\n", "Location 0\n", "Age 110762\n", "dtype: int64" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users_df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "278858" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users_df[\"User-ID\"].nunique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- total 278858 users." ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDISBNBook-Rating
0276725034545104X0
127672601550612245
\n", "
" ], "text/plain": [ " User-ID ISBN Book-Rating\n", "0 276725 034545104X 0\n", "1 276726 0155061224 5" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rating_df.head(2)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "User-ID\n", "11676 13602\n", "198711 7550\n", "153662 6109\n", "98391 5891\n", "35859 5850\n", " ... \n", "274808 201\n", "28634 201\n", "59727 201\n", "268622 201\n", "188951 201\n", "Name: count, Length: 899, dtype: int64" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "review_count=rating_df[\"User-ID\"].value_counts()\n", "msot_review_users=review_count[review_count>200]\n", "msot_review_users" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.2 Finding users who reviewed more than 200 books" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDISBNBook-Rating
1456277427002542730X10
145727742700262174570
1458277427003008685X8
145927742700306153210
146027742700600020500
\n", "
" ], "text/plain": [ " User-ID ISBN Book-Rating\n", "1456 277427 002542730X 10\n", "1457 277427 0026217457 0\n", "1458 277427 003008685X 8\n", "1459 277427 0030615321 0\n", "1460 277427 0060002050 0" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ratings from the users who have rated more than 200 books\n", "ratings=rating_df[rating_df[\"User-ID\"].isin(msot_review_users.index)]\n", "ratings.head()" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(526356, 3)" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings.shape" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ISBNBook-TitleBook-AuthorYear-Of-PublicationPublisher
00195153448Classical MythologyMark P. O. Morford2002Oxford University Press
10002005018Clara CallanRichard Bruce Wright2001HarperFlamingo Canada
20060973129Decision in NormandyCarlo D'Este1991HarperPerennial
30374157065Flu: The Story of the Great Influenza Pandemic...Gina Bari Kolata1999Farrar Straus Giroux
40393045218The Mummies of UrumchiE. J. W. Barber1999W. W. Norton & Company
\n", "
" ], "text/plain": [ " ISBN Book-Title \\\n", "0 0195153448 Classical Mythology \n", "1 0002005018 Clara Callan \n", "2 0060973129 Decision in Normandy \n", "3 0374157065 Flu: The Story of the Great Influenza Pandemic... \n", "4 0393045218 The Mummies of Urumchi \n", "\n", " Book-Author Year-Of-Publication Publisher \n", "0 Mark P. O. Morford 2002 Oxford University Press \n", "1 Richard Bruce Wright 2001 HarperFlamingo Canada \n", "2 Carlo D'Este 1991 HarperPerennial \n", "3 Gina Bari Kolata 1999 Farrar Straus Giroux \n", "4 E. J. W. Barber 1999 W. W. Norton & Company " ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_df.head()" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDISBNBook-RatingBook-TitleBook-AuthorYear-Of-PublicationPublisher
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inc
127742700262174570Vegetarian Times Complete CookbookLucy Moll1995John Wiley & Sons
\n", "
" ], "text/plain": [ " User-ID ISBN Book-Rating \\\n", "0 277427 002542730X 10 \n", "1 277427 0026217457 0 \n", "\n", " Book-Title Book-Author \\\n", "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "1 Vegetarian Times Complete Cookbook Lucy Moll \n", "\n", " Year-Of-Publication Publisher \n", "0 1994 John Wiley & Sons Inc \n", "1 1995 John Wiley & Sons " ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rating_with_books = ratings.merge(book_df, on=\"ISBN\")\n", "rating_with_books.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.3 Counting the number of ratings for each book" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Book-Titlenum_of_rating
0A Light in the Storm: The Civil War Diary of ...2
1Always Have Popsicles1
2Apple Magic (The Collector's series)1
3Beyond IBM: Leadership Marketing and Finance ...1
4Clifford Visita El Hospital (Clifford El Gran...1
\n", "
" ], "text/plain": [ " Book-Title num_of_rating\n", "0 A Light in the Storm: The Civil War Diary of ... 2\n", "1 Always Have Popsicles 1\n", "2 Apple Magic (The Collector's series) 1\n", "3 Beyond IBM: Leadership Marketing and Finance ... 1\n", "4 Clifford Visita El Hospital (Clifford El Gran... 1" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "num_rating = rating_with_books.groupby(\"Book-Title\")[\"Book-Rating\"].count().reset_index()\n", "num_rating.rename(columns={\"Book-Rating\":\"num_of_rating\"},inplace=True)\n", "num_rating.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.4 Merge rating with book and number of ratings" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDISBNBook-RatingBook-TitleBook-AuthorYear-Of-PublicationPublishernum_of_rating
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inc82
127742700262174570Vegetarian Times Complete CookbookLucy Moll1995John Wiley & Sons7
2277427003008685X8PioneersJames Fenimore Cooper1974Thomson Learning1
327742700306153210Ask for May, Settle for June (A Doonesbury book)G. B. Trudeau1982Henry Holt & Co1
427742700600020500On a Wicked Dawn (Cynster Novels)Stephanie Laurens2002Avon Books13
\n", "
" ], "text/plain": [ " User-ID ISBN Book-Rating \\\n", "0 277427 002542730X 10 \n", "1 277427 0026217457 0 \n", "2 277427 003008685X 8 \n", "3 277427 0030615321 0 \n", "4 277427 0060002050 0 \n", "\n", " Book-Title Book-Author \\\n", "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "1 Vegetarian Times Complete Cookbook Lucy Moll \n", "2 Pioneers James Fenimore Cooper \n", "3 Ask for May, Settle for June (A Doonesbury book) G. B. Trudeau \n", "4 On a Wicked Dawn (Cynster Novels) Stephanie Laurens \n", "\n", " Year-Of-Publication Publisher num_of_rating \n", "0 1994 John Wiley & Sons Inc 82 \n", "1 1995 John Wiley & Sons 7 \n", "2 1974 Thomson Learning 1 \n", "3 1982 Henry Holt & Co 1 \n", "4 2002 Avon Books 13 " ] }, "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating=rating_with_books.merge(num_rating, on=\"Book-Title\")\n", "final_rating.head()" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(487671, 8)" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating.shape" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-IDISBNBook-RatingBook-TitleBook-AuthorYear-Of-PublicationPublishernum_of_rating
0277427002542730X10Politically Correct Bedtime Stories: Modern Ta...James Finn Garner1994John Wiley & Sons Inc82
1327742700609305350The Poisonwood Bible: A NovelBarbara Kingsolver1999Perennial133
1527742700609344170Bel Canto: A NovelAnn Patchett2002Perennial108
\n", "
" ], "text/plain": [ " User-ID ISBN Book-Rating \\\n", "0 277427 002542730X 10 \n", "13 277427 0060930535 0 \n", "15 277427 0060934417 0 \n", "\n", " Book-Title Book-Author \\\n", "0 Politically Correct Bedtime Stories: Modern Ta... James Finn Garner \n", "13 The Poisonwood Bible: A Novel Barbara Kingsolver \n", "15 Bel Canto: A Novel Ann Patchett \n", "\n", " Year-Of-Publication Publisher num_of_rating \n", "0 1994 John Wiley & Sons Inc 82 \n", "13 1999 Perennial 133 \n", "15 2002 Perennial 108 " ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating = final_rating[final_rating[\"num_of_rating\"]>=50]\n", "final_rating.head(3)" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(61853, 8)" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating.shape" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [], "source": [ "final_rating.drop_duplicates([\"User-ID\",\"Book-Title\"],inplace=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(59850, 8)" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_rating.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.5 Creating pivot table" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-ID254227627662977336337574017438562426251...274004274061274301274308274808275970277427277478277639278418
Book-Title
19849.0NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
1st to Die: A NovelNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2nd ChanceNaN10.0NaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaN0.0NaNNaNNaNNaN0.0NaN
4 BlondesNaNNaNNaNNaNNaNNaNNaNNaNNaN0.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
84 Charing Cross RoadNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN10.0NaNNaNNaNNaN
..................................................................
Year of WondersNaNNaNNaN7.0NaNNaNNaNNaN7.0NaN...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
You Belong To MeNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Zen and the Art of Motorcycle Maintenance: An Inquiry into ValuesNaNNaNNaNNaN0.0NaNNaNNaNNaN0.0...NaNNaNNaNNaNNaN0.0NaNNaNNaNNaN
ZoyaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\\O\\\" Is for Outlaw\"NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN8.0NaNNaNNaNNaNNaNNaNNaN
\n", "

742 rows × 888 columns

\n", "
" ], "text/plain": [ "User-ID 254 2276 2766 \\\n", "Book-Title \n", "1984 9.0 NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN 10.0 NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders NaN NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "User-ID 2977 3363 3757 \\\n", "Book-Title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders 7.0 NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "User-ID 4017 4385 6242 \\\n", "Book-Title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders NaN NaN 7.0 \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "User-ID 6251 ... 274004 \\\n", "Book-Title ... \n", "1984 NaN ... NaN \n", "1st to Die: A Novel NaN ... NaN \n", "2nd Chance NaN ... NaN \n", "4 Blondes 0.0 ... NaN \n", "84 Charing Cross Road NaN ... NaN \n", "... ... ... ... \n", "Year of Wonders NaN ... NaN \n", "You Belong To Me NaN ... NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... NaN \n", "Zoya NaN ... NaN \n", "\\O\\\" Is for Outlaw\" NaN ... NaN \n", "\n", "User-ID 274061 274301 274308 \\\n", "Book-Title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN 0.0 \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... 
... \n", "Year of Wonders NaN NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN 8.0 NaN \n", "\n", "User-ID 274808 275970 277427 \\\n", "Book-Title \n", "1984 NaN 0.0 NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN NaN NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN 10.0 NaN \n", "... ... ... ... \n", "Year of Wonders NaN 0.0 NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN 0.0 NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "User-ID 277478 277639 278418 \n", "Book-Title \n", "1984 NaN NaN NaN \n", "1st to Die: A Novel NaN NaN NaN \n", "2nd Chance NaN 0.0 NaN \n", "4 Blondes NaN NaN NaN \n", "84 Charing Cross Road NaN NaN NaN \n", "... ... ... ... \n", "Year of Wonders NaN NaN NaN \n", "You Belong To Me NaN NaN NaN \n", "Zen and the Art of Motorcycle Maintenance: An I... NaN NaN NaN \n", "Zoya NaN NaN NaN \n", "\\O\\\" Is for Outlaw\" NaN NaN NaN \n", "\n", "[742 rows x 888 columns]" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_pivot_tale = final_rating.pivot_table(columns=\"User-ID\",\n", " index=\"Book-Title\",\n", " values=\"Book-Rating\")\n", "\n", "book_pivot_tale" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [], "source": [ "book_pivot_tale.fillna(0,inplace=True)" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
User-ID254227627662977336337574017438562426251...274004274061274301274308274808275970277427277478277639278418
Book-Title
19849.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
1st to Die: A Novel0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
2nd Chance0.010.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
4 Blondes0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
84 Charing Cross Road0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.010.00.00.00.00.0
..................................................................
Year of Wonders0.00.00.07.00.00.00.00.07.00.0...0.00.00.00.00.00.00.00.00.00.0
You Belong To Me0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
Zen and the Art of Motorcycle Maintenance: An Inquiry into Values0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
Zoya0.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\\O\\\" Is for Outlaw\"0.00.00.00.00.00.00.00.00.00.0...0.00.08.00.00.00.00.00.00.00.0
\n", "

742 rows × 888 columns

\n", "
" ], "text/plain": [ "User-ID 254 2276 2766 \\\n", "Book-Title \n", "1984 9.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 10.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "User-ID 2977 3363 3757 \\\n", "Book-Title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 7.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "User-ID 4017 4385 6242 \\\n", "Book-Title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 7.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "User-ID 6251 ... 274004 \\\n", "Book-Title ... \n", "1984 0.0 ... 0.0 \n", "1st to Die: A Novel 0.0 ... 0.0 \n", "2nd Chance 0.0 ... 0.0 \n", "4 Blondes 0.0 ... 0.0 \n", "84 Charing Cross Road 0.0 ... 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 ... 0.0 \n", "You Belong To Me 0.0 ... 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 ... 0.0 \n", "Zoya 0.0 ... 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 ... 0.0 \n", "\n", "User-ID 274061 274301 274308 \\\n", "Book-Title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... 
... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 8.0 0.0 \n", "\n", "User-ID 274808 275970 277427 \\\n", "Book-Title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 10.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "User-ID 277478 277639 278418 \n", "Book-Title \n", "1984 0.0 0.0 0.0 \n", "1st to Die: A Novel 0.0 0.0 0.0 \n", "2nd Chance 0.0 0.0 0.0 \n", "4 Blondes 0.0 0.0 0.0 \n", "84 Charing Cross Road 0.0 0.0 0.0 \n", "... ... ... ... \n", "Year of Wonders 0.0 0.0 0.0 \n", "You Belong To Me 0.0 0.0 0.0 \n", "Zen and the Art of Motorcycle Maintenance: An I... 0.0 0.0 0.0 \n", "Zoya 0.0 0.0 0.0 \n", "\\O\\\" Is for Outlaw\" 0.0 0.0 0.0 \n", "\n", "[742 rows x 888 columns]" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "book_pivot_tale" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4.6 scipy matrix creation" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<742x888 sparse matrix of type ''\n", "\twith 14961 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "books_sparse_table = csr_matrix(book_pivot_tale)\n", "books_sparse_table" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. 
Model Building"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Brute-force neighbour search; no metric is passed, so the estimator's default applies.\n",
    "model = NearestNeighbors(algorithm=\"brute\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
NearestNeighbors(algorithm='brute')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ],
      "text/plain": [
       "NearestNeighbors(algorithm='brute')"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit on the sparse pivot matrix; the returned estimator is shown as the cell output.\n",
    "model.fit(books_sparse_table)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Suggestions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the pivot-table row at position 200 as the query and ask for its 6 nearest rows.\n",
    "query_row = book_pivot_tale.iloc[200, :].values.reshape(1, -1)\n",
    "distance, suggestion = model.kneighbors(query_row, n_neighbors=6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0. , 22.24859546, 22.60530911, 23.43074903, 23.68543856,\n",
       " 24.14539294]])"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distances from the query row to each returned neighbour\n",
    "distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[200, 372, 485, 320, 184, 536]], dtype=int64)"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row positions in book_pivot_tale of the returned neighbours\n",
    "suggestion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Fatal Cure', 'No Safe Place', 'Table For Two', 'Long After Midnight',\n",
      " 'Exclusive', 'The Cradle Will Fall'],\n",
      " dtype='object', name='Book-Title')\n"
     ]
    }
   ],
   "source": [
    "# Translate the neighbour row positions back into book titles.\n",
    "for neighbour_rows in suggestion:\n",
    "    print(book_pivot_tale.index[neighbour_rows])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['1984', '1st to Die: A Novel', '2nd Chance', '4 Blondes',\n",
       " '84 Charing Cross Road', 'A Bend in the Road', 'A Case of Need',\n",
       " 'A Child Called \It\\\": One Child's Courage to Survive\"',\n",
       " 'A Civil Action', 'A Cry In The Night',\n",
       " ...\n",
       " 'Winter Solstice', 'Wish You Well', 'Without Remorse',\n",
       " 'Wizard and Glass (The Dark Tower, Book 4)', 'Wuthering Heights',\n",
       " 'Year of 
Wonders', 'You Belong To Me',\n",
       " 'Zen and the Art of Motorcycle Maintenance: An Inquiry into Values',\n",
       " 'Zoya', '\\O\\\" Is for Outlaw\"'],\n",
       " dtype='object', name='Book-Title', length=742)"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book_names = book_pivot_tale.index\n",
    "book_names"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Save the requirements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model and the lookup tables.\n",
    "# Each file is opened in a `with` block so its handle is closed once the dump finishes.\n",
    "artifacts = {\n",
    "    \"model.pkl\": model,\n",
    "    \"book_names.pkl\": book_names,\n",
    "    \"final_rating.pkl\": final_rating,\n",
    "    \"book_pivot_tale.pkl\": book_pivot_tale,\n",
    "}\n",
    "for file_name, artifact in artifacts.items():\n",
    "    with open(file_name, \"wb\") as fh:\n",
    "        pickle.dump(artifact, fh)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Creating recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['Zoya', 'Fine Things', 'Exclusive', 'Secrets', 'The Cradle Will Fall',\n",
      " 'No Safe Place'],\n",
      " dtype='object', name='Book-Title')\n"
     ]
    }
   ],
   "source": [
    "def recommended_book(book_name, n_neighbors=6):\n",
    "    \"\"\"Print the titles of the books nearest to `book_name`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    book_name : str\n",
    "        Title to look up; it must equal an entry of `book_pivot_tale.index`.\n",
    "    n_neighbors : int, default 6\n",
    "        How many neighbours to request from `model.kneighbors`.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    IndexError\n",
    "        If `book_name` is not present in `book_pivot_tale.index`.\n",
    "    \"\"\"\n",
    "    matches = np.where(book_pivot_tale.index == book_name)[0]\n",
    "    if matches.size == 0:\n",
    "        # No match: raise IndexError explicitly so the message names the missing title.\n",
    "        raise IndexError(f\"{book_name!r} is not a title in book_pivot_tale\")\n",
    "    book_id = matches[0]\n",
    "    query_row = book_pivot_tale.iloc[book_id, :].values.reshape(1, -1)\n",
    "    # Only the neighbour positions are used, so the distances are not requested.\n",
    "    suggestion = model.kneighbors(query_row, n_neighbors=n_neighbors, return_distance=False)\n",
    "    for neighbour_rows in suggestion:\n",
    "        print(book_pivot_tale.index[neighbour_rows])\n",
    "\n",
    "\n",
    "recommended_book(\"Zoya\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}