diff --git "a/data_exploration.ipynb" "b/data_exploration.ipynb" new file mode 100644--- /dev/null +++ "b/data_exploration.ipynb" @@ -0,0 +1,1634 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "3a564843", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | isbn13 | \n", + "isbn10 | \n", + "title | \n", + "subtitle | \n", + "authors | \n", + "categories | \n", + "thumbnail | \n", + "description | \n", + "published_year | \n", + "average_rating | \n", + "num_pages | \n", + "ratings_count | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "9780002005883 | \n", + "0002005883 | \n", + "Gilead | \n", + "NaN | \n", + "Marilynne Robinson | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=KQZCP... | \n", + "A NOVEL THAT READERS and critics have been eag... | \n", + "2004.0 | \n", + "3.85 | \n", + "247.0 | \n", + "361.0 | \n", + "
| 1 | \n", + "9780002261982 | \n", + "0002261987 | \n", + "Spider's Web | \n", + "A Novel | \n", + "Charles Osborne;Agatha Christie | \n", + "Detective and mystery stories | \n", + "http://books.google.com/books/content?id=gA5GP... | \n", + "A new 'Christie for Christmas' -- a full-lengt... | \n", + "2000.0 | \n", + "3.83 | \n", + "241.0 | \n", + "5164.0 | \n", + "
| 2 | \n", + "9780006163831 | \n", + "0006163831 | \n", + "The One Tree | \n", + "NaN | \n", + "Stephen R. Donaldson | \n", + "American fiction | \n", + "http://books.google.com/books/content?id=OmQaw... | \n", + "Volume Two of Stephen Donaldson's acclaimed se... | \n", + "1982.0 | \n", + "3.97 | \n", + "479.0 | \n", + "172.0 | \n", + "
| 3 | \n", + "9780006178736 | \n", + "0006178731 | \n", + "Rage of angels | \n", + "NaN | \n", + "Sidney Sheldon | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=FKo2T... | \n", + "A memorable, mesmerizing heroine Jennifer -- b... | \n", + "1993.0 | \n", + "3.93 | \n", + "512.0 | \n", + "29532.0 | \n", + "
| 4 | \n", + "9780006280897 | \n", + "0006280897 | \n", + "The Four Loves | \n", + "NaN | \n", + "Clive Staples Lewis | \n", + "Christian life | \n", + "http://books.google.com/books/content?id=XhQ5X... | \n", + "Lewis' work on the nature of love divides love... | \n", + "2002.0 | \n", + "4.15 | \n", + "170.0 | \n", + "33684.0 | \n", + "
| \n", + " | isbn13 | \n", + "isbn10 | \n", + "title | \n", + "subtitle | \n", + "authors | \n", + "categories | \n", + "thumbnail | \n", + "description | \n", + "published_year | \n", + "average_rating | \n", + "num_pages | \n", + "ratings_count | \n", + "missing_description | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "9780002005883 | \n", + "0002005883 | \n", + "Gilead | \n", + "NaN | \n", + "Marilynne Robinson | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=KQZCP... | \n", + "A NOVEL THAT READERS and critics have been eag... | \n", + "2004.0 | \n", + "3.85 | \n", + "247.0 | \n", + "361.0 | \n", + "0 | \n", + "
| 1 | \n", + "9780002261982 | \n", + "0002261987 | \n", + "Spider's Web | \n", + "A Novel | \n", + "Charles Osborne;Agatha Christie | \n", + "Detective and mystery stories | \n", + "http://books.google.com/books/content?id=gA5GP... | \n", + "A new 'Christie for Christmas' -- a full-lengt... | \n", + "2000.0 | \n", + "3.83 | \n", + "241.0 | \n", + "5164.0 | \n", + "0 | \n", + "
| 2 | \n", + "9780006163831 | \n", + "0006163831 | \n", + "The One Tree | \n", + "NaN | \n", + "Stephen R. Donaldson | \n", + "American fiction | \n", + "http://books.google.com/books/content?id=OmQaw... | \n", + "Volume Two of Stephen Donaldson's acclaimed se... | \n", + "1982.0 | \n", + "3.97 | \n", + "479.0 | \n", + "172.0 | \n", + "0 | \n", + "
| 3 | \n", + "9780006178736 | \n", + "0006178731 | \n", + "Rage of angels | \n", + "NaN | \n", + "Sidney Sheldon | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=FKo2T... | \n", + "A memorable, mesmerizing heroine Jennifer -- b... | \n", + "1993.0 | \n", + "3.93 | \n", + "512.0 | \n", + "29532.0 | \n", + "0 | \n", + "
| 4 | \n", + "9780006280897 | \n", + "0006280897 | \n", + "The Four Loves | \n", + "NaN | \n", + "Clive Staples Lewis | \n", + "Christian life | \n", + "http://books.google.com/books/content?id=XhQ5X... | \n", + "Lewis' work on the nature of love divides love... | \n", + "2002.0 | \n", + "4.15 | \n", + "170.0 | \n", + "33684.0 | \n", + "0 | \n", + "
| \n", + " | categories | \n", + "count | \n", + "
|---|---|---|
| 0 | \n", + "Fiction | \n", + "2523 | \n", + "
| 1 | \n", + "Juvenile Fiction | \n", + "534 | \n", + "
| 2 | \n", + "Biography & Autobiography | \n", + "391 | \n", + "
| 3 | \n", + "History | \n", + "258 | \n", + "
| 4 | \n", + "Literary Criticism | \n", + "164 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "
| 280 | \n", + "Butlers | \n", + "1 | \n", + "
| 279 | \n", + "Gardens | \n", + "1 | \n", + "
| 278 | \n", + "Assassins | \n", + "1 | \n", + "
| 277 | \n", + "Married people | \n", + "1 | \n", + "
| 530 | \n", + "Indic fiction (English) | \n", + "1 | \n", + "
531 rows × 2 columns
\n", + "| \n", + " | isbn13 | \n", + "isbn10 | \n", + "title | \n", + "subtitle | \n", + "authors | \n", + "categories | \n", + "thumbnail | \n", + "description | \n", + "published_year | \n", + "average_rating | \n", + "num_pages | \n", + "ratings_count | \n", + "missing_description | \n", + "age_of_book | \n", + "words_in_description | \n", + "title_and_subtitle | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "9780002005883 | \n", + "0002005883 | \n", + "Gilead | \n", + "NaN | \n", + "Marilynne Robinson | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=KQZCP... | \n", + "A NOVEL THAT READERS and critics have been eag... | \n", + "2004.0 | \n", + "3.85 | \n", + "247.0 | \n", + "361.0 | \n", + "0 | \n", + "20.0 | \n", + "199 | \n", + "Gilead | \n", + "
| 1 | \n", + "9780002261982 | \n", + "0002261987 | \n", + "Spider's Web | \n", + "A Novel | \n", + "Charles Osborne;Agatha Christie | \n", + "Detective and mystery stories | \n", + "http://books.google.com/books/content?id=gA5GP... | \n", + "A new 'Christie for Christmas' -- a full-lengt... | \n", + "2000.0 | \n", + "3.83 | \n", + "241.0 | \n", + "5164.0 | \n", + "0 | \n", + "24.0 | \n", + "205 | \n", + "Spider's Web: A Novel | \n", + "
| 3 | \n", + "9780006178736 | \n", + "0006178731 | \n", + "Rage of angels | \n", + "NaN | \n", + "Sidney Sheldon | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=FKo2T... | \n", + "A memorable, mesmerizing heroine Jennifer -- b... | \n", + "1993.0 | \n", + "3.93 | \n", + "512.0 | \n", + "29532.0 | \n", + "0 | \n", + "31.0 | \n", + "57 | \n", + "Rage of angels | \n", + "
| 4 | \n", + "9780006280897 | \n", + "0006280897 | \n", + "The Four Loves | \n", + "NaN | \n", + "Clive Staples Lewis | \n", + "Christian life | \n", + "http://books.google.com/books/content?id=XhQ5X... | \n", + "Lewis' work on the nature of love divides love... | \n", + "2002.0 | \n", + "4.15 | \n", + "170.0 | \n", + "33684.0 | \n", + "0 | \n", + "22.0 | \n", + "45 | \n", + "The Four Loves | \n", + "
| 5 | \n", + "9780006280934 | \n", + "0006280935 | \n", + "The Problem of Pain | \n", + "NaN | \n", + "Clive Staples Lewis | \n", + "Christian life | \n", + "http://books.google.com/books/content?id=Kk-uV... | \n", + "\"In The Problem of Pain, C.S. Lewis, one of th... | \n", + "2002.0 | \n", + "4.09 | \n", + "176.0 | \n", + "37569.0 | \n", + "0 | \n", + "22.0 | \n", + "75 | \n", + "The Problem of Pain | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 6802 | \n", + "9788172235222 | \n", + "8172235224 | \n", + "Mistaken Identity | \n", + "NaN | \n", + "Nayantara Sahgal | \n", + "Indic fiction (English) | \n", + "http://books.google.com/books/content?id=q-tKP... | \n", + "On A Train Journey Home To North India After L... | \n", + "2003.0 | \n", + "2.93 | \n", + "324.0 | \n", + "0.0 | \n", + "0 | \n", + "21.0 | \n", + "288 | \n", + "Mistaken Identity | \n", + "
| 6803 | \n", + "9788173031014 | \n", + "8173031010 | \n", + "Journey to the East | \n", + "NaN | \n", + "Hermann Hesse | \n", + "Adventure stories | \n", + "http://books.google.com/books/content?id=rq6JP... | \n", + "This book tells the tale of a man who goes on ... | \n", + "2002.0 | \n", + "3.70 | \n", + "175.0 | \n", + "24.0 | \n", + "0 | \n", + "22.0 | \n", + "63 | \n", + "Journey to the East | \n", + "
| 6804 | \n", + "9788179921623 | \n", + "817992162X | \n", + "The Monk Who Sold His Ferrari: A Fable About F... | \n", + "NaN | \n", + "Robin Sharma | \n", + "Health & Fitness | \n", + "http://books.google.com/books/content?id=c_7mf... | \n", + "Wisdom to Create a Life of Passion, Purpose, a... | \n", + "2003.0 | \n", + "3.82 | \n", + "198.0 | \n", + "1568.0 | \n", + "0 | \n", + "21.0 | \n", + "117 | \n", + "The Monk Who Sold His Ferrari: A Fable About F... | \n", + "
| 6805 | \n", + "9788185300535 | \n", + "8185300534 | \n", + "I Am that | \n", + "Talks with Sri Nisargadatta Maharaj | \n", + "Sri Nisargadatta Maharaj;Sudhakar S. Dikshit | \n", + "Philosophy | \n", + "http://books.google.com/books/content?id=Fv_JP... | \n", + "This collection of the timeless teachings of o... | \n", + "1999.0 | \n", + "4.51 | \n", + "531.0 | \n", + "104.0 | \n", + "0 | \n", + "25.0 | \n", + "174 | \n", + "I Am that: Talks with Sri Nisargadatta Maharaj | \n", + "
| 6808 | \n", + "9789027712059 | \n", + "9027712050 | \n", + "The Berlin Phenomenology | \n", + "NaN | \n", + "Georg Wilhelm Friedrich Hegel | \n", + "History | \n", + "http://books.google.com/books/content?id=Vy7Sk... | \n", + "Since the three volume edition ofHegel's Philo... | \n", + "1981.0 | \n", + "0.00 | \n", + "210.0 | \n", + "0.0 | \n", + "0 | \n", + "43.0 | \n", + "245 | \n", + "The Berlin Phenomenology | \n", + "
5197 rows × 16 columns
\n", + "| \n", + " | isbn13 | \n", + "isbn10 | \n", + "title | \n", + "subtitle | \n", + "authors | \n", + "categories | \n", + "thumbnail | \n", + "description | \n", + "published_year | \n", + "average_rating | \n", + "num_pages | \n", + "ratings_count | \n", + "missing_description | \n", + "age_of_book | \n", + "words_in_description | \n", + "title_and_subtitle | \n", + "tagged_description | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "9780002005883 | \n", + "0002005883 | \n", + "Gilead | \n", + "NaN | \n", + "Marilynne Robinson | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=KQZCP... | \n", + "A NOVEL THAT READERS and critics have been eag... | \n", + "2004.0 | \n", + "3.85 | \n", + "247.0 | \n", + "361.0 | \n", + "0 | \n", + "20.0 | \n", + "199 | \n", + "Gilead | \n", + "9780002005883 A NOVEL THAT READERS and critics... | \n", + "
| 1 | \n", + "9780002261982 | \n", + "0002261987 | \n", + "Spider's Web | \n", + "A Novel | \n", + "Charles Osborne;Agatha Christie | \n", + "Detective and mystery stories | \n", + "http://books.google.com/books/content?id=gA5GP... | \n", + "A new 'Christie for Christmas' -- a full-lengt... | \n", + "2000.0 | \n", + "3.83 | \n", + "241.0 | \n", + "5164.0 | \n", + "0 | \n", + "24.0 | \n", + "205 | \n", + "Spider's Web: A Novel | \n", + "9780002261982 A new 'Christie for Christmas' -... | \n", + "
| 3 | \n", + "9780006178736 | \n", + "0006178731 | \n", + "Rage of angels | \n", + "NaN | \n", + "Sidney Sheldon | \n", + "Fiction | \n", + "http://books.google.com/books/content?id=FKo2T... | \n", + "A memorable, mesmerizing heroine Jennifer -- b... | \n", + "1993.0 | \n", + "3.93 | \n", + "512.0 | \n", + "29532.0 | \n", + "0 | \n", + "31.0 | \n", + "57 | \n", + "Rage of angels | \n", + "9780006178736 A memorable, mesmerizing heroine... | \n", + "
| 4 | \n", + "9780006280897 | \n", + "0006280897 | \n", + "The Four Loves | \n", + "NaN | \n", + "Clive Staples Lewis | \n", + "Christian life | \n", + "http://books.google.com/books/content?id=XhQ5X... | \n", + "Lewis' work on the nature of love divides love... | \n", + "2002.0 | \n", + "4.15 | \n", + "170.0 | \n", + "33684.0 | \n", + "0 | \n", + "22.0 | \n", + "45 | \n", + "The Four Loves | \n", + "9780006280897 Lewis' work on the nature of lov... | \n", + "
| 5 | \n", + "9780006280934 | \n", + "0006280935 | \n", + "The Problem of Pain | \n", + "NaN | \n", + "Clive Staples Lewis | \n", + "Christian life | \n", + "http://books.google.com/books/content?id=Kk-uV... | \n", + "\"In The Problem of Pain, C.S. Lewis, one of th... | \n", + "2002.0 | \n", + "4.09 | \n", + "176.0 | \n", + "37569.0 | \n", + "0 | \n", + "22.0 | \n", + "75 | \n", + "The Problem of Pain | \n", + "9780006280934 \"In The Problem of Pain, C.S. Le... | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 6802 | \n", + "9788172235222 | \n", + "8172235224 | \n", + "Mistaken Identity | \n", + "NaN | \n", + "Nayantara Sahgal | \n", + "Indic fiction (English) | \n", + "http://books.google.com/books/content?id=q-tKP... | \n", + "On A Train Journey Home To North India After L... | \n", + "2003.0 | \n", + "2.93 | \n", + "324.0 | \n", + "0.0 | \n", + "0 | \n", + "21.0 | \n", + "288 | \n", + "Mistaken Identity | \n", + "9788172235222 On A Train Journey Home To North... | \n", + "
| 6803 | \n", + "9788173031014 | \n", + "8173031010 | \n", + "Journey to the East | \n", + "NaN | \n", + "Hermann Hesse | \n", + "Adventure stories | \n", + "http://books.google.com/books/content?id=rq6JP... | \n", + "This book tells the tale of a man who goes on ... | \n", + "2002.0 | \n", + "3.70 | \n", + "175.0 | \n", + "24.0 | \n", + "0 | \n", + "22.0 | \n", + "63 | \n", + "Journey to the East | \n", + "9788173031014 This book tells the tale of a ma... | \n", + "
| 6804 | \n", + "9788179921623 | \n", + "817992162X | \n", + "The Monk Who Sold His Ferrari: A Fable About F... | \n", + "NaN | \n", + "Robin Sharma | \n", + "Health & Fitness | \n", + "http://books.google.com/books/content?id=c_7mf... | \n", + "Wisdom to Create a Life of Passion, Purpose, a... | \n", + "2003.0 | \n", + "3.82 | \n", + "198.0 | \n", + "1568.0 | \n", + "0 | \n", + "21.0 | \n", + "117 | \n", + "The Monk Who Sold His Ferrari: A Fable About F... | \n", + "9788179921623 Wisdom to Create a Life of Passi... | \n", + "
| 6805 | \n", + "9788185300535 | \n", + "8185300534 | \n", + "I Am that | \n", + "Talks with Sri Nisargadatta Maharaj | \n", + "Sri Nisargadatta Maharaj;Sudhakar S. Dikshit | \n", + "Philosophy | \n", + "http://books.google.com/books/content?id=Fv_JP... | \n", + "This collection of the timeless teachings of o... | \n", + "1999.0 | \n", + "4.51 | \n", + "531.0 | \n", + "104.0 | \n", + "0 | \n", + "25.0 | \n", + "174 | \n", + "I Am that: Talks with Sri Nisargadatta Maharaj | \n", + "9788185300535 This collection of the timeless ... | \n", + "
| 6808 | \n", + "9789027712059 | \n", + "9027712050 | \n", + "The Berlin Phenomenology | \n", + "NaN | \n", + "Georg Wilhelm Friedrich Hegel | \n", + "History | \n", + "http://books.google.com/books/content?id=Vy7Sk... | \n", + "Since the three volume edition ofHegel's Philo... | \n", + "1981.0 | \n", + "0.00 | \n", + "210.0 | \n", + "0.0 | \n", + "0 | \n", + "43.0 | \n", + "245 | \n", + "The Berlin Phenomenology | \n", + "9789027712059 Since the three volume edition o... | \n", + "
5197 rows × 17 columns
\n", + "