Krishwall committed on
Commit
d83e0b5
·
verified ·
1 Parent(s): 195a84c

Upload folder using huggingface_hub

Browse files
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ GOOGLE_API_KEY=REDACTED  # leaked key removed from version control: revoke it and supply the real value through the Space's secret settings
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
BookRecommender/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
BookRecommender/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: BookRecommender
3
+ emoji: 📚
4
+ colorFrom: red
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.33.1
8
+ app_file: app.py
9
+ pinned: false
10
+ short_description: Provides Book Recommendation using prompt
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
README.md CHANGED
@@ -1,12 +1,7 @@
1
- ---
2
- title: BookRecommenderLLm
3
- emoji: 🏢
4
- colorFrom: green
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 5.33.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: BookRecommenderLLm
3
+ app_file: grad-dashboard.py
4
+ sdk: gradio
5
+ sdk_version: 5.33.1
6
+ ---
7
+ # BookRecommenderLLm
 
 
 
 
 
books.csv/books.csv ADDED
The diff for this file is too large to render. See raw diff
 
books_cleaned.csv ADDED
The diff for this file is too large to render. See raw diff
 
books_with_categories.csv ADDED
The diff for this file is too large to render. See raw diff
 
books_with_emotions.csv ADDED
The diff for this file is too large to render. See raw diff
 
cover-not-found.jpeg ADDED
dash.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+
3
# Smoke test: build a ChromaDB client, create one collection, and report
# whether that succeeded. Any failure is printed rather than raised.
try:
    client = chromadb.Client()
    collection = client.create_collection("test")
except Exception as error:
    print(f"ChromaDB basic test failed: {error}")
else:
    print("ChromaDB basic test passed!")
data_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
grad-dashboard.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from dotenv import load_dotenv
4
+ import os
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.document_loaders import TextLoader
7
+ # from langchain_openai import OpenAIEmbeddings
8
+ from langchain_text_splitters import CharacterTextSplitter
9
+ from langchain_chroma import Chroma
10
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
11
+
12
+
13
+ import gradio as gr
14
+
15
# Load variables from a local .env file into the process environment.
load_dotenv()

# Book table used by the recommender functions below.
books = pd.read_csv("books_with_emotions.csv")

# Append a size suffix to every thumbnail URL (presumably requests a wider
# cover image — confirm). A missing thumbnail stays NaN after the string
# concatenation, so those rows fall back to the bundled placeholder image.
_enlarged_covers = books["thumbnail"] + "&fife=w800"
books["large_thumbnail"] = np.where(
    _enlarged_covers.isna(),
    "cover-not-found.jpeg",
    _enlarged_covers,
)

# Read the tagged descriptions and split them on newlines.
# NOTE(review): chunk_size=0 with a "\n" separator presumably yields one chunk
# per line — confirm against the installed langchain-text-splitters version.
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=0,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(raw_documents)
print(f"Number of documents loaded: {len(documents)}")

# Embed every chunk with the Google embedding model and index the vectors in
# FAISS. (An earlier Chroma-based experiment on a five-document sample was
# left here commented out; it has been dropped.)
db_books = FAISS.from_documents(
    documents,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
)
print("FAISS DB created with documents")
40
# Maps a tone chosen in the dashboard to the emotion-score column used to
# order the results. Any other value (including "All" and None) means
# "do not re-order by tone".
# NOTE(review): the notebook in this commit pairs alphabetically sorted scores
# with score-ordered labels when building these columns, so the column names
# may not match the emotions they hold — verify books_with_emotions.csv.
_TONE_SORT_COLUMN = {
    "Happy": "joy",
    "Surprising": "surprise",
    "Angry": "anger",
    "Suspenseful": "fear",
    "Sad": "sadness",
}


def retrieve_semantic_recommendation(
    query: str,
    category: str = None,
    tone: str = None,
    initial_top_k: int = 50,
    final_top_k: int = 16,
) -> pd.DataFrame:
    """Return up to ``final_top_k`` books that best match ``query``.

    Reads the module-level ``db_books`` vector store and ``books`` table.

    Args:
        query: Free-text description to search for.
        category: Value of ``simple_categories`` to keep. ``None``, an empty
            string, or ``"All"`` disables the category filter.
        tone: One of the keys of ``_TONE_SORT_COLUMN``; when given, the final
            rows are ordered by that emotion score, highest first. Any other
            value leaves the similarity order untouched.
        initial_top_k: Number of nearest chunks requested from the vector
            store before filtering.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame holding the selected rows of ``books``.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # The first whitespace-delimited token of each chunk (after stripping
    # surrounding double quotes) is parsed as the book's isbn13.
    ranked_isbns = [int(rec.page_content.strip('"').split()[0]) for rec in recs]

    # Remember the best (lowest) similarity rank seen for every isbn so the
    # table rows can be put back into relevance order; a plain isin() filter
    # would return them in CSV order and head() would then keep arbitrary books.
    rank_by_isbn = {}
    for position, isbn in enumerate(ranked_isbns):
        rank_by_isbn.setdefault(isbn, position)

    candidates = books[books["isbn13"].isin(ranked_isbns)]
    relevance_order = np.argsort(
        candidates["isbn13"].map(rank_by_isbn).to_numpy(),
        kind="stable",
    )
    candidates = candidates.iloc[relevance_order]

    # Filter by category BEFORE truncating: cutting to final_top_k first would
    # leave only the few in-category books among the first final_top_k rows.
    if category and category != "All":
        candidates = candidates[candidates["simple_categories"] == category]

    book_recs = candidates.head(final_top_k)

    # Re-order (not in place: book_recs is a slice of `books`) by the
    # requested tone, if any.
    sort_column = _TONE_SORT_COLUMN.get(tone)
    if sort_column is not None:
        book_recs = book_recs.sort_values(by=sort_column, ascending=False)

    return book_recs
70
+
71
def _truncate_words(text, limit: int = 30) -> str:
    """Return the first ``limit`` words of ``text``, adding "..." only if cut."""
    # A missing description arrives from pandas as a non-string (NaN).
    if not isinstance(text, str):
        return ""
    words = text.split()
    if len(words) <= limit:
        return " ".join(words)
    return " ".join(words[:limit]) + "..."


def _format_authors(authors) -> str:
    """Render a ';'-separated author field as a readable list."""
    # A missing author field arrives from pandas as a non-string (NaN);
    # calling .split on it would raise AttributeError.
    if not isinstance(authors, str):
        return "Unknown author"
    names = authors.split(";")
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    if len(names) > 2:
        # The space after "and" was missing, producing "A, B, andC".
        return f"{', '.join(names[:-1])}, and {names[-1]}"
    return authors


def recommend_books(
    query: str,
    category: str,
    tone: str
) -> list:
    """Build the gallery entries for a dashboard search.

    Args:
        query: Free-text description entered by the user.
        category: Selected category, or ``"All"``.
        tone: Selected emotional tone, or ``"All"``.

    Returns:
        A list of ``(large_thumbnail, caption)`` tuples, one per recommended
        book, where the caption is "<title> by <authors>: <short description>".
    """
    recommendations = retrieve_semantic_recommendation(query, category, tone)

    results = []
    for _, row in recommendations.iterrows():
        short_description = _truncate_words(row["description"])
        authors_str = _format_authors(row["authors"])
        caption = f"{row['title']} by {authors_str}: {short_description}"
        results.append((row["large_thumbnail"], caption))

    return results
97
+
98
# Dropdown choices. "All" is the value the retrieval code treats as
# "no category filter"; for tone it simply matches none of the sort branches.
categories = ["All", *sorted(books["simple_categories"].unique())]
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

# NOTE(review): the original indentation was not recoverable, so the nesting
# of the input widgets under gr.Row() is reconstructed — confirm the layout.
with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# Semantic book recommender")

    with gr.Row():
        user_query = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness",
        )
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Select a category",
            value="All",
        )
        tone_dropdown = gr.Dropdown(
            choices=tones,
            label="Select an emotional tone:",
            value="All",
        )
        submit_button = gr.Button("Get Recommendations")

    gr.Markdown("## Recommendations")
    output = gr.Gallery(label="Recommend books", columns=8, rows=2)

    # Clicking the button sends the three inputs to recommend_books and shows
    # what it returns in the gallery.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output,
    )


if __name__ == "__main__":
    # Launch failures are reported on stdout instead of propagating.
    try:
        print("Launching the Gradio dashboard...")
        dashboard.launch(share=True)
    except Exception as e:
        print(f"An error occurred: {e}")
sentiment-analysis.ipynb ADDED
@@ -0,0 +1,1059 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "407898b3",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "books=pd.read_csv('books_with_categories.csv')"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 2,
17
+ "id": "5590fdf2",
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "name": "stderr",
22
+ "output_type": "stream",
23
+ "text": [
24
+ "c:\\Users\\KRISH\\miniconda3\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
25
+ " from .autonotebook import tqdm as notebook_tqdm\n",
26
+ "Device set to use cuda:0\n"
27
+ ]
28
+ },
29
+ {
30
+ "data": {
31
+ "text/plain": [
32
+ "[[{'label': 'joy', 'score': 0.9778217077255249},\n",
33
+ " {'label': 'anger', 'score': 0.00690877391025424},\n",
34
+ " {'label': 'neutral', 'score': 0.004429477732628584},\n",
35
+ " {'label': 'sadness', 'score': 0.0038780963514000177},\n",
36
+ " {'label': 'fear', 'score': 0.0034606074914336205},\n",
37
+ " {'label': 'disgust', 'score': 0.002587498864158988},\n",
38
+ " {'label': 'surprise', 'score': 0.0009138151071965694}]]"
39
+ ]
40
+ },
41
+ "execution_count": 2,
42
+ "metadata": {},
43
+ "output_type": "execute_result"
44
+ }
45
+ ],
46
+ "source": [
47
+ "# Use a pipeline as a high-level helper\n",
48
+ "from transformers import pipeline\n",
49
+ "\n",
50
+ "classifier = pipeline(\"text-classification\", model=\"j-hartmann/emotion-english-distilroberta-base\",top_k=None,device=0)\n",
51
+ "classifier(\"I love this \")"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 3,
57
+ "id": "7435bb0a",
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gilead is a song of celebration and acceptance of the best and the worst the world has to offer. At its heart is a tale of the sacred bonds between fathers and sons, pitch-perfect in style and story, set to dazzle critics and readers alike.'"
64
+ ]
65
+ },
66
+ "execution_count": 3,
67
+ "metadata": {},
68
+ "output_type": "execute_result"
69
+ }
70
+ ],
71
+ "source": [
72
+ "books[\"description\"][0]"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": 4,
78
+ "id": "833fe654",
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "data": {
83
+ "text/plain": [
84
+ "[[{'label': 'fear', 'score': 0.654841423034668},\n",
85
+ " {'label': 'neutral', 'score': 0.16985194385051727},\n",
86
+ " {'label': 'sadness', 'score': 0.11640876531600952},\n",
87
+ " {'label': 'surprise', 'score': 0.02070068195462227},\n",
88
+ " {'label': 'disgust', 'score': 0.019100716337561607},\n",
89
+ " {'label': 'joy', 'score': 0.0151612414047122},\n",
90
+ " {'label': 'anger', 'score': 0.003935152664780617}]]"
91
+ ]
92
+ },
93
+ "execution_count": 4,
94
+ "metadata": {},
95
+ "output_type": "execute_result"
96
+ }
97
+ ],
98
+ "source": [
99
+ "classifier(books[\"description\"][0])"
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "execution_count": 5,
105
+ "id": "39ac9c2b",
106
+ "metadata": {},
107
+ "outputs": [
108
+ {
109
+ "data": {
110
+ "text/plain": [
111
+ "[[{'label': 'surprise', 'score': 0.7296032905578613},\n",
112
+ " {'label': 'neutral', 'score': 0.14038528501987457},\n",
113
+ " {'label': 'fear', 'score': 0.06816212832927704},\n",
114
+ " {'label': 'joy', 'score': 0.04794234782457352},\n",
115
+ " {'label': 'anger', 'score': 0.00915635097771883},\n",
116
+ " {'label': 'disgust', 'score': 0.002628469606861472},\n",
117
+ " {'label': 'sadness', 'score': 0.002122158883139491}],\n",
118
+ " [{'label': 'neutral', 'score': 0.449370801448822},\n",
119
+ " {'label': 'disgust', 'score': 0.27359163761138916},\n",
120
+ " {'label': 'joy', 'score': 0.10908281058073044},\n",
121
+ " {'label': 'sadness', 'score': 0.0936271920800209},\n",
122
+ " {'label': 'anger', 'score': 0.04047831892967224},\n",
123
+ " {'label': 'surprise', 'score': 0.02697017416357994},\n",
124
+ " {'label': 'fear', 'score': 0.006879049353301525}],\n",
125
+ " [{'label': 'neutral', 'score': 0.6462163925170898},\n",
126
+ " {'label': 'sadness', 'score': 0.24273289740085602},\n",
127
+ " {'label': 'disgust', 'score': 0.043422624468803406},\n",
128
+ " {'label': 'surprise', 'score': 0.028300542384386063},\n",
129
+ " {'label': 'joy', 'score': 0.014211481437087059},\n",
130
+ " {'label': 'fear', 'score': 0.014084117487072945},\n",
131
+ " {'label': 'anger', 'score': 0.011031893081963062}],\n",
132
+ " [{'label': 'fear', 'score': 0.928167998790741},\n",
133
+ " {'label': 'anger', 'score': 0.03219102695584297},\n",
134
+ " {'label': 'neutral', 'score': 0.012808704748749733},\n",
135
+ " {'label': 'sadness', 'score': 0.008756889030337334},\n",
136
+ " {'label': 'surprise', 'score': 0.008597930893301964},\n",
137
+ " {'label': 'disgust', 'score': 0.008431846275925636},\n",
138
+ " {'label': 'joy', 'score': 0.0010455820010975003}],\n",
139
+ " [{'label': 'sadness', 'score': 0.9671574234962463},\n",
140
+ " {'label': 'neutral', 'score': 0.015104176476597786},\n",
141
+ " {'label': 'disgust', 'score': 0.0064806039445102215},\n",
142
+ " {'label': 'fear', 'score': 0.005394001957029104},\n",
143
+ " {'label': 'surprise', 'score': 0.0022869459353387356},\n",
144
+ " {'label': 'anger', 'score': 0.0018428926123306155},\n",
145
+ " {'label': 'joy', 'score': 0.0017338803736492991}],\n",
146
+ " [{'label': 'joy', 'score': 0.9327973127365112},\n",
147
+ " {'label': 'disgust', 'score': 0.03771765157580376},\n",
148
+ " {'label': 'neutral', 'score': 0.015891950577497482},\n",
149
+ " {'label': 'sadness', 'score': 0.006444534286856651},\n",
150
+ " {'label': 'anger', 'score': 0.005025016609579325},\n",
151
+ " {'label': 'surprise', 'score': 0.0015812088968232274},\n",
152
+ " {'label': 'fear', 'score': 0.0005423093680292368}],\n",
153
+ " [{'label': 'joy', 'score': 0.6528706550598145},\n",
154
+ " {'label': 'neutral', 'score': 0.2542746365070343},\n",
155
+ " {'label': 'surprise', 'score': 0.06808312982320786},\n",
156
+ " {'label': 'sadness', 'score': 0.009908992797136307},\n",
157
+ " {'label': 'disgust', 'score': 0.006512203253805637},\n",
158
+ " {'label': 'anger', 'score': 0.004821316804736853},\n",
159
+ " {'label': 'fear', 'score': 0.003529016859829426}],\n",
160
+ " [{'label': 'neutral', 'score': 0.549476683139801},\n",
161
+ " {'label': 'sadness', 'score': 0.11169017851352692},\n",
162
+ " {'label': 'disgust', 'score': 0.10400667041540146},\n",
163
+ " {'label': 'surprise', 'score': 0.07876553386449814},\n",
164
+ " {'label': 'anger', 'score': 0.06413363665342331},\n",
165
+ " {'label': 'fear', 'score': 0.051362816244363785},\n",
166
+ " {'label': 'joy', 'score': 0.04056441783905029}]]"
167
+ ]
168
+ },
169
+ "execution_count": 5,
170
+ "metadata": {},
171
+ "output_type": "execute_result"
172
+ }
173
+ ],
174
+ "source": [
175
+ "classifier(books[\"description\"][0].split(\".\"))"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 6,
181
+ "id": "67fbf665",
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "sentences = books[\"description\"][0].split(\".\")\n",
186
+ "predictions=classifier(sentences)"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 7,
192
+ "id": "c622277d",
193
+ "metadata": {},
194
+ "outputs": [
195
+ {
196
+ "data": {
197
+ "text/plain": [
198
+ "'A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives'"
199
+ ]
200
+ },
201
+ "execution_count": 7,
202
+ "metadata": {},
203
+ "output_type": "execute_result"
204
+ }
205
+ ],
206
+ "source": [
207
+ "sentences[0]"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "code",
212
+ "execution_count": 9,
213
+ "id": "11a6fb6d",
214
+ "metadata": {},
215
+ "outputs": [
216
+ {
217
+ "data": {
218
+ "text/plain": [
219
+ "[{'label': 'fear', 'score': 0.928167998790741},\n",
220
+ " {'label': 'anger', 'score': 0.03219102695584297},\n",
221
+ " {'label': 'neutral', 'score': 0.012808704748749733},\n",
222
+ " {'label': 'sadness', 'score': 0.008756889030337334},\n",
223
+ " {'label': 'surprise', 'score': 0.008597930893301964},\n",
224
+ " {'label': 'disgust', 'score': 0.008431846275925636},\n",
225
+ " {'label': 'joy', 'score': 0.0010455820010975003}]"
226
+ ]
227
+ },
228
+ "execution_count": 9,
229
+ "metadata": {},
230
+ "output_type": "execute_result"
231
+ }
232
+ ],
233
+ "source": [
234
+ "predictions[3]"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 10,
240
+ "id": "03e4b938",
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "data": {
245
+ "text/plain": [
246
+ "[{'label': 'anger', 'score': 0.00915635097771883},\n",
247
+ " {'label': 'disgust', 'score': 0.002628469606861472},\n",
248
+ " {'label': 'fear', 'score': 0.06816212832927704},\n",
249
+ " {'label': 'joy', 'score': 0.04794234782457352},\n",
250
+ " {'label': 'neutral', 'score': 0.14038528501987457},\n",
251
+ " {'label': 'sadness', 'score': 0.002122158883139491},\n",
252
+ " {'label': 'surprise', 'score': 0.7296032905578613}]"
253
+ ]
254
+ },
255
+ "execution_count": 10,
256
+ "metadata": {},
257
+ "output_type": "execute_result"
258
+ }
259
+ ],
260
+ "source": [
261
+ "sorted(predictions[0],key=lambda x:x[\"label\"])"
262
+ ]
263
+ },
264
+ {
265
+ "cell_type": "code",
266
+ "execution_count": 11,
267
+ "id": "494af118",
268
+ "metadata": {},
269
+ "outputs": [],
270
+ "source": [
271
+ "import numpy as np\n",
272
+ "emotion_labels= [x[\"label\"] for x in predictions[0]]\n",
273
+ "emotion_scores = {label:[] for label in emotion_labels}\n",
274
+ "isbn=[]\n",
275
+ "\n",
276
+ "def calculate_max_emotion_scores(predictions):\n",
277
+ " per_emotion_scores={label:[] for label in emotion_labels}\n",
278
+ " for prediction in predictions:\n",
279
+ " sorted_predictions=sorted(prediction,key = lambda x:x[\"label\"])\n",
280
+ " for index,label in enumerate(emotion_labels):\n",
281
+ " per_emotion_scores[label].append(sorted_predictions[index][\"score\"])\n",
282
+ "\n",
283
+ " return {label:np.max(scores) for label,scores in per_emotion_scores.items()}"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "code",
288
+ "execution_count": 12,
289
+ "id": "50eabfd2",
290
+ "metadata": {},
291
+ "outputs": [
292
+ {
293
+ "name": "stderr",
294
+ "output_type": "stream",
295
+ "text": [
296
+ "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n"
297
+ ]
298
+ }
299
+ ],
300
+ "source": [
301
+ "for i in range(10):\n",
302
+ " isbn.append(books[\"isbn13\"][i])\n",
303
+ " sentences=books[\"description\"][i].split(\".\")\n",
304
+ " predictions=classifier(sentences)\n",
305
+ " max_scores= calculate_max_emotion_scores(predictions)\n",
306
+ " for label in emotion_labels:\n",
307
+ " emotion_scores[label].append(max_scores[label])\n",
308
+ " "
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": 13,
314
+ "id": "35410108",
315
+ "metadata": {},
316
+ "outputs": [
317
+ {
318
+ "data": {
319
+ "text/plain": [
320
+ "{'surprise': [0.00915635097771883,\n",
321
+ " 0.005966869182884693,\n",
322
+ " 0.041300997138023376,\n",
323
+ " 0.016036199405789375,\n",
324
+ " 0.01362438965588808,\n",
325
+ " 0.009158486500382423,\n",
326
+ " 0.003973892889916897,\n",
327
+ " 0.023656249046325684,\n",
328
+ " 0.3006698787212372,\n",
329
+ " 0.01765839010477066],\n",
330
+ " 'neutral': [0.002628469606861472,\n",
331
+ " 0.002887075301259756,\n",
332
+ " 0.024568455293774605,\n",
333
+ " 0.06069517880678177,\n",
334
+ " 0.12224282324314117,\n",
335
+ " 0.012228667736053467,\n",
336
+ " 0.0038164728321135044,\n",
337
+ " 0.009091983549296856,\n",
338
+ " 0.2794811427593231,\n",
339
+ " 0.1779269576072693],\n",
340
+ " 'fear': [0.06816212832927704,\n",
341
+ " 0.0038098874501883984,\n",
342
+ " 0.1040615662932396,\n",
343
+ " 0.001691634999588132,\n",
344
+ " 0.09504348784685135,\n",
345
+ " 0.03679506108164787,\n",
346
+ " 0.0012676806654781103,\n",
347
+ " 0.4044956862926483,\n",
348
+ " 0.08446498215198517,\n",
349
+ " 0.04945705831050873],\n",
350
+ " 'joy': [0.04794234782457352,\n",
351
+ " 0.7044203877449036,\n",
352
+ " 0.7672369480133057,\n",
353
+ " 0.1617567539215088,\n",
354
+ " 0.0083356574177742,\n",
355
+ " 0.043375857174396515,\n",
356
+ " 0.8725652098655701,\n",
357
+ " 0.01337516214698553,\n",
358
+ " 0.0013904988300055265,\n",
359
+ " 0.032197605818510056],\n",
360
+ " 'anger': [0.14038528501987457,\n",
361
+ " 0.21776098012924194,\n",
362
+ " 0.042176082730293274,\n",
363
+ " 0.7326855063438416,\n",
364
+ " 0.27261340618133545,\n",
365
+ " 0.6213923692703247,\n",
366
+ " 0.07678427547216415,\n",
367
+ " 0.29216688871383667,\n",
368
+ " 0.029392102733254433,\n",
369
+ " 0.662406325340271],\n",
370
+ " 'disgust': [0.002122158883139491,\n",
371
+ " 0.004508530721068382,\n",
372
+ " 0.010859863832592964,\n",
373
+ " 0.020988158881664276,\n",
374
+ " 0.4758804738521576,\n",
375
+ " 0.0051463996060192585,\n",
376
+ " 0.004653005860745907,\n",
377
+ " 0.022726479917764664,\n",
378
+ " 0.2327764630317688,\n",
379
+ " 0.013540088199079037],\n",
380
+ " 'sadness': [0.7296032905578613,\n",
381
+ " 0.060646187514066696,\n",
382
+ " 0.009796091355383396,\n",
383
+ " 0.006146553438156843,\n",
384
+ " 0.012259832583367825,\n",
385
+ " 0.2719031274318695,\n",
386
+ " 0.036939460784196854,\n",
387
+ " 0.23448744416236877,\n",
388
+ " 0.07182495296001434,\n",
389
+ " 0.04681351035833359]}"
390
+ ]
391
+ },
392
+ "execution_count": 13,
393
+ "metadata": {},
394
+ "output_type": "execute_result"
395
+ }
396
+ ],
397
+ "source": [
398
+ "emotion_scores"
399
+ ]
400
+ },
401
+ {
402
+ "cell_type": "code",
403
+ "execution_count": 15,
404
+ "id": "c99e6dec",
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "name": "stderr",
409
+ "output_type": "stream",
410
+ "text": [
411
+ "100%|██████████| 5197/5197 [06:10<00:00, 14.02it/s] \n"
412
+ ]
413
+ }
414
+ ],
415
+ "source": [
416
+ "from tqdm import tqdm\n",
417
+ "emotion_labels= [x[\"label\"] for x in predictions[0]]\n",
418
+ "emotion_scores = {label:[] for label in emotion_labels}\n",
419
+ "isbn=[]\n",
420
+ "for i in tqdm(range(len(books))):\n",
421
+ " isbn.append(books[\"isbn13\"][i])\n",
422
+ " sentences=books[\"description\"][i].split(\".\")\n",
423
+ " predictions=classifier(sentences)\n",
424
+ " max_scores= calculate_max_emotion_scores(predictions)\n",
425
+ " for label in emotion_labels:\n",
426
+ " emotion_scores[label].append(max_scores[label])\n",
427
+ " "
428
+ ]
429
+ },
430
+ {
431
+ "cell_type": "code",
432
+ "execution_count": 16,
433
+ "id": "da04acb1",
434
+ "metadata": {},
435
+ "outputs": [
436
+ {
437
+ "data": {
438
+ "text/html": [
439
+ "<div>\n",
440
+ "<style scoped>\n",
441
+ " .dataframe tbody tr th:only-of-type {\n",
442
+ " vertical-align: middle;\n",
443
+ " }\n",
444
+ "\n",
445
+ " .dataframe tbody tr th {\n",
446
+ " vertical-align: top;\n",
447
+ " }\n",
448
+ "\n",
449
+ " .dataframe thead th {\n",
450
+ " text-align: right;\n",
451
+ " }\n",
452
+ "</style>\n",
453
+ "<table border=\"1\" class=\"dataframe\">\n",
454
+ " <thead>\n",
455
+ " <tr style=\"text-align: right;\">\n",
456
+ " <th></th>\n",
457
+ " <th>fear</th>\n",
458
+ " <th>anger</th>\n",
459
+ " <th>sadness</th>\n",
460
+ " <th>neutral</th>\n",
461
+ " <th>disgust</th>\n",
462
+ " <th>joy</th>\n",
463
+ " <th>surprise</th>\n",
464
+ " <th>isbn13</th>\n",
465
+ " </tr>\n",
466
+ " </thead>\n",
467
+ " <tbody>\n",
468
+ " <tr>\n",
469
+ " <th>0</th>\n",
470
+ " <td>0.009156</td>\n",
471
+ " <td>0.002628</td>\n",
472
+ " <td>0.068162</td>\n",
473
+ " <td>0.047942</td>\n",
474
+ " <td>0.140385</td>\n",
475
+ " <td>0.002122</td>\n",
476
+ " <td>0.729603</td>\n",
477
+ " <td>9780002005883</td>\n",
478
+ " </tr>\n",
479
+ " <tr>\n",
480
+ " <th>1</th>\n",
481
+ " <td>0.005967</td>\n",
482
+ " <td>0.002887</td>\n",
483
+ " <td>0.003810</td>\n",
484
+ " <td>0.704420</td>\n",
485
+ " <td>0.217761</td>\n",
486
+ " <td>0.004509</td>\n",
487
+ " <td>0.060646</td>\n",
488
+ " <td>9780002261982</td>\n",
489
+ " </tr>\n",
490
+ " <tr>\n",
491
+ " <th>2</th>\n",
492
+ " <td>0.041301</td>\n",
493
+ " <td>0.024568</td>\n",
494
+ " <td>0.104062</td>\n",
495
+ " <td>0.767237</td>\n",
496
+ " <td>0.042176</td>\n",
497
+ " <td>0.010860</td>\n",
498
+ " <td>0.009796</td>\n",
499
+ " <td>9780006178736</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <th>3</th>\n",
503
+ " <td>0.016036</td>\n",
504
+ " <td>0.060695</td>\n",
505
+ " <td>0.001692</td>\n",
506
+ " <td>0.161757</td>\n",
507
+ " <td>0.732686</td>\n",
508
+ " <td>0.020988</td>\n",
509
+ " <td>0.006147</td>\n",
510
+ " <td>9780006280897</td>\n",
511
+ " </tr>\n",
512
+ " <tr>\n",
513
+ " <th>4</th>\n",
514
+ " <td>0.013624</td>\n",
515
+ " <td>0.122243</td>\n",
516
+ " <td>0.095043</td>\n",
517
+ " <td>0.008336</td>\n",
518
+ " <td>0.272613</td>\n",
519
+ " <td>0.475880</td>\n",
520
+ " <td>0.012260</td>\n",
521
+ " <td>9780006280934</td>\n",
522
+ " </tr>\n",
523
+ " </tbody>\n",
524
+ "</table>\n",
525
+ "</div>"
526
+ ],
527
+ "text/plain": [
528
+ " fear anger sadness neutral disgust joy surprise \\\n",
529
+ "0 0.009156 0.002628 0.068162 0.047942 0.140385 0.002122 0.729603 \n",
530
+ "1 0.005967 0.002887 0.003810 0.704420 0.217761 0.004509 0.060646 \n",
531
+ "2 0.041301 0.024568 0.104062 0.767237 0.042176 0.010860 0.009796 \n",
532
+ "3 0.016036 0.060695 0.001692 0.161757 0.732686 0.020988 0.006147 \n",
533
+ "4 0.013624 0.122243 0.095043 0.008336 0.272613 0.475880 0.012260 \n",
534
+ "\n",
535
+ " isbn13 \n",
536
+ "0 9780002005883 \n",
537
+ "1 9780002261982 \n",
538
+ "2 9780006178736 \n",
539
+ "3 9780006280897 \n",
540
+ "4 9780006280934 "
541
+ ]
542
+ },
543
+ "execution_count": 16,
544
+ "metadata": {},
545
+ "output_type": "execute_result"
546
+ }
547
+ ],
548
+ "source": [
549
+ "emotions_df=pd.DataFrame(emotion_scores)\n",
550
+ "emotions_df[\"isbn13\"]=isbn\n",
551
+ "emotions_df.head()"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 17,
557
+ "id": "5f693d61",
558
+ "metadata": {},
559
+ "outputs": [],
560
+ "source": [
561
+ "books=pd.merge(books,emotions_df,on=\"isbn13\")"
562
+ ]
563
+ },
564
+ {
565
+ "cell_type": "code",
566
+ "execution_count": 18,
567
+ "id": "ee09e5e5",
568
+ "metadata": {},
569
+ "outputs": [
570
+ {
571
+ "data": {
572
+ "text/html": [
573
+ "<div>\n",
574
+ "<style scoped>\n",
575
+ " .dataframe tbody tr th:only-of-type {\n",
576
+ " vertical-align: middle;\n",
577
+ " }\n",
578
+ "\n",
579
+ " .dataframe tbody tr th {\n",
580
+ " vertical-align: top;\n",
581
+ " }\n",
582
+ "\n",
583
+ " .dataframe thead th {\n",
584
+ " text-align: right;\n",
585
+ " }\n",
586
+ "</style>\n",
587
+ "<table border=\"1\" class=\"dataframe\">\n",
588
+ " <thead>\n",
589
+ " <tr style=\"text-align: right;\">\n",
590
+ " <th></th>\n",
591
+ " <th>isbn13</th>\n",
592
+ " <th>isbn10</th>\n",
593
+ " <th>title</th>\n",
594
+ " <th>authors</th>\n",
595
+ " <th>categories</th>\n",
596
+ " <th>thumbnail</th>\n",
597
+ " <th>description</th>\n",
598
+ " <th>published_year</th>\n",
599
+ " <th>average_rating</th>\n",
600
+ " <th>num_pages</th>\n",
601
+ " <th>...</th>\n",
602
+ " <th>title_and_subtitle</th>\n",
603
+ " <th>tagged_description</th>\n",
604
+ " <th>simple_categories</th>\n",
605
+ " <th>fear</th>\n",
606
+ " <th>anger</th>\n",
607
+ " <th>sadness</th>\n",
608
+ " <th>neutral</th>\n",
609
+ " <th>disgust</th>\n",
610
+ " <th>joy</th>\n",
611
+ " <th>surprise</th>\n",
612
+ " </tr>\n",
613
+ " </thead>\n",
614
+ " <tbody>\n",
615
+ " <tr>\n",
616
+ " <th>0</th>\n",
617
+ " <td>9780002005883</td>\n",
618
+ " <td>0002005883</td>\n",
619
+ " <td>Gilead</td>\n",
620
+ " <td>Marilynne Robinson</td>\n",
621
+ " <td>Fiction</td>\n",
622
+ " <td>http://books.google.com/books/content?id=KQZCP...</td>\n",
623
+ " <td>A NOVEL THAT READERS and critics have been eag...</td>\n",
624
+ " <td>2004.0</td>\n",
625
+ " <td>3.85</td>\n",
626
+ " <td>247.0</td>\n",
627
+ " <td>...</td>\n",
628
+ " <td>Gilead</td>\n",
629
+ " <td>9780002005883 A NOVEL THAT READERS and critics...</td>\n",
630
+ " <td>Fiction</td>\n",
631
+ " <td>0.009156</td>\n",
632
+ " <td>0.002628</td>\n",
633
+ " <td>0.068162</td>\n",
634
+ " <td>0.047942</td>\n",
635
+ " <td>0.140385</td>\n",
636
+ " <td>0.002122</td>\n",
637
+ " <td>0.729603</td>\n",
638
+ " </tr>\n",
639
+ " <tr>\n",
640
+ " <th>1</th>\n",
641
+ " <td>9780002261982</td>\n",
642
+ " <td>0002261987</td>\n",
643
+ " <td>Spider's Web</td>\n",
644
+ " <td>Charles Osborne;Agatha Christie</td>\n",
645
+ " <td>Detective and mystery stories</td>\n",
646
+ " <td>http://books.google.com/books/content?id=gA5GP...</td>\n",
647
+ " <td>A new 'Christie for Christmas' -- a full-lengt...</td>\n",
648
+ " <td>2000.0</td>\n",
649
+ " <td>3.83</td>\n",
650
+ " <td>241.0</td>\n",
651
+ " <td>...</td>\n",
652
+ " <td>Spider's Web: A Novel</td>\n",
653
+ " <td>9780002261982 A new 'Christie for Christmas' -...</td>\n",
654
+ " <td>Fiction</td>\n",
655
+ " <td>0.005967</td>\n",
656
+ " <td>0.002887</td>\n",
657
+ " <td>0.003810</td>\n",
658
+ " <td>0.704420</td>\n",
659
+ " <td>0.217761</td>\n",
660
+ " <td>0.004509</td>\n",
661
+ " <td>0.060646</td>\n",
662
+ " </tr>\n",
663
+ " <tr>\n",
664
+ " <th>2</th>\n",
665
+ " <td>9780006178736</td>\n",
666
+ " <td>0006178731</td>\n",
667
+ " <td>Rage of angels</td>\n",
668
+ " <td>Sidney Sheldon</td>\n",
669
+ " <td>Fiction</td>\n",
670
+ " <td>http://books.google.com/books/content?id=FKo2T...</td>\n",
671
+ " <td>A memorable, mesmerizing heroine Jennifer -- b...</td>\n",
672
+ " <td>1993.0</td>\n",
673
+ " <td>3.93</td>\n",
674
+ " <td>512.0</td>\n",
675
+ " <td>...</td>\n",
676
+ " <td>Rage of angels</td>\n",
677
+ " <td>9780006178736 A memorable, mesmerizing heroine...</td>\n",
678
+ " <td>Fiction</td>\n",
679
+ " <td>0.041301</td>\n",
680
+ " <td>0.024568</td>\n",
681
+ " <td>0.104062</td>\n",
682
+ " <td>0.767237</td>\n",
683
+ " <td>0.042176</td>\n",
684
+ " <td>0.010860</td>\n",
685
+ " <td>0.009796</td>\n",
686
+ " </tr>\n",
687
+ " <tr>\n",
688
+ " <th>3</th>\n",
689
+ " <td>9780006280897</td>\n",
690
+ " <td>0006280897</td>\n",
691
+ " <td>The Four Loves</td>\n",
692
+ " <td>Clive Staples Lewis</td>\n",
693
+ " <td>Christian life</td>\n",
694
+ " <td>http://books.google.com/books/content?id=XhQ5X...</td>\n",
695
+ " <td>Lewis' work on the nature of love divides love...</td>\n",
696
+ " <td>2002.0</td>\n",
697
+ " <td>4.15</td>\n",
698
+ " <td>170.0</td>\n",
699
+ " <td>...</td>\n",
700
+ " <td>The Four Loves</td>\n",
701
+ " <td>9780006280897 Lewis' work on the nature of lov...</td>\n",
702
+ " <td>Nonfiction</td>\n",
703
+ " <td>0.016036</td>\n",
704
+ " <td>0.060695</td>\n",
705
+ " <td>0.001692</td>\n",
706
+ " <td>0.161757</td>\n",
707
+ " <td>0.732686</td>\n",
708
+ " <td>0.020988</td>\n",
709
+ " <td>0.006147</td>\n",
710
+ " </tr>\n",
711
+ " <tr>\n",
712
+ " <th>4</th>\n",
713
+ " <td>9780006280934</td>\n",
714
+ " <td>0006280935</td>\n",
715
+ " <td>The Problem of Pain</td>\n",
716
+ " <td>Clive Staples Lewis</td>\n",
717
+ " <td>Christian life</td>\n",
718
+ " <td>http://books.google.com/books/content?id=Kk-uV...</td>\n",
719
+ " <td>\"In The Problem of Pain, C.S. Lewis, one of th...</td>\n",
720
+ " <td>2002.0</td>\n",
721
+ " <td>4.09</td>\n",
722
+ " <td>176.0</td>\n",
723
+ " <td>...</td>\n",
724
+ " <td>The Problem of Pain</td>\n",
725
+ " <td>9780006280934 \"In The Problem of Pain, C.S. Le...</td>\n",
726
+ " <td>Nonfiction</td>\n",
727
+ " <td>0.013624</td>\n",
728
+ " <td>0.122243</td>\n",
729
+ " <td>0.095043</td>\n",
730
+ " <td>0.008336</td>\n",
731
+ " <td>0.272613</td>\n",
732
+ " <td>0.475880</td>\n",
733
+ " <td>0.012260</td>\n",
734
+ " </tr>\n",
735
+ " <tr>\n",
736
+ " <th>...</th>\n",
737
+ " <td>...</td>\n",
738
+ " <td>...</td>\n",
739
+ " <td>...</td>\n",
740
+ " <td>...</td>\n",
741
+ " <td>...</td>\n",
742
+ " <td>...</td>\n",
743
+ " <td>...</td>\n",
744
+ " <td>...</td>\n",
745
+ " <td>...</td>\n",
746
+ " <td>...</td>\n",
747
+ " <td>...</td>\n",
748
+ " <td>...</td>\n",
749
+ " <td>...</td>\n",
750
+ " <td>...</td>\n",
751
+ " <td>...</td>\n",
752
+ " <td>...</td>\n",
753
+ " <td>...</td>\n",
754
+ " <td>...</td>\n",
755
+ " <td>...</td>\n",
756
+ " <td>...</td>\n",
757
+ " <td>...</td>\n",
758
+ " </tr>\n",
759
+ " <tr>\n",
760
+ " <th>5192</th>\n",
761
+ " <td>9788172235222</td>\n",
762
+ " <td>8172235224</td>\n",
763
+ " <td>Mistaken Identity</td>\n",
764
+ " <td>Nayantara Sahgal</td>\n",
765
+ " <td>Indic fiction (English)</td>\n",
766
+ " <td>http://books.google.com/books/content?id=q-tKP...</td>\n",
767
+ " <td>On A Train Journey Home To North India After L...</td>\n",
768
+ " <td>2003.0</td>\n",
769
+ " <td>2.93</td>\n",
770
+ " <td>324.0</td>\n",
771
+ " <td>...</td>\n",
772
+ " <td>Mistaken Identity</td>\n",
773
+ " <td>9788172235222 On A Train Journey Home To North...</td>\n",
774
+ " <td>Fiction</td>\n",
775
+ " <td>0.025156</td>\n",
776
+ " <td>0.001939</td>\n",
777
+ " <td>0.094667</td>\n",
778
+ " <td>0.002254</td>\n",
779
+ " <td>0.010511</td>\n",
780
+ " <td>0.857255</td>\n",
781
+ " <td>0.008218</td>\n",
782
+ " </tr>\n",
783
+ " <tr>\n",
784
+ " <th>5193</th>\n",
785
+ " <td>9788173031014</td>\n",
786
+ " <td>8173031010</td>\n",
787
+ " <td>Journey to the East</td>\n",
788
+ " <td>Hermann Hesse</td>\n",
789
+ " <td>Adventure stories</td>\n",
790
+ " <td>http://books.google.com/books/content?id=rq6JP...</td>\n",
791
+ " <td>This book tells the tale of a man who goes on ...</td>\n",
792
+ " <td>2002.0</td>\n",
793
+ " <td>3.70</td>\n",
794
+ " <td>175.0</td>\n",
795
+ " <td>...</td>\n",
796
+ " <td>Journey to the East</td>\n",
797
+ " <td>9788173031014 This book tells the tale of a ma...</td>\n",
798
+ " <td>Nonfiction</td>\n",
799
+ " <td>0.005602</td>\n",
800
+ " <td>0.003775</td>\n",
801
+ " <td>0.018216</td>\n",
802
+ " <td>0.400263</td>\n",
803
+ " <td>0.338892</td>\n",
804
+ " <td>0.005487</td>\n",
805
+ " <td>0.227765</td>\n",
806
+ " </tr>\n",
807
+ " <tr>\n",
808
+ " <th>5194</th>\n",
809
+ " <td>9788179921623</td>\n",
810
+ " <td>817992162X</td>\n",
811
+ " <td>The Monk Who Sold His Ferrari: A Fable About F...</td>\n",
812
+ " <td>Robin Sharma</td>\n",
813
+ " <td>Health &amp; Fitness</td>\n",
814
+ " <td>http://books.google.com/books/content?id=c_7mf...</td>\n",
815
+ " <td>Wisdom to Create a Life of Passion, Purpose, a...</td>\n",
816
+ " <td>2003.0</td>\n",
817
+ " <td>3.82</td>\n",
818
+ " <td>198.0</td>\n",
819
+ " <td>...</td>\n",
820
+ " <td>The Monk Who Sold His Ferrari: A Fable About F...</td>\n",
821
+ " <td>9788179921623 Wisdom to Create a Life of Passi...</td>\n",
822
+ " <td>Fiction</td>\n",
823
+ " <td>0.008463</td>\n",
824
+ " <td>0.009147</td>\n",
825
+ " <td>0.013295</td>\n",
826
+ " <td>0.620452</td>\n",
827
+ " <td>0.329754</td>\n",
828
+ " <td>0.010788</td>\n",
829
+ " <td>0.008101</td>\n",
830
+ " </tr>\n",
831
+ " <tr>\n",
832
+ " <th>5195</th>\n",
833
+ " <td>9788185300535</td>\n",
834
+ " <td>8185300534</td>\n",
835
+ " <td>I Am that</td>\n",
836
+ " <td>Sri Nisargadatta Maharaj;Sudhakar S. Dikshit</td>\n",
837
+ " <td>Philosophy</td>\n",
838
+ " <td>http://books.google.com/books/content?id=Fv_JP...</td>\n",
839
+ " <td>This collection of the timeless teachings of o...</td>\n",
840
+ " <td>1999.0</td>\n",
841
+ " <td>4.51</td>\n",
842
+ " <td>531.0</td>\n",
843
+ " <td>...</td>\n",
844
+ " <td>I Am that: Talks with Sri Nisargadatta Maharaj</td>\n",
845
+ " <td>9788185300535 This collection of the timeless ...</td>\n",
846
+ " <td>Nonfiction</td>\n",
847
+ " <td>0.005475</td>\n",
848
+ " <td>0.034544</td>\n",
849
+ " <td>0.003970</td>\n",
850
+ " <td>0.258353</td>\n",
851
+ " <td>0.648011</td>\n",
852
+ " <td>0.017372</td>\n",
853
+ " <td>0.032275</td>\n",
854
+ " </tr>\n",
855
+ " <tr>\n",
856
+ " <th>5196</th>\n",
857
+ " <td>9789027712059</td>\n",
858
+ " <td>9027712050</td>\n",
859
+ " <td>The Berlin Phenomenology</td>\n",
860
+ " <td>Georg Wilhelm Friedrich Hegel</td>\n",
861
+ " <td>History</td>\n",
862
+ " <td>http://books.google.com/books/content?id=Vy7Sk...</td>\n",
863
+ " <td>Since the three volume edition ofHegel's Philo...</td>\n",
864
+ " <td>1981.0</td>\n",
865
+ " <td>0.00</td>\n",
866
+ " <td>210.0</td>\n",
867
+ " <td>...</td>\n",
868
+ " <td>The Berlin Phenomenology</td>\n",
869
+ " <td>9789027712059 Since the three volume edition o...</td>\n",
870
+ " <td>Nonfiction</td>\n",
871
+ " <td>0.002837</td>\n",
872
+ " <td>0.003137</td>\n",
873
+ " <td>0.001166</td>\n",
874
+ " <td>0.958549</td>\n",
875
+ " <td>0.028252</td>\n",
876
+ " <td>0.002916</td>\n",
877
+ " <td>0.003142</td>\n",
878
+ " </tr>\n",
879
+ " </tbody>\n",
880
+ "</table>\n",
881
+ "<p>5197 rows × 21 columns</p>\n",
882
+ "</div>"
883
+ ],
884
+ "text/plain": [
885
+ " isbn13 isbn10 \\\n",
886
+ "0 9780002005883 0002005883 \n",
887
+ "1 9780002261982 0002261987 \n",
888
+ "2 9780006178736 0006178731 \n",
889
+ "3 9780006280897 0006280897 \n",
890
+ "4 9780006280934 0006280935 \n",
891
+ "... ... ... \n",
892
+ "5192 9788172235222 8172235224 \n",
893
+ "5193 9788173031014 8173031010 \n",
894
+ "5194 9788179921623 817992162X \n",
895
+ "5195 9788185300535 8185300534 \n",
896
+ "5196 9789027712059 9027712050 \n",
897
+ "\n",
898
+ " title \\\n",
899
+ "0 Gilead \n",
900
+ "1 Spider's Web \n",
901
+ "2 Rage of angels \n",
902
+ "3 The Four Loves \n",
903
+ "4 The Problem of Pain \n",
904
+ "... ... \n",
905
+ "5192 Mistaken Identity \n",
906
+ "5193 Journey to the East \n",
907
+ "5194 The Monk Who Sold His Ferrari: A Fable About F... \n",
908
+ "5195 I Am that \n",
909
+ "5196 The Berlin Phenomenology \n",
910
+ "\n",
911
+ " authors \\\n",
912
+ "0 Marilynne Robinson \n",
913
+ "1 Charles Osborne;Agatha Christie \n",
914
+ "2 Sidney Sheldon \n",
915
+ "3 Clive Staples Lewis \n",
916
+ "4 Clive Staples Lewis \n",
917
+ "... ... \n",
918
+ "5192 Nayantara Sahgal \n",
919
+ "5193 Hermann Hesse \n",
920
+ "5194 Robin Sharma \n",
921
+ "5195 Sri Nisargadatta Maharaj;Sudhakar S. Dikshit \n",
922
+ "5196 Georg Wilhelm Friedrich Hegel \n",
923
+ "\n",
924
+ " categories \\\n",
925
+ "0 Fiction \n",
926
+ "1 Detective and mystery stories \n",
927
+ "2 Fiction \n",
928
+ "3 Christian life \n",
929
+ "4 Christian life \n",
930
+ "... ... \n",
931
+ "5192 Indic fiction (English) \n",
932
+ "5193 Adventure stories \n",
933
+ "5194 Health & Fitness \n",
934
+ "5195 Philosophy \n",
935
+ "5196 History \n",
936
+ "\n",
937
+ " thumbnail \\\n",
938
+ "0 http://books.google.com/books/content?id=KQZCP... \n",
939
+ "1 http://books.google.com/books/content?id=gA5GP... \n",
940
+ "2 http://books.google.com/books/content?id=FKo2T... \n",
941
+ "3 http://books.google.com/books/content?id=XhQ5X... \n",
942
+ "4 http://books.google.com/books/content?id=Kk-uV... \n",
943
+ "... ... \n",
944
+ "5192 http://books.google.com/books/content?id=q-tKP... \n",
945
+ "5193 http://books.google.com/books/content?id=rq6JP... \n",
946
+ "5194 http://books.google.com/books/content?id=c_7mf... \n",
947
+ "5195 http://books.google.com/books/content?id=Fv_JP... \n",
948
+ "5196 http://books.google.com/books/content?id=Vy7Sk... \n",
949
+ "\n",
950
+ " description published_year \\\n",
951
+ "0 A NOVEL THAT READERS and critics have been eag... 2004.0 \n",
952
+ "1 A new 'Christie for Christmas' -- a full-lengt... 2000.0 \n",
953
+ "2 A memorable, mesmerizing heroine Jennifer -- b... 1993.0 \n",
954
+ "3 Lewis' work on the nature of love divides love... 2002.0 \n",
955
+ "4 \"In The Problem of Pain, C.S. Lewis, one of th... 2002.0 \n",
956
+ "... ... ... \n",
957
+ "5192 On A Train Journey Home To North India After L... 2003.0 \n",
958
+ "5193 This book tells the tale of a man who goes on ... 2002.0 \n",
959
+ "5194 Wisdom to Create a Life of Passion, Purpose, a... 2003.0 \n",
960
+ "5195 This collection of the timeless teachings of o... 1999.0 \n",
961
+ "5196 Since the three volume edition ofHegel's Philo... 1981.0 \n",
962
+ "\n",
963
+ " average_rating num_pages ... \\\n",
964
+ "0 3.85 247.0 ... \n",
965
+ "1 3.83 241.0 ... \n",
966
+ "2 3.93 512.0 ... \n",
967
+ "3 4.15 170.0 ... \n",
968
+ "4 4.09 176.0 ... \n",
969
+ "... ... ... ... \n",
970
+ "5192 2.93 324.0 ... \n",
971
+ "5193 3.70 175.0 ... \n",
972
+ "5194 3.82 198.0 ... \n",
973
+ "5195 4.51 531.0 ... \n",
974
+ "5196 0.00 210.0 ... \n",
975
+ "\n",
976
+ " title_and_subtitle \\\n",
977
+ "0 Gilead \n",
978
+ "1 Spider's Web: A Novel \n",
979
+ "2 Rage of angels \n",
980
+ "3 The Four Loves \n",
981
+ "4 The Problem of Pain \n",
982
+ "... ... \n",
983
+ "5192 Mistaken Identity \n",
984
+ "5193 Journey to the East \n",
985
+ "5194 The Monk Who Sold His Ferrari: A Fable About F... \n",
986
+ "5195 I Am that: Talks with Sri Nisargadatta Maharaj \n",
987
+ "5196 The Berlin Phenomenology \n",
988
+ "\n",
989
+ " tagged_description simple_categories \\\n",
990
+ "0 9780002005883 A NOVEL THAT READERS and critics... Fiction \n",
991
+ "1 9780002261982 A new 'Christie for Christmas' -... Fiction \n",
992
+ "2 9780006178736 A memorable, mesmerizing heroine... Fiction \n",
993
+ "3 9780006280897 Lewis' work on the nature of lov... Nonfiction \n",
994
+ "4 9780006280934 \"In The Problem of Pain, C.S. Le... Nonfiction \n",
995
+ "... ... ... \n",
996
+ "5192 9788172235222 On A Train Journey Home To North... Fiction \n",
997
+ "5193 9788173031014 This book tells the tale of a ma... Nonfiction \n",
998
+ "5194 9788179921623 Wisdom to Create a Life of Passi... Fiction \n",
999
+ "5195 9788185300535 This collection of the timeless ... Nonfiction \n",
1000
+ "5196 9789027712059 Since the three volume edition o... Nonfiction \n",
1001
+ "\n",
1002
+ " fear anger sadness neutral disgust joy surprise \n",
1003
+ "0 0.009156 0.002628 0.068162 0.047942 0.140385 0.002122 0.729603 \n",
1004
+ "1 0.005967 0.002887 0.003810 0.704420 0.217761 0.004509 0.060646 \n",
1005
+ "2 0.041301 0.024568 0.104062 0.767237 0.042176 0.010860 0.009796 \n",
1006
+ "3 0.016036 0.060695 0.001692 0.161757 0.732686 0.020988 0.006147 \n",
1007
+ "4 0.013624 0.122243 0.095043 0.008336 0.272613 0.475880 0.012260 \n",
1008
+ "... ... ... ... ... ... ... ... \n",
1009
+ "5192 0.025156 0.001939 0.094667 0.002254 0.010511 0.857255 0.008218 \n",
1010
+ "5193 0.005602 0.003775 0.018216 0.400263 0.338892 0.005487 0.227765 \n",
1011
+ "5194 0.008463 0.009147 0.013295 0.620452 0.329754 0.010788 0.008101 \n",
1012
+ "5195 0.005475 0.034544 0.003970 0.258353 0.648011 0.017372 0.032275 \n",
1013
+ "5196 0.002837 0.003137 0.001166 0.958549 0.028252 0.002916 0.003142 \n",
1014
+ "\n",
1015
+ "[5197 rows x 21 columns]"
1016
+ ]
1017
+ },
1018
+ "execution_count": 18,
1019
+ "metadata": {},
1020
+ "output_type": "execute_result"
1021
+ }
1022
+ ],
1023
+ "source": [
1024
+ "books"
1025
+ ]
1026
+ },
1027
+ {
1028
+ "cell_type": "code",
1029
+ "execution_count": 19,
1030
+ "id": "4f4407d2",
1031
+ "metadata": {},
1032
+ "outputs": [],
1033
+ "source": [
1034
+ "books.to_csv(\"books_with_emotions.csv\",index=False)"
1035
+ ]
1036
+ }
1037
+ ],
1038
+ "metadata": {
1039
+ "kernelspec": {
1040
+ "display_name": "base",
1041
+ "language": "python",
1042
+ "name": "python3"
1043
+ },
1044
+ "language_info": {
1045
+ "codemirror_mode": {
1046
+ "name": "ipython",
1047
+ "version": 3
1048
+ },
1049
+ "file_extension": ".py",
1050
+ "mimetype": "text/x-python",
1051
+ "name": "python",
1052
+ "nbconvert_exporter": "python",
1053
+ "pygments_lexer": "ipython3",
1054
+ "version": "3.12.3"
1055
+ }
1056
+ },
1057
+ "nbformat": 4,
1058
+ "nbformat_minor": 5
1059
+ }
tagged_description.txt ADDED
The diff for this file is too large to render. See raw diff
 
text-classification.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
vector_search.ipynb ADDED
The diff for this file is too large to render. See raw diff