Spaces:
Build error
Build error
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
| import torch | |
| from langchain_community.document_loaders import TextLoader | |
| from langchain_text_splitters import CharacterTextSplitter | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_chroma import Chroma | |
# Book catalogue: one row per book, read from the prepared CSV.
books = pd.read_csv('data/books_with_emotions.csv')

# Book Thumbnail
# Append the size hint to every cover URL. A missing thumbnail stays NaN
# through the string concatenation and is then replaced by the placeholder.
books['large_thumbnail'] = (
    (books['thumbnail'] + '&fife=w800').fillna('cover-not-found.jpg')
)
| # Create Vector Database | |
# Load the descriptions file as a single document.
raw_docs = TextLoader('./data/full_desc.txt', encoding='utf-8').load()

# Split into one chunk per line. The chunk size is deliberately tiny so that
# no two non-empty lines can ever be merged into the same chunk; it must stay
# positive because recent langchain-text-splitters releases reject
# chunk_size <= 0 with a ValueError. The splitter will log "chunk longer than
# specified" warnings, which are expected here.
text_splitter = CharacterTextSplitter(
    chunk_size=1,
    chunk_overlap=0,
    separator='\n',
)
docs = text_splitter.split_documents(raw_docs)

# Embed on the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': device},
)

# Vector store built from the per-line chunks; queried by `database.similarity_search`.
database = Chroma.from_documents(docs, embeddings)
| # Retrieval | |
def retrieval(query: str, category: str = None, tone: str = None,
              init_top_k: int = 80, final_top_k: int = 16) -> pd.DataFrame:
    """Return catalogue rows for the books that best match ``query``.

    Args:
        query: Free-text description of the desired book.
        category: Value of ``final_categories`` to keep. ``None`` or ``'All'``
            disables the category filter.
        tone: One of ``'Happy'``, ``'Surprising'``, ``'Angry'``,
            ``'Suspenseful'``, ``'Sad'``. Any other value (including ``None``
            and ``'All'``) leaves the similarity ordering untouched.
        init_top_k: Number of candidates requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A new DataFrame with at most ``final_top_k`` rows of ``books``. Rows
        are ordered by similarity to the query, or by the selected emotion
        score (descending) when ``tone`` names one.
    """
    tone_columns = {
        'Happy': 'joy',
        'Surprising': 'surprise',
        'Angry': 'anger',
        'Suspenseful': 'fear',
        'Sad': 'sadness',
    }

    recs = database.similarity_search(query, k=init_top_k)

    # Each stored chunk is parsed as "<isbn13> <description...>", optionally
    # wrapped in double quotes. Remember the best (lowest) similarity rank
    # seen for every ISBN.
    rank = {}
    for position, rec in enumerate(recs):
        isbn = int(rec.page_content.strip('"').split()[0])
        rank.setdefault(isbn, position)

    matches = books[books['isbn13'].isin(list(rank))]

    # isin() yields rows in catalogue order; restore the similarity order so
    # that the truncation below keeps the closest matches, not arbitrary ones.
    matches = matches.sort_values(
        by='isbn13',
        key=lambda column: column.map(rank),
        kind='stable',
    )

    # Treat a missing category like 'All'; comparing against None would
    # otherwise filter out every row.
    if category is not None and category != 'All':
        matches = matches[matches['final_categories'] == category]
    matches = matches.head(final_top_k)

    # Re-rank the shortlist by the requested emotion score. A non-inplace sort
    # avoids mutating a filtered slice of `books`.
    emotion = tone_columns.get(tone)
    if emotion is not None:
        matches = matches.sort_values(by=emotion, ascending=False)
    return matches
| # Recommendation | |
def _join_authors(raw_authors) -> str:
    """Turn a ';'-separated author field into 'A', 'A and B' or 'A, B and C'.

    Returns an empty string when the field is not a string (e.g. a NaN cell).
    """
    if not isinstance(raw_authors, str):
        return ''
    authors = raw_authors.split(';')
    if len(authors) >= 2:
        # For exactly two authors the comma-join of the head is just the first name.
        return ', '.join(authors[:-1]) + ' and ' + authors[-1]
    return authors[0]


def recommend(query: str, category: str, tone: str):
    """Build the gallery entries for the books matching the user's request.

    Args:
        query: Free-text description typed by the user.
        category: Selected category, passed through to ``retrieval``.
        tone: Selected emotional tone, passed through to ``retrieval``.

    Returns:
        A list of ``[image, caption]`` pairs, one per recommended book. The
        caption is "<full_title> by <authors>: <first 30 words>...".
    """
    recs = retrieval(query, category, tone)
    results = []
    for _, row in recs.iterrows():
        # Missing cells come back from pandas as NaN (a float), which has no
        # .split(); treat them as empty instead of crashing the request.
        raw_description = row['description']
        words = raw_description.split() if isinstance(raw_description, str) else []
        description = ' '.join(words[:30]) + '...'

        authors_str = _join_authors(row['authors'])
        if authors_str:
            caption = f"{row['full_title']} by {authors_str}: {description}"
        else:
            # No usable author information: omit the "by ..." part.
            caption = f"{row['full_title']}: {description}"

        results.append([
            row['large_thumbnail'],
            caption,
        ])
    return results
| # Dashboard | |
# Dropdown options; 'All' is the default entry of both dropdowns.
categories = ['All', *sorted(books['final_categories'].unique())]
tones = ['All', 'Happy', 'Surprising', 'Angry', 'Suspenseful', 'Sad']

with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown('# Semantics Book Recommendation System')

    # Input row: free-text query, the two filters and the submit button.
    with gr.Row():
        user_query = gr.Textbox(
            placeholder='e.g. A story about a boy who ...',
            label='Please enter the description of the book you want to read',
        )
        category = gr.Dropdown(label='Select a category', choices=categories, value='All')
        tone = gr.Dropdown(label='Select an emotional tone', choices=tones, value='All')
        btn = gr.Button('Find books')

    # Result area: a gallery of cover images with captions.
    gr.Markdown('## Recommendations')
    output = gr.Gallery(label='Recommended Books', columns=8, rows=2)

    # Clicking the button passes the three inputs to `recommend` and shows
    # its return value in the gallery.
    btn.click(fn=recommend, inputs=[user_query, category, tone], outputs=output)

# Start the app only when the file is run as a script.
if __name__ == '__main__':
    dashboard.launch(share=True)