"""Gradio dashboard that recommends books via semantic search over descriptions.

At import time the script loads the book table from ``books_with_emotions.csv``,
builds a Chroma vector index from ``tagged_description.txt`` and constructs the
Gradio UI. Running the file as a script launches the dashboard.
"""

import gc
import os
from typing import Optional

import gradio as gr
import numpy as np
import pandas as pd
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import CharacterTextSplitter

# Environment variables (set in HF Spaces settings)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

print("Loading book data...")

# -----------------------------
# LOAD BOOK DATA
# -----------------------------
books = pd.read_csv("books_with_emotions.csv")
books["large_thumbnail"] = books["thumbnail"] + "&fife=w800"
# A missing thumbnail stays NaN after the concatenation above; swap those
# entries for the placeholder cover image.
books["large_thumbnail"] = np.where(
    books["large_thumbnail"].isna(),
    "cover-not-found.jpg",
    books["large_thumbnail"],
)

print("Loading documents...")

# -----------------------------
# LOAD DOCUMENTS FOR SEMANTIC INDEX
# -----------------------------
file_path = "tagged_description.txt"
loader = TextLoader(file_path, encoding="utf-8")
raw_documents = loader.load()
print(f"Loaded {len(raw_documents)} documents")

# NOTE(review): separator="\n" with chunk_size=1 presumably forces one chunk
# per line of the tagged-description file -- confirm against the splitter docs.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1,
    chunk_overlap=0
)
documents = text_splitter.split_documents(raw_documents)

# Drop the raw text before the embedding model is loaded.
del raw_documents, loader
gc.collect()

print("Initializing embeddings model...")

# -----------------------------
# CREATE VECTOR STORE
# -----------------------------
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

print("Creating vector database...")
# NOTE(review): from_documents is called with a persist_directory on every
# start. If ./chroma_db survives restarts this presumably re-adds the same
# documents each time -- confirm, and consider opening the existing store.
db_books = Chroma.from_documents(
    documents,
    embedding=embeddings,
    persist_directory="./chroma_db"
)

del documents, text_splitter
gc.collect()

print("Application ready!")

# Maps the tone label shown in the UI to the emotion-score column to rank by.
_TONE_TO_COLUMN = {
    "Happy": "joy",
    "Surprising": "surprise",
    "Angry": "anger",
    "Suspenseful": "fear",
    "Sad": "sadness",
}


def retrieve_semantic_recommendations(
    query: str,
    category: Optional[str] = None,
    tone: Optional[str] = None,
    initial_top_k: int = 50,
    final_top_k: int = 16,
) -> pd.DataFrame:
    """Return the rows of ``books`` that best match a free-text query.

    Args:
        query: Text passed to the vector store's similarity search.
        category: Value of ``simple_categories`` to keep. ``None`` or
            ``"All"`` disables the category filter.
        tone: One of the keys of ``_TONE_TO_COLUMN``. Any other value
            (including ``None`` and ``"All"``) leaves the similarity order.
        initial_top_k: Number of hits requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A DataFrame with at most ``final_top_k`` rows, ordered by the tone's
        emotion column (descending) when a tone is given, otherwise by
        similarity rank.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # The first whitespace-separated token of each chunk (after stripping
    # surrounding double quotes) is parsed as the integer ISBN-13.
    rank_by_isbn = {}
    for rank, rec in enumerate(recs):
        isbn = int(rec.page_content.strip('"').split()[0])
        rank_by_isbn.setdefault(isbn, rank)  # keep the best rank per ISBN

    candidates = books[books["isbn13"].isin(list(rank_by_isbn))]
    # isin() yields rows in the table's own order; reorder them by similarity
    # rank so the head() calls below keep the closest matches.
    ranks = candidates["isbn13"].map(rank_by_isbn).to_numpy()
    book_recs = candidates.iloc[np.argsort(ranks, kind="stable")]
    book_recs = book_recs.head(initial_top_k)

    if category is not None and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]
    book_recs = book_recs.head(final_top_k)

    sort_column = _TONE_TO_COLUMN.get(tone)
    if sort_column is not None:
        # Non-inplace sort: book_recs is a slice of ``books``.
        book_recs = book_recs.sort_values(by=sort_column, ascending=False)

    return book_recs


def _truncate_description(description, max_words: int = 30) -> str:
    """Return the first ``max_words`` words of ``description`` plus ``...``."""
    # A missing description is read by pandas as NaN (a float), not a string.
    words = description.split() if isinstance(description, str) else []
    return " ".join(words[:max_words]) + "..."


def _format_authors(raw_authors) -> str:
    """Turn a ``;``-separated author string into a readable list."""
    if not isinstance(raw_authors, str):
        return "Unknown author"
    names = raw_authors.split(";")
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    if len(names) > 2:
        return f"{', '.join(names[:-1])}, and {names[-1]}"
    return raw_authors


def recommend_books(query: str, category: str, tone: str):
    """Build the gallery entries for a query.

    Args:
        query: Free-text description of the desired book.
        category: Selected category, or ``"All"``.
        tone: Selected emotional tone, or ``"All"``.

    Returns:
        A list of ``(thumbnail, caption)`` tuples, one per recommended book.
        Any exception is printed and an empty list is returned instead.
    """
    try:
        recommendations = retrieve_semantic_recommendations(query, category, tone)
        results = []

        for _, row in recommendations.iterrows():
            truncated_description = _truncate_description(row["description"])
            authors_str = _format_authors(row["authors"])
            caption = f"{row['title']} by {authors_str}: {truncated_description}"
            results.append((row["large_thumbnail"], caption))

        gc.collect()
        return results
    except Exception as e:
        # UI boundary: report the failure and show an empty gallery.
        print(f"Error: {e}")
        return []


# dropna() keeps sorted() from comparing a float NaN with category strings.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
tones = ["All"] + ["Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")
    gr.Markdown("Find your next favorite book using AI-powered semantic search!")

    with gr.Row():
        user_query = gr.Textbox(
            label="Describe the book you're looking for:",
            placeholder="e.g., A story about forgiveness and redemption",
            scale=2
        )

    with gr.Row():
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Category:",
            value="All",
            scale=1
        )
        tone_dropdown = gr.Dropdown(
            choices=tones,
            label="Emotional Tone:",
            value="All",
            scale=1
        )
        submit_button = gr.Button("🔍 Find Books", variant="primary", scale=1)

    gr.Markdown("## 📖 Recommendations")
    output = gr.Gallery(label="Recommended Books", columns=4, rows=4, height="auto")

    # The button and pressing Enter in the textbox trigger the same search.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output
    )
    user_query.submit(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output
    )

if __name__ == "__main__":
    dashboard.launch()