github-actions[bot] committed on
Commit
267ad08
·
1 Parent(s): 49bc1b1

Auto-deploy from GitHub Actions

Browse files
app.py CHANGED
@@ -14,7 +14,7 @@ load_dotenv()
14
  # =======================
15
  # Load and preprocess books
16
  # =======================
17
- books = pd.read_csv("books_with_emotions.csv")
18
 
19
  books["large_thumbnail"] = np.where(
20
  books["thumbnail"].notna(),
 
14
  # =======================
15
  # Load and preprocess books
16
  # =======================
17
+ books = pd.read_csv("data/books_with_emotions.csv")
18
 
19
  books["large_thumbnail"] = np.where(
20
  books["thumbnail"].notna(),
space_repo/requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:472496b2ad0b094965beb86014b030ece208be13538da848bf1c85c6d1ea2678
3
- size 313
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e7ea40c3f1142afd5bd94236b82b270c2f4cf4f2b600209c43416e461d89e7
3
+ size 372
space_repo/space_repo/space_repo/.gitignore ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
space_repo/space_repo/space_repo/EDA/EDA.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
space_repo/space_repo/space_repo/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Thien Phuc Nguyen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
space_repo/space_repo/space_repo/README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Sentiment Analysis Dashboard
3
+ emoji: 📚
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: "4.39.0"
8
+ app_file: app.py
9
+ pinned: true
10
+ ---
11
+
12
+ # Sentiment Analysis Dashboard 📊
13
+
14
+ This project is an interactive Gradio web app for analyzing **book emotions** based on their descriptions.
15
+ It combines NLP and emotion classification models to visualize the emotional distribution of books.
16
+
17
+ ## 🚀 Features
18
+ - Upload or search books with emotion detection
19
+ - Supports emotions: joy, sadness, anger, fear, disgust, surprise, and neutral
20
+ - Visual charts and comparison between predicted vs. labeled emotions
21
+ - Built with `transformers`, `pandas`, and `gradio`
22
+
23
+ ## 🧠 Model
24
+ Uses a pre-trained Hugging Face emotion classifier fine-tuned on social media text.
25
+
26
+ ## ⚙️ Deployment
27
+ CI/CD is automated via **GitHub Actions**, deploying directly to Hugging Face Spaces.
space_repo/space_repo/space_repo/app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import gradio as gr

from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

# Load environment variables (e.g. the OpenAI API key used by the embeddings).
load_dotenv()

# =======================
# Load and preprocess books
# =======================
# FIX: the dataset is stored under data/ in this repo (data/books_with_emotions.csv);
# the bare filename only worked when the CSV sat next to app.py. This matches the
# path fix already applied to the outer app.py in this same commit.
books = pd.read_csv("data/books_with_emotions.csv")

# Build a large-thumbnail column: request a wider cover image when a thumbnail
# URL exists, otherwise fall back to a local placeholder image.
books["large_thumbnail"] = np.where(
    books["thumbnail"].notna(),
    books["thumbnail"] + "&fife=w800",
    "cover-not-found.jpg"
)
# =======================
# Prepare Chroma vector DB
# =======================
# FIX: tagged_description.txt is stored under data/ in this repo
# (see data/tagged_description.txt); load it from the repo-relative path.
raw_documents = TextLoader("data/tagged_description.txt").load()
# chunk_size=1 with a newline separator yields one document per line;
# each line is "<isbn13> <description>".
text_splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

# Embed every tagged description into an in-memory Chroma store.
db_books = Chroma.from_documents(documents, OpenAIEmbeddings())
# =======================
# Semantic retrieval logic
# =======================
def retrieve_semantic_recommendations(query: str,
                                      category: str = "All",
                                      tone: str = "All",
                                      initial_top_k: int = 50,
                                      final_top_k: int = 16) -> pd.DataFrame:
    """Retrieve book recommendations ranked by semantic similarity,
    optionally filtered by category and sorted by emotional tone."""
    # Each stored document starts with the ISBN-13 the description was tagged with.
    matches = db_books.similarity_search(query, k=initial_top_k)
    matched_isbns = [int(doc.page_content.strip('"').split()[0]) for doc in matches]

    # Keep only the books whose ISBN was retrieved.
    candidates = books[books["isbn13"].isin(matched_isbns)].head(initial_top_k)

    # Optional category filter.
    if category != "All":
        candidates = candidates[candidates["simple_categories"] == category]

    # Map UI tone names onto the emotion-score columns used for ranking.
    emotion_column_for_tone = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }
    emotion_column = emotion_column_for_tone.get(tone)
    if emotion_column is not None:
        candidates = candidates.sort_values(by=emotion_column, ascending=False)

    return candidates.head(final_top_k)
# =======================
# Recommendation formatting
# =======================
def recommend_books(query: str, category: str, tone: str):
    """Return (thumbnail, caption) pairs for the gallery for the given search."""
    gallery_items = []

    for _, book in retrieve_semantic_recommendations(query, category, tone).iterrows():
        # Truncate the description to its first 30 words.
        words = book["description"].split()
        short_description = " ".join(words[:30]) + "..."

        # Join the semicolon-separated author list in natural English.
        author_list = book["authors"].split(";")
        if len(author_list) == 1:
            author_text = author_list[0]
        elif len(author_list) == 2:
            author_text = f"{author_list[0]} and {author_list[1]}"
        else:
            author_text = f"{', '.join(author_list[:-1])}, and {author_list[-1]}"

        caption = f"{book['title']} by {author_text}: {short_description}"
        gallery_items.append((book["large_thumbnail"], caption))

    return gallery_items
# =======================
# Build Gradio dashboard
# =======================
categories = ["All"] + sorted(books["simple_categories"].unique())
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

with gr.Blocks() as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")

    with gr.Row():
        query_box = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness",
        )
        category_selector = gr.Dropdown(
            choices=categories,
            label="Select a category:",
            value="All",
        )
        tone_selector = gr.Dropdown(
            choices=tones,
            label="Select an emotional tone:",
            value="All",
        )
        search_button = gr.Button("Find recommendations")

    gr.Markdown("## 🧠 Recommendations")
    gallery = gr.Gallery(label="Recommended books", columns=8, rows=2)

    # Wire the search button to the recommender; results feed the gallery.
    search_button.click(fn=recommend_books,
                        inputs=[query_box, category_selector, tone_selector],
                        outputs=gallery)

if __name__ == "__main__":
    dashboard.launch()
space_repo/space_repo/space_repo/data/book_cleaned.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d34b72aed224acefc8fd234b63f242e3ddcbdea95c04082f4980d4909ea00b1
3
+ size 6421116
space_repo/space_repo/space_repo/data/book_with_categories.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efd1c17d252abcb2d30f5eea07faedc8180c7ce0e23fccb8613eb6d42c9c88d5
3
+ size 6468698
space_repo/space_repo/space_repo/data/books.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a19afd3df7170d153bb5b7e1afdfed4a05e2fe3ac163a168867596f2515e43
3
+ size 4142211
space_repo/space_repo/space_repo/data/books_with_emotions.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e94e95dc2caec7a9bf8044dda0611d0837428cce1873eae15c7043786ecdfb81
3
+ size 7185778
space_repo/space_repo/space_repo/data/recommendations.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f340096a7be4a17caa068d89af79b6e69f0360512cfb3664dcee510a234b88a
3
+ size 13787
space_repo/space_repo/space_repo/data/tagged_description.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cacf3c12e1418e194fa09f06006de8c202fa40c97109ef183ae7f4b23b41712
3
+ size 2607714
space_repo/space_repo/space_repo/requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472496b2ad0b094965beb86014b030ece208be13538da848bf1c85c6d1ea2678
3
+ size 313
space_repo/space_repo/space_repo/sentiment_analysis.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import pipeline

# Emotion classifier: DistilRoBERTa fine-tuned for 7-way emotion
# classification. top_k=None returns a score for every label rather
# than only the best one.
# NOTE(review): device="mps" assumes an Apple-Silicon GPU — confirm
# before running on other hardware.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=None,
    device="mps",
)

# Book records with categories already assigned (see text_classification.py).
books = pd.read_csv("data/book_with_categories.csv")
# The seven emotion labels emitted by the classifier (alphabetical order).
emotion_labels = ['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'surprise']

def calculate_max_emotion_score(predictions):
    """Return the per-emotion maximum score across a list of predictions.

    Args:
        predictions: One entry per sentence; each entry is a list of
            {'label': str, 'score': float} dicts covering all emotion labels.

    Returns:
        dict mapping each emotion label to the highest score it received
        in any sentence.
    """
    per_emotion_scores = {label: [] for label in emotion_labels}
    for pred in predictions:
        # Index scores by label name rather than relying on the sorted
        # prediction list lining up positionally with emotion_labels —
        # this stays correct even if the classifier ever returns labels
        # in a different order or naming scheme changes the sort.
        score_by_label = {entry['label']: entry['score'] for entry in pred}
        for label in emotion_labels:
            per_emotion_scores[label].append(score_by_label[label])
    return {label: np.max(scores) for label, scores in per_emotion_scores.items()}
# Run the classifier over every book description, sentence by sentence,
# and record each emotion's maximum score for that book.
emotion_scores = {label: [] for label in emotion_labels}
isbn = []

for i in tqdm(range(len(books)), desc="Processing books"):
    row = books.iloc[i]
    isbn.append(row['isbn13'])
    # Split into rough sentences so a short emotional passage is not
    # averaged away by the rest of the description.
    predictions = classifier(row['description'].split('.'))
    max_scores = calculate_max_emotion_score(predictions)
    for label in emotion_labels:
        emotion_scores[label].append(max_scores[label])

emotions_df = pd.DataFrame(emotion_scores)
emotions_df["isbn13"] = isbn

# Attach the emotion columns back onto the book records.
books = pd.merge(books, emotions_df, on="isbn13")

books.to_csv('data/books_with_emotions.csv', index=False)

print("Sentiment analysis completed and saved to 'data/books_with_emotions.csv'")
space_repo/space_repo/space_repo/text_classification.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import pipeline

# Load the cleaned book data.
books = pd.read_csv('data/book_cleaned.csv')

# Collapse the raw catalogue categories into a binary Fiction/Nonfiction scheme.
categories_mapping = {
    'Fiction': 'Fiction',
    'Juvenile Fiction': 'Fiction',
    'Biography & Autobiography': 'Nonfiction',
    'History': 'Nonfiction',
    'Literary Criticism': 'Nonfiction',
    'Philosophy': 'Nonfiction',
    'Religion': 'Nonfiction',
    'Comics & Graphic Novels': 'Fiction',
    'Drama': 'Fiction',
    'Juvenile Nonfiction': 'Nonfiction',
    'Science': 'Nonfiction',
    'Poetry': 'Fiction'
}

# Categories not covered by the mapping become NaN and are back-filled
# further down by the zero-shot classifier.
books['simple_categories'] = books['categories'].map(categories_mapping)

# Zero-shot Fiction/Nonfiction classifier.
# NOTE(review): device="mps" assumes Apple-Silicon hardware — confirm
# before running elsewhere.
fiction_categories = ['Fiction', 'Nonfiction']
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device="mps"
)
def generate_prediction(sequence: str, categories: list, classifier) -> str:
    """
    Generate the predicted category for a text using a zero-shot classifier.

    Args:
        sequence (str): Input text to classify.
        categories (list): Candidate category labels.
        classifier: Hugging Face zero-shot classification pipeline (or any
            callable with the same signature and dict-shaped result).

    Returns:
        str: The highest-scoring category label.

    Raises:
        ValueError: If sequence or categories is empty or of the wrong type.
        RuntimeError: If classification fails or returns an unexpected shape.
    """
    if not sequence or not isinstance(sequence, str):
        raise ValueError("Sequence must be a non-empty string")
    if not categories or not isinstance(categories, list):
        raise ValueError("Categories must be a non-empty list")

    try:
        # The model has a bounded input length; truncate defensively.
        # Slicing a shorter string is a no-op, so no length check is needed.
        sequence = sequence[:512]
        result = classifier(sequence, candidate_labels=categories, multi_label=False)
        if not isinstance(result, dict) or 'labels' not in result or 'scores' not in result:
            raise RuntimeError(f"Unexpected classifier output: {result}")
        max_idx = np.argmax(result['scores'])
        return result['labels'][max_idx]
    except Exception as e:
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError(f"Error in prediction: {str(e)}") from e
# =======================
# Evaluate the classifier on books whose category is already known
# =======================
actual_cats = []
pred_cats = []

def _evaluate_samples(category: str, max_samples: int = 200) -> None:
    """Classify up to max_samples descriptions of a known category, recording
    (actual, predicted) pairs into actual_cats / pred_cats. Factors out the
    previously duplicated Fiction/Nonfiction loops."""
    descriptions = books.loc[books['simple_categories'] == category, "description"].reset_index(drop=True)
    for i in tqdm(range(min(max_samples, len(descriptions))), desc=f"Processing {category}"):
        actual_cats.append(category)
        try:
            pred_cats.append(generate_prediction(descriptions[i], fiction_categories, classifier))
        except Exception as e:
            print(f"Error processing {category} sample {i}: {str(e)}")
            pred_cats.append("Unknown")  # Fallback keeps the two lists aligned

_evaluate_samples("Fiction")
_evaluate_samples("Nonfiction")

# Compare predictions against the known labels.
preds_df = pd.DataFrame({"actual_cats": actual_cats, "pred_cats": pred_cats})
preds_df["correct_pred"] = (preds_df["actual_cats"] == preds_df["pred_cats"]).astype(int)
accuracy = preds_df["correct_pred"].mean()
print(f"Classification accuracy: {accuracy:.4f}")
# =======================
# Predict categories for books with no mapped category
# =======================
missing_cat = books.loc[books["simple_categories"].isna(), ["isbn13", "description"]].reset_index(drop=True)
isbn = []
preds = []

for i in tqdm(range(len(missing_cat)), desc="Predicting missing categories"):
    # BUG FIX: append the ISBN exactly once, outside the try block.
    # Previously it was appended inside the try AND again in the except
    # handler, so any prediction failure left isbn one element longer
    # than preds and pd.DataFrame below raised on unequal lengths.
    isbn.append(missing_cat['isbn13'][i])
    try:
        preds.append(generate_prediction(missing_cat['description'][i], fiction_categories, classifier))
    except Exception as e:
        print(f"Error predicting for ISBN {missing_cat['isbn13'][i]}: {str(e)}")
        preds.append("Unknown")  # Fallback category keeps the lists aligned

# Collect the predicted labels keyed by ISBN.
missing_preds_df = pd.DataFrame({"isbn13": isbn, "predicted_categories": preds})

# Merge predictions back and fill only the rows that were missing a category.
books = pd.merge(books, missing_preds_df, on="isbn13", how="left")
books["simple_categories"] = books["simple_categories"].fillna(books["predicted_categories"])
books = books.drop(columns=["predicted_categories"])

# Save updated DataFrame.
books.to_csv('data/book_with_categories.csv', index=False)

print("Category classification completed and saved to 'data/book_with_categories.csv'")
print("Category distribution:")
print(books['simple_categories'].value_counts())
space_repo/space_repo/space_repo/vector_search.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import pandas as pd
import numpy as np

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

# Book metadata used to resolve search hits back to full records.
books = pd.read_csv('data/book_with_categories.csv')

def retrieve_semantic_recommendations(query: str, top_k: int = 10, db_books=None) -> pd.DataFrame:
    """
    Retrieve top-k book recommendations based on semantic similarity to the query.

    Args:
        query (str): The search query.
        top_k (int): Number of recommendations to return.
        db_books: Vector store holding the tagged-description embeddings.

    Returns:
        pd.DataFrame: Rows from the books table matching the retrieved ISBNs.

    Raises:
        ValueError: If no vector store is supplied.
    """
    if db_books is None:
        raise ValueError("db_books vector store is required")

    # Over-fetch, then narrow to top_k after mapping hits back to books.
    hits = db_books.similarity_search(query, k=50)

    # The ISBN-13 is the first token of each stored document.
    hit_isbns = [int(hit.page_content.strip('"').split()[0]) for hit in hits]

    return books[books["isbn13"].isin(hit_isbns)].head(top_k)
if __name__ == "__main__":
    # Build the vector store from scratch and run a demo query.
    books = pd.read_csv('data/book_cleaned.csv')

    # Dump one tagged description per line. The single-column output means
    # sep never actually separates anything, but the call matches the
    # newline-delimited layout TextLoader expects below.
    books['tagged_description'].to_csv('data/tagged_description.txt',
                                       sep='\n',
                                       index=False,
                                       header=False)

    raw_docs = TextLoader('data/tagged_description.txt', encoding='utf-8').load()
    # chunk_size=1 with a newline separator yields one document per line.
    splitter = CharacterTextSplitter(chunk_size=1, chunk_overlap=0, separator="\n")
    documents = splitter.split_documents(raw_docs)

    # Local sentence-transformer embeddings — no API key required.
    embedding = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
    db_books = Chroma.from_documents(
        documents,
        embedding=embedding)

    query = "A book to teach children about nature"

    recommendations = retrieve_semantic_recommendations(query, top_k=10, db_books=db_books)
    print(recommendations)
+ print(recommendations)