Spaces:

CSAT
/

bookengine

Sleeping

App Files Files Community

CSAT committed on Apr 9, 2025

Commit

22bd453

verified ·

1 Parent(s): 5ad083d

Upload 2 files

Browse files

Files changed (2) hide show

app.py +168 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,168 @@

+import streamlit as st
+import pickle
+import polars as pl
+import re
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.neighbors import NearestNeighbors
+import matplotlib.pyplot as plt
+import seaborn as sns
+# Set page configuration
+st.set_page_config(
+    page_title="Book Recommendation System",
+    page_icon="📚",
+    layout="wide"
+)
+# App title and description
+st.title("📚 Book Recommendation System")
+st.markdown("Enter a book summary and genres to get personalized book recommendations!")
+# Load the TF-IDF vectorizer
+@st.cache_resource
+def load_models():
+    with open('tfidf_vectorizer.pkl', 'rb') as f:
+        tfidf = pickle.load(f)
+    # Load the KNN model
+    with open('knn_model.pkl', 'rb') as f:
+        knn_model = pickle.load(f)
+    return tfidf, knn_model
+# Load the dataset
+@st.cache_data
+def load_data():
+    df_lazy = pl.scan_csv('goodreadsV5.csv')
+    df_cleaned = (
+        df_lazy.drop_nulls(subset=['name', 'summary', 'genres'])
+        .with_columns([
+            (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
+        ])
+    ).collect()
+    # Apply preprocessing to create the 'processed_features' column
+    df_cleaned = df_cleaned.with_columns([
+        pl.col('combined_features')
+        .map_elements(preprocess_text, return_dtype=pl.Utf8)
+        .alias('processed_features')
+    ])
+    return df_cleaned
+# Define the preprocessing function
+def preprocess_text(text):
+    return re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
+# Recommendation function for out-of-dataset books
+def recommend_books_knn_out_of_dataset(input_summary, input_genres, top_n=5):
+    # Combine and preprocess the input book's features
+    combined_input = f"{input_summary} {input_genres}"
+    processed_input = preprocess_text(combined_input)
+    # Transform the input book's features using the loaded TF-IDF vectorizer
+    input_vector = tfidf.transform([processed_input])
+    # Find the nearest neighbors using the loaded KNN model
+    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)
+    # Retrieve the recommended book titles and additional information
+    recommendations = []
+    for i, idx in enumerate(indices.flatten()):
+        book_info = {
+            "title": df_cleaned['name'][idx],
+            "summary": df_cleaned['summary'][idx],
+            "genres": df_cleaned['genres'][idx],
+            "similarity_score": 1 - distances.flatten()[i]  # Convert distance to similarity
+        }
+        recommendations.append(book_info)
+    return recommendations
+# Load models and data
+try:
+    tfidf, knn_model = load_models()
+    df_cleaned = load_data()
+    models_loaded = True
+except Exception as e:
+    st.error(f"Error loading models or data: {e}")
+    models_loaded = False
+# Sidebar for inputs
+st.sidebar.header("Input Parameters")
+# Input fields
+input_summary = st.sidebar.text_area("Book Summary",
+                                    placeholder="Enter a brief summary of the book...",
+                                    height=150)
+input_genres = st.sidebar.text_input("Genres",
+                                    placeholder="E.g., fantasy, adventure, mystery")
+# Number of recommendations slider
+num_recommendations = st.sidebar.slider("Number of Recommendations",
+                                        min_value=1,
+                                        max_value=10,
+                                        value=5)
+# Get recommendations button
+if st.sidebar.button("Get Recommendations") and models_loaded:
+    if input_summary and input_genres:
+        with st.spinner("Finding the perfect books for you..."):
+            # Get recommendations
+            recommendations = recommend_books_knn_out_of_dataset(
+                input_summary,
+                input_genres,
+                top_n=num_recommendations
+            )
+            # Display recommendations
+            st.header("Recommended Books")
+            # Create columns for book cards
+            cols = st.columns(min(3, num_recommendations))
+            for i, book in enumerate(recommendations):
+                col_idx = i % 3
+                with cols[col_idx]:
+                    st.subheader(book["title"])
+                    st.markdown(f"**Genres:** {book['genres']}")
+                    st.markdown(f"**Similarity Score:** {book['similarity_score']:.2f}")
+                    with st.expander("Summary"):
+                        st.write(book["summary"])
+                    st.divider()
+            # Visualization of similarity scores
+            st.header("Similarity Scores")
+            fig, ax = plt.subplots(figsize=(10, 5))
+            book_titles = [book["title"] for book in recommendations]
+            similarity_scores = [book["similarity_score"] for book in recommendations]
+            # Create horizontal bar chart
+            sns.barplot(x=similarity_scores, y=book_titles, palette="viridis", ax=ax)
+            ax.set_xlabel("Similarity Score")
+            ax.set_ylabel("Book Title")
+            ax.set_title("Book Recommendation Similarity Scores")
+            st.pyplot(fig)
+    else:
+        st.warning("Please enter both a summary and genres to get recommendations.")
+# Add some information about the app
+st.sidebar.markdown("---")
+st.sidebar.header("About")
+st.sidebar.info(
+    """
+    This app uses TF-IDF vectorization and K-Nearest Neighbors to recommend books
+    based on your input summary and genres.
+    The recommendations are based on textual similarity between your input and
+    our database of books from Goodreads.
+    """
+)
+# Add a footer
+st.markdown("---")
+st.markdown("📚 Book Recommendation System | Created with Streamlit")

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+polars
+scikit-learn
+matplotlib
+seaborn
+requests