Spaces:

CSAT
/

bookengine

Sleeping

App Files Files Community

CSAT committed on Apr 9, 2025

Commit

20ce80b

verified ·

1 Parent(s): 22bd453

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -168

app.py CHANGED Viewed

@@ -1,168 +1,184 @@
-import streamlit as st
-import pickle
-import polars as pl
-import re
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.neighbors import NearestNeighbors
-import matplotlib.pyplot as plt
-import seaborn as sns
-# Set page configuration
-st.set_page_config(
-    page_title="Book Recommendation System",
-    page_icon="📚",
-    layout="wide"
-)
-# App title and description
-st.title("📚 Book Recommendation System")
-st.markdown("Enter a book summary and genres to get personalized book recommendations!")
-# Load the TF-IDF vectorizer
-@st.cache_resource
-def load_models():
-    with open('tfidf_vectorizer.pkl', 'rb') as f:
-        tfidf = pickle.load(f)
-    # Load the KNN model
-    with open('knn_model.pkl', 'rb') as f:
-        knn_model = pickle.load(f)
-    return tfidf, knn_model
-# Load the dataset
-@st.cache_data
-def load_data():
-    df_lazy = pl.scan_csv('goodreadsV5.csv')
-    df_cleaned = (
-        df_lazy.drop_nulls(subset=['name', 'summary', 'genres'])
-        .with_columns([
-            (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
-        ])
-    ).collect()
-    # Apply preprocessing to create the 'processed_features' column
-    df_cleaned = df_cleaned.with_columns([
-        pl.col('combined_features')
-        .map_elements(preprocess_text, return_dtype=pl.Utf8)
-        .alias('processed_features')
-    ])
-    return df_cleaned
-# Define the preprocessing function
-def preprocess_text(text):
-    return re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
-# Recommendation function for out-of-dataset books
-def recommend_books_knn_out_of_dataset(input_summary, input_genres, top_n=5):
-    # Combine and preprocess the input book's features
-    combined_input = f"{input_summary} {input_genres}"
-    processed_input = preprocess_text(combined_input)
-    # Transform the input book's features using the loaded TF-IDF vectorizer
-    input_vector = tfidf.transform([processed_input])
-    # Find the nearest neighbors using the loaded KNN model
-    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)
-    # Retrieve the recommended book titles and additional information
-    recommendations = []
-    for i, idx in enumerate(indices.flatten()):
-        book_info = {
-            "title": df_cleaned['name'][idx],
-            "summary": df_cleaned['summary'][idx],
-            "genres": df_cleaned['genres'][idx],
-            "similarity_score": 1 - distances.flatten()[i]  # Convert distance to similarity
-        }
-        recommendations.append(book_info)
-    return recommendations
-# Load models and data
-try:
-    tfidf, knn_model = load_models()
-    df_cleaned = load_data()
-    models_loaded = True
-except Exception as e:
-    st.error(f"Error loading models or data: {e}")
-    models_loaded = False
-# Sidebar for inputs
-st.sidebar.header("Input Parameters")
-# Input fields
-input_summary = st.sidebar.text_area("Book Summary",
-                                    placeholder="Enter a brief summary of the book...",
-                                    height=150)
-input_genres = st.sidebar.text_input("Genres",
-                                    placeholder="E.g., fantasy, adventure, mystery")
-# Number of recommendations slider
-num_recommendations = st.sidebar.slider("Number of Recommendations",
-                                        min_value=1,
-                                        max_value=10,
-                                        value=5)
-# Get recommendations button
-if st.sidebar.button("Get Recommendations") and models_loaded:
-    if input_summary and input_genres:
-        with st.spinner("Finding the perfect books for you..."):
-            # Get recommendations
-            recommendations = recommend_books_knn_out_of_dataset(
-                input_summary,
-                input_genres,
-                top_n=num_recommendations
-            )
-            # Display recommendations
-            st.header("Recommended Books")
-            # Create columns for book cards
-            cols = st.columns(min(3, num_recommendations))
-            for i, book in enumerate(recommendations):
-                col_idx = i % 3
-                with cols[col_idx]:
-                    st.subheader(book["title"])
-                    st.markdown(f"**Genres:** {book['genres']}")
-                    st.markdown(f"**Similarity Score:** {book['similarity_score']:.2f}")
-                    with st.expander("Summary"):
-                        st.write(book["summary"])
-                    st.divider()
-            # Visualization of similarity scores
-            st.header("Similarity Scores")
-            fig, ax = plt.subplots(figsize=(10, 5))
-            book_titles = [book["title"] for book in recommendations]
-            similarity_scores = [book["similarity_score"] for book in recommendations]
-            # Create horizontal bar chart
-            sns.barplot(x=similarity_scores, y=book_titles, palette="viridis", ax=ax)
-            ax.set_xlabel("Similarity Score")
-            ax.set_ylabel("Book Title")
-            ax.set_title("Book Recommendation Similarity Scores")
-            st.pyplot(fig)
-    else:
-        st.warning("Please enter both a summary and genres to get recommendations.")
-# Add some information about the app
-st.sidebar.markdown("---")
-st.sidebar.header("About")
-st.sidebar.info(
-    """
-    This app uses TF-IDF vectorization and K-Nearest Neighbors to recommend books
-    based on your input summary and genres.
-    The recommendations are based on textual similarity between your input and
-    our database of books from Goodreads.
-    """
-)
-# Add a footer
-st.markdown("---")
-st.markdown("📚 Book Recommendation System | Created with Streamlit")

+import streamlit as st
+import pickle
+import polars as pl
+import re
+import pandas as pd
+import numpy as np
+from collections import Counter
+# Set the browser-tab title and use the full-width page layout.
+# NOTE(review): Streamlit documents set_page_config as having to run before other
+# st.* rendering commands — keep this call ahead of any UI output.
+st.set_page_config(page_title="Book Recommendation Engine", layout="wide")
+@st.cache_resource
+def load_models():
+    """Unpickle the fitted TF-IDF vectorizer and KNN model from the working directory.
+
+    Returns:
+        A ``(tfidf, knn_model)`` tuple read from 'tfidf_vectorizer.pkl' and
+        'knn_model.pkl', in that order.
+    """
+    # NOTE: pickle.load can execute arbitrary code embedded in the file, so these
+    # artifacts must only ever come from a trusted source.
+    artifacts = []
+    for artifact_path in ('tfidf_vectorizer.pkl', 'knn_model.pkl'):
+        with open(artifact_path, 'rb') as handle:
+            artifacts.append(pickle.load(handle))
+    tfidf, knn_model = artifacts
+    return tfidf, knn_model
+@st.cache_data
+def load_data():
+    """Read the Goodreads CSV and return it as both a polars and a pandas frame.
+
+    Rows with a null 'name', 'summary' or 'genres' are dropped. Two text columns are
+    then added: 'combined_features' (summary + ' ' + genres) and
+    'processed_features' (preprocess_text applied to 'combined_features').
+
+    Returns:
+        A ``(df_cleaned, df_pandas)`` tuple: the polars DataFrame and its pandas
+        conversion.
+    """
+    required_columns = ['name', 'summary', 'genres']
+    combined = (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
+    # Null filtering and concatenation are declared lazily; collect() materialises them.
+    lazy_frame = pl.scan_csv('goodreadsV5.csv').drop_nulls(subset=required_columns)
+    df_cleaned = lazy_frame.with_columns([combined]).collect()
+    processed = pl.col('combined_features').map_elements(preprocess_text, return_dtype=pl.Utf8)
+    df_cleaned = df_cleaned.with_columns([processed.alias('processed_features')])
+    # NOTE(review): the recommender indexes the pandas frame positionally with KNN
+    # output, so row order presumably has to match the data the model was fitted on — confirm.
+    return df_cleaned, df_cleaned.to_pandas()
+# Matches every character that is not an ASCII letter, an ASCII digit, or whitespace.
+_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9\s]')
+def preprocess_text(text):
+    """Lower-case *text* and delete everything except ASCII letters, digits and whitespace."""
+    lowered = text.lower()
+    return _DISALLOWED_CHARS.sub('', lowered)
+# Recommendation function for out-of-dataset books
+def recommend_books_knn_out_of_dataset(df_pandas, tfidf, knn_model, input_summary, input_genres, top_n=5):
+    """Recommend the catalogue books closest to a free-text summary and genre list.
+
+    Args:
+        df_pandas: pandas DataFrame with 'name', 'summary' and 'genres' columns,
+            indexed positionally by the neighbour indices the model returns.
+        tfidf: fitted vectorizer exposing ``transform``.
+        knn_model: fitted neighbour model exposing ``kneighbors``.
+        input_summary: summary text describing the book to match.
+        input_genres: genre text, appended to the summary before vectorizing.
+        top_n: maximum number of recommendations to return.
+
+    Returns:
+        A list of dicts with keys 'title', 'summary', 'genres' and
+        'similarity_score', in the order the model returned the neighbours.
+        The list is empty when no neighbours can be requested.
+    """
+    # kneighbors() raises when asked for more neighbours than it was fitted on, so
+    # cap the request by both the model's sample count and the rows we can look up.
+    fitted_rows = getattr(knn_model, 'n_samples_fit_', len(df_pandas))
+    n_neighbors = min(top_n, fitted_rows, len(df_pandas))
+    if n_neighbors < 1:
+        return []
+    # The query must go through the same preprocessing as the catalogue text.
+    processed_input = preprocess_text(f"{input_summary} {input_genres}")
+    input_vector = tfidf.transform([processed_input])
+    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=n_neighbors)
+    recommendations = []
+    # Flatten once and fetch each row once instead of repeating both per field.
+    for idx, distance in zip(indices.flatten(), distances.flatten()):
+        row = df_pandas.iloc[idx]
+        recommendations.append({
+            "title": row['name'],
+            "summary": row['summary'],
+            "genres": row['genres'],
+            # NOTE(review): treats 1 - distance as a similarity, which presumes a
+            # metric bounded by 1 (e.g. cosine) — confirm against the fitted model.
+            "similarity_score": 1 - distance,
+        })
+    return recommendations
+# Canned inputs for the "Try these examples" tabs: (tab label, summary, genres).
+_EXAMPLES = (
+    (
+        "Fantasy Adventure",
+        "A magical journey through enchanted lands with dragons and wizards.",
+        "fantasy, adventure, magic",
+    ),
+    (
+        "Romance",
+        "A love story between two people from different worlds who meet by chance.",
+        "romance, contemporary, drama",
+    ),
+    (
+        "Science Fiction",
+        "Space explorers discover an alien civilization that challenges their understanding of humanity.",
+        "science fiction, space, aliens",
+    ),
+    (
+        "Mystery",
+        "A detective investigates a series of mysterious disappearances in a small town.",
+        "mystery, thriller, crime",
+    ),
+)
+def _set_example(summary, genres):
+    """Store an example as the pending input and rerun the script so it is applied."""
+    st.session_state['example_summary'] = summary
+    st.session_state['example_genres'] = genres
+    st.session_state['run_example'] = True
+    st.rerun()
+def _render_examples():
+    """Draw one tab per canned example, each with a button that loads it into the form."""
+    st.subheader("Try these examples")
+    example_tabs = st.tabs([label for label, _, _ in _EXAMPLES])
+    for number, (tab, (_, summary, genres)) in enumerate(zip(example_tabs, _EXAMPLES), start=1):
+        with tab:
+            st.write(summary)
+            st.write(f"Genres: {genres}")
+            # Explicit keys (ex1..ex4) keep the identically-labelled buttons distinct.
+            if st.button("Use this example", key=f"ex{number}"):
+                _set_example(summary, genres)
+def main():
+    """Render the recommendation page: input form, results, and example shortcuts.
+
+    Any exception raised while loading the models/data or rendering the page is
+    shown to the user together with a reminder of the files the app expects.
+    """
+    st.title("📚 Book Recommendation Engine")
+    # Initialize session state variables if they don't exist
+    for state_key, initial in (('example_summary', ""), ('example_genres', ""), ('run_example', False)):
+        if state_key not in st.session_state:
+            st.session_state[state_key] = initial
+    try:
+        # Load models and data; only the pandas frame is needed for lookups here.
+        tfidf, knn_model = load_models()
+        _, df_pandas = load_data()
+        # Pre-fill with the chosen example when one was just selected.
+        use_example = st.session_state['run_example']
+        default_summary = st.session_state['example_summary'] if use_example else "A fantasy adventure about a young wizard learning magic."
+        default_genres = st.session_state['example_genres'] if use_example else "fantasy, adventure, magic"
+        # Main content
+        st.subheader("Find Book Recommendations")
+        st.write("Enter a book summary and genres to get personalized recommendations.")
+        col1, col2 = st.columns(2)
+        with col1:
+            input_summary = st.text_area("Book Summary", default_summary, height=150)
+        with col2:
+            input_genres = st.text_input("Genres (comma-separated)", default_genres)
+            num_recommendations = st.slider("Number of Recommendations",
+                                           min_value=1, max_value=20, value=5)
+        # Evaluate the button unconditionally so it is rendered on example runs too;
+        # folding it into the `or` below would skip it whenever an example is pending.
+        clicked = st.button("Get Recommendations", type="primary")
+        if use_example or clicked:
+            with st.spinner("Finding the best book matches for you..."):
+                # Use the current input values, which may come from examples or user input
+                recommendations = recommend_books_knn_out_of_dataset(
+                    df_pandas, tfidf, knn_model, input_summary, input_genres, num_recommendations
+                )
+            st.subheader("📚 Your Recommended Books")
+            for position, book in enumerate(recommendations, start=1):
+                with st.expander(f"{position}. {book['title']}"):
+                    st.markdown(f"**Summary:** {book['summary']}")
+                    st.markdown(f"**Genres:** {book['genres']}")
+            # Reset the example flag so it doesn't run again on rerender
+            st.session_state['run_example'] = False
+        _render_examples()
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
+        # The dataset name must match the file load_data() actually reads.
+        st.info("Make sure you have the required model files (tfidf_vectorizer.pkl, knn_model.pkl) and dataset (goodreadsV5.csv) in the same directory as this app.")
+        st.code("""
+# Files needed:
+- tfidf_vectorizer.pkl: Your trained TF-IDF vectorizer
+- knn_model.pkl: Your trained KNN model
+- goodreadsV5.csv: Your dataset with book information
+        """)
+# Run the app only when this file is executed as the main script, not on import.
+if __name__ == "__main__":
+    main()