Spaces:

AI-Manith
/

bookRecEngine

Sleeping

App Files Files Community

AI-Manith committed on Apr 6, 2025

Commit

989865d

verified ·

1 Parent(s): 27abc1f

Create app.py

Browse files

Files changed (1) hide show

app.py +191 -0

app.py ADDED Viewed

	@@ -0,0 +1,191 @@

+import streamlit as st
+import pickle
+import polars as pl
+import re
+import requests
+from io import BytesIO
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.neighbors import NearestNeighbors
+import matplotlib.pyplot as plt
+import seaborn as sns
+# Set page configuration: browser-tab title and icon, and a full-width layout
+st.set_page_config(
+    page_title="Book Recommendation System",
+    page_icon="📚",
+    layout="wide"
+)
+# App title and description shown at the top of the main page
+st.title("📚 Book Recommendation System")
+st.markdown("Enter a book summary and genres to get personalized book recommendations!")
+# GitHub URLs for model files and dataset.
+# NOTE(review): the CSV and KNN model are fetched from media.githubusercontent.com
+# while the TF-IDF vectorizer comes from raw.githubusercontent.com — presumably the
+# first two are Git LFS objects; confirm before changing hosts or branches.
+GITHUB_CSV_URL = "https://media.githubusercontent.com/media/Manithj/bookRecEngine/refs/heads/main/goodreadsV2.csv"
+GITHUB_KNN_URL = "https://media.githubusercontent.com/media/Manithj/bookRecEngine/refs/heads/main/knn_model.pkl"
+GITHUB_TFIDF_URL = "https://raw.githubusercontent.com/Manithj/bookRecEngine/main/tfidf_vectorizer.pkl"
+# Load models from GitHub
+# NOTE(security): pickle.loads can execute arbitrary code embedded in the payload.
+# Only point these URLs at repositories you control and trust.
+_MODEL_DOWNLOAD_TIMEOUT = 60  # seconds; keeps a stalled download from hanging the app
+def _download_pickle(url):
+    """Download ``url`` and unpickle the response body, raising on HTTP errors."""
+    response = requests.get(url, timeout=_MODEL_DOWNLOAD_TIMEOUT)
+    # Fail on 4xx/5xx here so an HTML error page is never handed to pickle.
+    response.raise_for_status()
+    return pickle.loads(response.content)
+@st.cache_resource
+def _load_models_cached():
+    """Fetch the TF-IDF vectorizer and the KNN model; any failure propagates.
+    Raising (instead of returning a failure value) keeps a transient network
+    error out of the resource cache, so the next rerun retries the download.
+    """
+    return _download_pickle(GITHUB_TFIDF_URL), _download_pickle(GITHUB_KNN_URL)
+def load_models_from_github():
+    """Return the unpickled models downloaded from GitHub.
+    Returns:
+        A ``(tfidf, knn_model)`` tuple on success. On any error the message is
+        shown with ``st.error`` and ``(None, None)`` is returned.
+    """
+    try:
+        return _load_models_cached()
+    except Exception as e:
+        st.error(f"Error loading models: {e}")
+        return None, None
+# Load the dataset from GitHub
+@st.cache_data
+def _load_data_cached():
+    """Download the CSV and derive the text-feature columns; any failure propagates.
+    Raising (instead of returning ``None``) keeps a failed download out of the
+    data cache, so the next rerun retries instead of replaying the failure.
+    """
+    # Load CSV directly using Polars
+    books = pl.read_csv(GITHUB_CSV_URL)
+    # Rows missing any of the fields used below are dropped.
+    books = books.drop_nulls(subset=['name', 'summary', 'genres'])
+    books = books.with_columns([
+        (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
+    ])
+    # Second with_columns call because it reads the column created just above.
+    return books.with_columns([
+        pl.col('combined_features')
+        .map_elements(preprocess_text, return_dtype=pl.Utf8)
+        .alias('processed_features')
+    ])
+def load_data_from_github():
+    """Return the cleaned books DataFrame loaded from GitHub.
+    Returns:
+        A Polars DataFrame with the added ``combined_features`` and
+        ``processed_features`` columns. On any error the message is shown
+        with ``st.error`` and ``None`` is returned.
+    """
+    try:
+        return _load_data_cached()
+    except Exception as e:
+        st.error(f"Error loading dataset: {e}")
+        return None
+# Text normalisation shared by the dataset features and the user's query
+def preprocess_text(text):
+    """Lowercase ``text`` and remove every character that is not an ASCII letter, a digit, or whitespace."""
+    lowered = text.lower()
+    return re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
+# Recommendation function for out-of-dataset books
+def recommend_books_knn_out_of_dataset(input_summary, input_genres, top_n=5):
+    """Recommend the ``top_n`` books nearest to a free-text summary and genre list.
+    Relies on the module-level ``tfidf``, ``knn_model`` and ``df_cleaned`` objects.
+    Args:
+        input_summary: Summary text of the query book.
+        input_genres: Genres of the query book, as text.
+        top_n: Number of neighbours to request from the KNN model.
+    Returns:
+        A list of dicts with the keys ``title``, ``summary``, ``genres`` and
+        ``similarity_score``, in the order returned by ``kneighbors``.
+    """
+    # The query goes through the same preprocessing as the dataset features.
+    processed_input = preprocess_text(f"{input_summary} {input_genres}")
+    input_vector = tfidf.transform([processed_input])
+    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)
+    recommendations = []
+    # Flatten once, outside the loop, and walk distances and indices in lockstep.
+    for distance, idx in zip(distances.ravel(), indices.ravel()):
+        # Plain Python int, so the DataFrame lookup does not depend on NumPy scalar handling.
+        row = int(idx)
+        recommendations.append({
+            "title": df_cleaned['name'][row],
+            "summary": df_cleaned['summary'][row],
+            "genres": df_cleaned['genres'][row],
+            # NOTE(review): "1 - distance" is only a similarity if the model was fitted
+            # with a distance bounded by 1 (e.g. cosine) — confirm against training code.
+            "similarity_score": float(1 - distance),
+        })
+    return recommendations
+# Fetch the models and dataset behind a spinner, then report the outcome
+with st.spinner("Loading models and data from GitHub..."):
+    tfidf, knn_model = load_models_from_github()
+    df_cleaned = load_data_from_github()
+    # All three artifacts must be present before recommendations can be served.
+    models_loaded = not (tfidf is None or knn_model is None or df_cleaned is None)
+    if models_loaded:
+        st.success("Models and data loaded successfully!")
+    else:
+        st.error("Failed to load models or data. Please check the GitHub URLs.")
+# Sidebar for inputs
+st.sidebar.header("Input Parameters")
+# Input fields
+input_summary = st.sidebar.text_area("Book Summary",
+                                    placeholder="Enter a brief summary of the book...",
+                                    height=150)
+input_genres = st.sidebar.text_input("Genres",
+                                    placeholder="E.g., fantasy, adventure, mystery")
+# Number of recommendations slider
+num_recommendations = st.sidebar.slider("Number of Recommendations",
+                                        min_value=1,
+                                        max_value=10,
+                                        value=5)
+# Get recommendations button; nothing is computed unless the models loaded successfully
+if st.sidebar.button("Get Recommendations") and models_loaded:
+    # Whitespace-only input carries no terms to vectorize, so treat it as missing.
+    if input_summary.strip() and input_genres.strip():
+        with st.spinner("Finding the perfect books for you..."):
+            # Get recommendations
+            recommendations = recommend_books_knn_out_of_dataset(
+                input_summary,
+                input_genres,
+                top_n=num_recommendations
+            )
+            # Display recommendations as cards spread over at most three columns
+            st.header("Recommended Books")
+            cols = st.columns(min(3, num_recommendations))
+            for i, book in enumerate(recommendations):
+                # Wrap on the number of columns actually created, not a hard-coded 3.
+                with cols[i % len(cols)]:
+                    st.subheader(book["title"])
+                    st.markdown(f"**Genres:** {book['genres']}")
+                    st.markdown(f"**Similarity Score:** {book['similarity_score']:.2f}")
+                    with st.expander("Summary"):
+                        st.write(book["summary"])
+                    st.divider()
+            # Visualization of similarity scores
+            st.header("Similarity Scores")
+            fig, ax = plt.subplots(figsize=(10, 5))
+            book_titles = [book["title"] for book in recommendations]
+            similarity_scores = [book["similarity_score"] for book in recommendations]
+            # Create horizontal bar chart
+            sns.barplot(x=similarity_scores, y=book_titles, palette="viridis", ax=ax)
+            ax.set_xlabel("Similarity Score")
+            ax.set_ylabel("Book Title")
+            ax.set_title("Book Recommendation Similarity Scores")
+            st.pyplot(fig)
+            # pyplot keeps every figure alive until it is closed; release this one
+            # so repeated reruns do not accumulate figures in memory.
+            plt.close(fig)
+    else:
+        st.warning("Please enter both a summary and genres to get recommendations.")
+# Sidebar "About" panel: a separator, a header, and a short description of how the app works
+st.sidebar.markdown("---")
+st.sidebar.header("About")
+st.sidebar.info(
+    """
+    This app uses TF-IDF vectorization and K-Nearest Neighbors to recommend books
+    based on your input summary and genres.
+    The recommendations are based on textual similarity between your input and
+    our database of books from Goodreads.
+    Models and data are loaded directly from GitHub.
+    """
+)
+# Page footer: a separator followed by a one-line credit
+st.markdown("---")
+st.markdown("📚 Book Recommendation System | Created with Streamlit")