Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pickle | |
| import polars as pl | |
| import re | |
| import pandas as pd | |
| import numpy as np | |
| from collections import Counter | |
# Set the browser tab title and use the wide page layout.
st.set_page_config(page_title="Book Recommendation Engine", layout="wide")


@st.cache_resource
def load_models():
    """Load the fitted TF-IDF vectorizer and KNN model from disk.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached with ``st.cache_resource``: both pickles are read
    once per server process and the same objects are reused afterwards.

    Returns:
        tuple: ``(tfidf, knn_model)`` as unpickled from
        ``tfidf_vectorizer.pkl`` and ``knn_model.pkl`` in the current
        working directory.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    # NOTE(security): unpickling runs arbitrary code stored in the file, so
    # these artifacts must only ever come from a trusted source.
    with open('tfidf_vectorizer.pkl', 'rb') as f:
        tfidf = pickle.load(f)

    with open('knn_model.pkl', 'rb') as f:
        knn_model = pickle.load(f)

    return tfidf, knn_model
@st.cache_data
def load_data():
    """Load and pre-process the book dataset from ``goodreadsV5.csv``.

    Rows with a null ``name``, ``summary`` or ``genres`` are dropped. A
    ``combined_features`` column is built as ``summary + ' ' + genres`` and a
    ``processed_features`` column holds that text after ``preprocess_text``.

    The per-row Python preprocessing is slow and Streamlit re-runs the script
    on every interaction, so the result is cached with ``st.cache_data`` and
    only computed on the first call.

    Returns:
        tuple: ``(df_cleaned, df_pandas)`` -- the cleaned polars DataFrame and
        the same data converted to a pandas DataFrame.
    """
    df_lazy = pl.scan_csv('goodreadsV5.csv')
    df_cleaned = (
        df_lazy.drop_nulls(subset=['name', 'summary', 'genres'])
        .with_columns([
            (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
        ])
    ).collect()

    # Apply the same text normalisation that is applied to user queries.
    df_cleaned = df_cleaned.with_columns([
        pl.col('combined_features')
        .map_elements(preprocess_text, return_dtype=pl.Utf8)
        .alias('processed_features')
    ])

    # pandas copy is used for positional (.iloc) lookup of KNN results;
    # presumably row order matches the data the KNN model was fitted on --
    # TODO confirm against the training script.
    df_pandas = df_cleaned.to_pandas()
    return df_cleaned, df_pandas
# Matches any character that is not an ASCII letter, a digit, or whitespace.
_DISALLOWED_CHARS = re.compile(r'[^a-zA-Z0-9\s]')


def preprocess_text(text):
    """Return *text* lower-cased with punctuation and symbols removed.

    Only ASCII letters, digits and whitespace survive; every other character
    is deleted (not replaced by a space).
    """
    lowered = text.lower()
    return _DISALLOWED_CHARS.sub('', lowered)
def recommend_books_knn_out_of_dataset(df_pandas, tfidf, knn_model, input_summary, input_genres, top_n=5):
    """Recommend books similar to a description that is not in the dataset.

    The summary and genres are joined, normalised with ``preprocess_text``,
    vectorised with ``tfidf`` and matched against ``knn_model``.

    Args:
        df_pandas: DataFrame with ``name``, ``summary`` and ``genres``
            columns, indexed positionally by the neighbour indices.
        tfidf: Fitted vectorizer exposing ``transform``.
        knn_model: Fitted neighbours model exposing ``kneighbors``.
        input_summary: Free-text summary of the query book.
        input_genres: Genres of the query book.
        top_n: Number of neighbours to request.

    Returns:
        list[dict]: One dict per neighbour with ``title``, ``summary``,
        ``genres`` and ``similarity_score`` keys, in the order returned by
        the model. Empty when the query contains no usable text.
    """
    combined_input = f"{input_summary} {input_genres}"
    # A blank query carries no features; any neighbours returned for it
    # would be arbitrary, so answer with no recommendations instead.
    if not combined_input.strip():
        return []

    processed_input = preprocess_text(combined_input)
    # Punctuation-only input is emptied by preprocessing -- same situation.
    if not processed_input.strip():
        return []

    input_vector = tfidf.transform([processed_input])
    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)

    recommendations = []
    # Flatten each result array once and walk them in lockstep.
    for idx, distance in zip(indices.flatten(), distances.flatten()):
        row = df_pandas.iloc[idx]  # single positional lookup per neighbour
        recommendations.append({
            "title": row['name'],
            "summary": row['summary'],
            "genres": row['genres'],
            # Presumably the model uses a distance in [0, 1] (e.g. cosine),
            # making this a similarity -- TODO confirm the fitted metric.
            "similarity_score": 1 - distance,
        })
    return recommendations
# Form contents shown until the user picks one of the examples.
_DEFAULT_SUMMARY = "A fantasy adventure about a young wizard learning magic."
_DEFAULT_GENRES = "fantasy, adventure, magic"

# One entry per example tab: (tab label, summary, genres, button key).
_EXAMPLES = (
    (
        "Fantasy Adventure",
        "A magical journey through enchanted lands with dragons and wizards.",
        "fantasy, adventure, magic",
        "ex1",
    ),
    (
        "Romance",
        "A love story between two people from different worlds who meet by chance.",
        "romance, contemporary, drama",
        "ex2",
    ),
    (
        "Science Fiction",
        "Space explorers discover an alien civilization that challenges their understanding of humanity.",
        "science fiction, space, aliens",
        "ex3",
    ),
    (
        "Mystery",
        "A detective investigates a series of mysterious disappearances in a small town.",
        "mystery, thriller, crime",
        "ex4",
    ),
)


def _set_example(summary, genres):
    """Store the chosen example in session state and rerun the script."""
    st.session_state['example_summary'] = summary
    st.session_state['example_genres'] = genres
    st.session_state['run_example'] = True
    st.rerun()


def _render_examples():
    """Render one tab per entry in ``_EXAMPLES`` with a button to apply it."""
    st.subheader("Try these examples")
    tabs = st.tabs([label for label, _, _, _ in _EXAMPLES])
    for tab, (_, summary, genres, key) in zip(tabs, _EXAMPLES):
        with tab:
            st.write(summary)
            st.write(f"Genres: {genres}")
            if st.button("Use this example", key=key):
                _set_example(summary, genres)


def _render_setup_help(error):
    """Show *error* together with the list of files the app needs."""
    st.error(f"An error occurred: {error}")
    st.info("Make sure you have the required model files (tfidf_vectorizer.pkl, knn_model.pkl) and dataset (goodreadsV5.csv) in the same directory as this app.")
    st.code(
        "\n"
        "# Files needed:\n"
        "- tfidf_vectorizer.pkl: Your trained TF-IDF vectorizer\n"
        "- knn_model.pkl: Your trained KNN model\n"
        "- goodreadsV5.csv: Your dataset with book information\n"
    )


def main():
    """Render the recommendation page.

    Loads the models and dataset, shows a summary/genres form with a slider
    for the number of results, lists the recommendations when the button is
    pressed or an example was just chosen, and finally renders the example
    tabs. Any exception raised along the way is shown together with a hint
    about the files the app needs.
    """
    st.title("📚 Book Recommendation Engine")

    # Initialize session state variables if they don't exist.
    for state_key, initial in (
        ('example_summary', ""),
        ('example_genres', ""),
        ('run_example', False),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = initial

    try:
        tfidf, knn_model = load_models()
        _, df_pandas = load_data()

        # Keep the last chosen example as the form default on every rerun.
        # Falling back to the stock text once the example has run would
        # change the widgets' defaults and discard the example and any edits
        # the user made to it.
        default_summary = st.session_state['example_summary'] or _DEFAULT_SUMMARY
        default_genres = st.session_state['example_genres'] or _DEFAULT_GENRES

        st.subheader("Find Book Recommendations")
        st.write("Enter a book summary and genres to get personalized recommendations.")

        col1, col2 = st.columns(2)
        with col1:
            input_summary = st.text_area("Book Summary", default_summary, height=150)
        with col2:
            input_genres = st.text_input("Genres (comma-separated)", default_genres)
            num_recommendations = st.slider("Number of Recommendations",
                                            min_value=1, max_value=20, value=5)

        # Create the button unconditionally: placing it after `or` would skip
        # rendering it on runs triggered by an example.
        clicked = st.button("Get Recommendations", type="primary")

        if st.session_state['run_example'] or clicked:
            with st.spinner("Finding the best book matches for you..."):
                recommendations = recommend_books_knn_out_of_dataset(
                    df_pandas, tfidf, knn_model, input_summary, input_genres, num_recommendations
                )

            st.subheader("📚 Your Recommended Books")
            if not recommendations:
                st.warning("No recommendations found. Try a more detailed summary or different genres.")
            for position, book in enumerate(recommendations, start=1):
                with st.expander(f"{position}. {book['title']}"):
                    st.markdown(f"**Summary:** {book['summary']}")
                    st.markdown(f"**Genres:** {book['genres']}")

            # Reset the example flag so it doesn't run again on rerender.
            st.session_state['run_example'] = False

        _render_examples()

    except Exception as e:
        _render_setup_help(e)


if __name__ == "__main__":
    main()