# Spaces: Sleeping
# File size: 7,650 Bytes
# 20ce80b (presumably the revision hash of this file version)
import streamlit as st
import pickle
import polars as pl
import re
import pandas as pd
import numpy as np
from collections import Counter
# Set the page title and use the wide page layout for the whole app.
st.set_page_config(page_title="Book Recommendation Engine", layout="wide")
@st.cache_resource
def load_models():
    """Load the pickled TF-IDF vectorizer and KNN model.

    Both files are opened by relative path, so they are resolved against the
    process's current working directory. The function is wrapped in
    ``st.cache_resource`` so Streamlit can cache the loaded objects.

    Returns:
        A ``(tfidf, knn_model)`` tuple holding the objects unpickled from
        ``tfidf_vectorizer.pkl`` and ``knn_model.pkl``.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only ship model files that come from a trusted source.
    with open('tfidf_vectorizer.pkl', 'rb') as f:
        tfidf = pickle.load(f)

    with open('knn_model.pkl', 'rb') as f:
        knn_model = pickle.load(f)

    return tfidf, knn_model
@st.cache_data
def load_data():
    """Load the book dataset and derive its text-feature columns.

    Reads ``goodreadsV5.csv`` lazily with polars, drops every row that has a
    null ``name``, ``summary`` or ``genres``, and adds two columns:

    * ``combined_features``: summary and genres joined by a single space.
    * ``processed_features``: ``combined_features`` passed through
      ``preprocess_text``.

    Returns:
        A ``(df_cleaned, df_pandas)`` tuple: the polars DataFrame and the
        same data converted to a pandas DataFrame.
    """
    # Build the lazy query first so the null filter and the concatenation
    # are executed together by a single collect().
    df_cleaned = (
        pl.scan_csv('goodreadsV5.csv')
        .drop_nulls(subset=['name', 'summary', 'genres'])
        .with_columns([
            (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
        ])
        .collect()
    )

    # Apply preprocessing to create the 'processed_features' column.
    df_cleaned = df_cleaned.with_columns([
        pl.col('combined_features')
        .map_elements(preprocess_text, return_dtype=pl.Utf8)
        .alias('processed_features')
    ])

    # Convert to pandas for positional (iloc) lookups of KNN results.
    # NOTE(review): presumably the KNN model was fitted on rows in this same
    # order, so neighbour indices line up with row positions -- confirm.
    df_pandas = df_cleaned.to_pandas()

    return df_cleaned, df_pandas
# Matches every character that is not an ASCII letter, a digit or whitespace.
# Compiled once at import time so repeated calls reuse the same pattern.
_NON_ALPHANUMERIC_RE = re.compile(r'[^a-zA-Z0-9\s]')


# Define the preprocessing function
def preprocess_text(text):
    """Normalise text for TF-IDF: lower-case it and strip punctuation.

    Args:
        text: The string to normalise.

    Returns:
        ``text`` lower-cased, with every character other than ``a-z``,
        ``0-9`` and whitespace removed. Whitespace is kept as-is, so removing
        a symbol between two spaces leaves both spaces in place.
    """
    return _NON_ALPHANUMERIC_RE.sub('', text.lower())
# Recommendation function for out-of-dataset books
def recommend_books_knn_out_of_dataset(df_pandas, tfidf, knn_model, input_summary, input_genres, top_n=5):
    """Recommend dataset books that are closest to a free-text description.

    The summary and genres are joined, normalised with ``preprocess_text``,
    vectorised with ``tfidf`` and looked up in ``knn_model``. The returned
    neighbour indices are used as row positions into ``df_pandas``.

    Args:
        df_pandas: pandas DataFrame with ``name``, ``summary`` and ``genres``
            columns, addressed by row position.
        tfidf: Fitted vectorizer exposing ``transform``.
        knn_model: Fitted nearest-neighbour model exposing ``kneighbors``.
        input_summary: Free-text summary of the book to match.
        input_genres: Free-text genre list of the book to match.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of dicts with ``title``, ``summary``, ``genres`` and
        ``similarity_score`` keys, in the order returned by the model. The
        list is empty when ``top_n`` is below 1, the dataset is empty, or the
        query contains no usable characters after preprocessing.
    """
    # kneighbors rejects a non-positive neighbour count, so bail out early.
    if top_n < 1:
        return []

    # Combine and preprocess the input book's features.
    processed_input = preprocess_text(f"{input_summary} {input_genres}")
    if not processed_input.strip():
        # A blank query vectorises to all zeros; any neighbours returned for
        # it would be arbitrary, so report "no matches" instead.
        return []

    # kneighbors also rejects requests for more neighbours than fitted
    # samples. NOTE(review): assumes the model was fitted on the rows of
    # df_pandas, so its length is the right upper bound -- confirm.
    n_neighbors = min(top_n, len(df_pandas))
    if n_neighbors < 1:
        return []

    # Transform the input with the loaded TF-IDF vectorizer and query the model.
    input_vector = tfidf.transform([processed_input])
    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=n_neighbors)

    recommendations = []
    for idx, distance in zip(indices.flatten(), distances.flatten()):
        row = df_pandas.iloc[idx]  # single positional lookup per neighbour
        recommendations.append({
            "title": row['name'],
            "summary": row['summary'],
            "genres": row['genres'],
            # Convert distance to similarity score.
            # NOTE(review): only a true similarity if the model uses a
            # distance bounded by 1 (e.g. cosine) -- confirm the metric.
            "similarity_score": 1 - distance,
        })
    return recommendations
def main():
    """Render the Streamlit UI for the book recommendation engine.

    Shows a summary/genres form, a slider for the number of results and a
    set of example tabs. Recommendations are computed when the primary
    button is pressed or right after an example has been selected. Any
    exception raised while loading or recommending is shown in the page
    together with a hint about the files the app needs.
    """
    # (tab label, summary, genres) for each ready-made example.
    examples = (
        (
            "Fantasy Adventure",
            "A magical journey through enchanted lands with dragons and wizards.",
            "fantasy, adventure, magic",
        ),
        (
            "Romance",
            "A love story between two people from different worlds who meet by chance.",
            "romance, contemporary, drama",
        ),
        (
            "Science Fiction",
            "Space explorers discover an alien civilization that challenges their understanding of humanity.",
            "science fiction, space, aliens",
        ),
        (
            "Mystery",
            "A detective investigates a series of mysterious disappearances in a small town.",
            "mystery, thriller, crime",
        ),
    )

    st.title("📚 Book Recommendation Engine")

    # Initialize session state variables if they don't exist.
    for state_key, initial_value in (
        ('example_summary', ""),
        ('example_genres', ""),
        ('run_example', False),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = initial_value

    try:
        # Load models and data (the polars frame is not needed here).
        tfidf, knn_model = load_models()
        _, df_pandas = load_data()

        # Pre-fill with the last selected example, if any. The defaults must
        # not depend on the one-shot 'run_example' flag: these inputs have no
        # key, so a changed default makes Streamlit treat them as new widgets
        # and the example text would be lost on the next rerun.
        default_summary = (
            st.session_state['example_summary']
            or "A fantasy adventure about a young wizard learning magic."
        )
        default_genres = (
            st.session_state['example_genres']
            or "fantasy, adventure, magic"
        )

        # Main content
        st.subheader("Find Book Recommendations")
        st.write("Enter a book summary and genres to get personalized recommendations.")

        col1, col2 = st.columns(2)
        with col1:
            input_summary = st.text_area("Book Summary", default_summary, height=150)
        with col2:
            input_genres = st.text_input("Genres (comma-separated)", default_genres)
            num_recommendations = st.slider("Number of Recommendations",
                                            min_value=1, max_value=20, value=5)

        # Always render the button before testing the flag; combining them as
        # `flag or st.button(...)` would skip the button on example runs.
        button_clicked = st.button("Get Recommendations", type="primary")

        # Consume the example flag so it doesn't run again on rerender.
        run_example = st.session_state['run_example']
        st.session_state['run_example'] = False

        if button_clicked or run_example:
            with st.spinner("Finding the best book matches for you..."):
                # Use the current input values, which may come from examples or user input.
                recommendations = recommend_books_knn_out_of_dataset(
                    df_pandas, tfidf, knn_model, input_summary, input_genres, num_recommendations
                )

            st.subheader("📚 Your Recommended Books")
            if not recommendations:
                st.warning("No recommendations found. Try a more detailed summary or different genres.")
            for position, book in enumerate(recommendations, start=1):
                with st.expander(f"{position}. {book['title']}"):
                    st.markdown(f"**Summary:** {book['summary']}")
                    st.markdown(f"**Genres:** {book['genres']}")

        # Example tabs section
        st.subheader("Try these examples")
        example_tabs = st.tabs([label for label, _, _ in examples])

        for number, (tab, (_, summary, genres)) in enumerate(zip(example_tabs, examples), start=1):
            with tab:
                st.write(summary)
                st.write(f"Genres: {genres}")
                if st.button("Use this example", key=f"ex{number}"):
                    # Remember the example and rerun so the form is
                    # pre-filled and recommendations are shown immediately.
                    st.session_state['example_summary'] = summary
                    st.session_state['example_genres'] = genres
                    st.session_state['run_example'] = True
                    st.rerun()

    except Exception as e:
        # Top-level boundary: surface the failure in the page instead of
        # letting the script die with a bare traceback.
        st.error(f"An error occurred: {e}")
        st.info("Make sure you have the required model files (tfidf_vectorizer.pkl, knn_model.pkl) and dataset (goodreadsV5.csv) in the same directory as this app.")
        st.code(
            "\n"
            "# Files needed:\n"
            "- tfidf_vectorizer.pkl: Your trained TF-IDF vectorizer\n"
            "- knn_model.pkl: Your trained KNN model\n"
            "- goodreadsV5.csv: Your dataset with book information\n"
        )
# Entry point: render the app only when this file is run as a script.
if __name__ == "__main__":
    main()