Spaces:

CSAT
/

bookengine

Sleeping

App Files Files Community

bookengine / app.py

CSAT

Update app.py

20ce80b verified 9 months ago

raw

history blame contribute delete

7.65 kB

	import streamlit as st
	import pickle
	import polars as pl
	import re
	import pandas as pd
	import numpy as np
	from collections import Counter

	# Browser-tab title and wide page layout for the whole app.
	st.set_page_config(
	    page_title="Book Recommendation Engine",
	    layout="wide",
	)

	@st.cache_resource
	def load_models():
	    """Unpickle the TF-IDF vectorizer and the KNN model from the working directory.

	    Reads ``tfidf_vectorizer.pkl`` and ``knn_model.pkl`` (relative paths) and
	    is wrapped in ``st.cache_resource``.

	    Returns:
	        tuple: ``(tfidf, knn_model)`` in that order.
	    """
	    artifacts = []
	    # Order matters: the result is unpacked as (tfidf, knn_model).
	    for filename in ('tfidf_vectorizer.pkl', 'knn_model.pkl'):
	        with open(filename, 'rb') as handle:
	            artifacts.append(pickle.load(handle))

	    tfidf, knn_model = artifacts
	    return tfidf, knn_model

	@st.cache_data
	def load_data():
	    """Read ``goodreadsV5.csv`` and derive the text feature columns.

	    Rows with a null ``name``, ``summary`` or ``genres`` are dropped. Two
	    columns are then added: ``combined_features`` (summary and genres joined
	    by a single space) and ``processed_features`` (``preprocess_text`` applied
	    to ``combined_features``).

	    Returns:
	        tuple: ``(df_cleaned, df_pandas)`` - the polars DataFrame and the same
	        data converted to pandas.
	    """
	    required_columns = ['name', 'summary', 'genres']
	    combined_expr = (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')

	    # Lazy scan: the null filter and the concatenation run when collect() is called.
	    lazy_books = pl.scan_csv('goodreadsV5.csv')
	    collected = (
	        lazy_books
	        .drop_nulls(subset=required_columns)
	        .with_columns([combined_expr])
	        .collect()
	    )

	    # Element-wise Python preprocessing of the combined text.
	    processed_expr = (
	        pl.col('combined_features')
	        .map_elements(preprocess_text, return_dtype=pl.Utf8)
	        .alias('processed_features')
	    )
	    df_cleaned = collected.with_columns([processed_expr])

	    # The pandas copy is what the recommender indexes positionally with .iloc.
	    df_pandas = df_cleaned.to_pandas()
	    return df_cleaned, df_pandas

	# Text normalisation shared by dataset preparation and query handling
	def preprocess_text(text):
	    """Lower-case ``text`` and strip every character that is not an ASCII
	    letter, a digit, or whitespace.

	    Args:
	        text: The string to normalise.

	    Returns:
	        The normalised string; whitespace is kept exactly as it was.
	    """
	    lowered = text.lower()
	    cleaned = re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
	    return cleaned

	# Recommend dataset books for a free-text description that is not in the dataset
	def recommend_books_knn_out_of_dataset(df_pandas, tfidf, knn_model, input_summary, input_genres, top_n=5):
	    """Return the ``top_n`` nearest dataset books for a summary/genres query.

	    The summary and genres are joined with a space, passed through
	    ``preprocess_text``, vectorised with ``tfidf.transform`` and looked up
	    with ``knn_model.kneighbors``.

	    Args:
	        df_pandas: DataFrame with ``name``, ``summary`` and ``genres`` columns,
	            indexed positionally by the neighbour indices.
	            NOTE(review): presumably row order must match the data the KNN
	            model was fitted on - confirm against the training code.
	        tfidf: Object exposing ``transform``.
	        knn_model: Object exposing ``kneighbors``.
	        input_summary: Free-text summary of the query book.
	        input_genres: Genres of the query book.
	        top_n: Number of neighbours to request.

	    Returns:
	        list[dict]: One dict per neighbour, in the order returned by the
	        model, with keys ``title``, ``summary``, ``genres`` and
	        ``similarity_score`` (``1 - distance``).
	    """
	    query_text = preprocess_text(f"{input_summary} {input_genres}")
	    query_vector = tfidf.transform([query_text])

	    distances, indices = knn_model.kneighbors(query_vector, n_neighbors=top_n)

	    # Pair each neighbour's row position with its distance.
	    neighbours = zip(indices.flatten(), distances.flatten())
	    matched_rows = ((df_pandas.iloc[position], distance) for position, distance in neighbours)

	    return [
	        {
	            "title": row['name'],
	            "summary": row['summary'],
	            "genres": row['genres'],
	            "similarity_score": 1 - distance,  # Convert distance to similarity score
	        }
	        for row, distance in matched_rows
	    ]

	# Placeholder inputs shown until the user picks one of the canned examples.
	_DEFAULT_SUMMARY = "A fantasy adventure about a young wizard learning magic."
	_DEFAULT_GENRES = "fantasy, adventure, magic"

	# (tab label, button key, summary, genres) for every canned example.
	_EXAMPLES = (
	    (
	        "Fantasy Adventure",
	        "ex1",
	        "A magical journey through enchanted lands with dragons and wizards.",
	        "fantasy, adventure, magic",
	    ),
	    (
	        "Romance",
	        "ex2",
	        "A love story between two people from different worlds who meet by chance.",
	        "romance, contemporary, drama",
	    ),
	    (
	        "Science Fiction",
	        "ex3",
	        "Space explorers discover an alien civilization that challenges their understanding of humanity.",
	        "science fiction, space, aliens",
	    ),
	    (
	        "Mystery",
	        "ex4",
	        "A detective investigates a series of mysterious disappearances in a small town.",
	        "mystery, thriller, crime",
	    ),
	)


	def _init_session_state():
	    """Create the session-state keys used by the example flow if they are missing."""
	    initial_values = (
	        ('example_summary', ""),
	        ('example_genres', ""),
	        ('run_example', False),
	    )
	    for key, value in initial_values:
	        if key not in st.session_state:
	            st.session_state[key] = value


	def _set_example(summary, genres):
	    """Store the chosen example, flag it for an immediate run, and rerun the script."""
	    st.session_state['example_summary'] = summary
	    st.session_state['example_genres'] = genres
	    st.session_state['run_example'] = True
	    st.rerun()


	def _show_missing_files_help(error):
	    """Report a model/dataset loading failure and list the files the app needs."""
	    st.error(f"An error occurred: {error}")
	    # The dataset name must match the file read by load_data (goodreadsV5.csv).
	    st.info("Make sure you have the required model files (tfidf_vectorizer.pkl, knn_model.pkl) and dataset (goodreadsV5.csv) in the same directory as this app.")
	    st.code(
	        "\n"
	        "# Files needed:\n"
	        "- tfidf_vectorizer.pkl: Your trained TF-IDF vectorizer\n"
	        "- knn_model.pkl: Your trained KNN model\n"
	        "- goodreadsV5.csv: Your dataset with book information\n"
	    )


	def _render_search(df_pandas, tfidf, knn_model):
	    """Render the input form and, when requested, the recommendation list."""
	    # Keep the last chosen example as the widget default. Reverting to the
	    # placeholder as soon as the run flag is cleared would change the default
	    # on the next rerun and discard the example text.
	    # NOTE(review): relies on a changed default recreating a key-less widget -
	    # confirm against the installed Streamlit version.
	    default_summary = st.session_state['example_summary'] or _DEFAULT_SUMMARY
	    default_genres = st.session_state['example_genres'] or _DEFAULT_GENRES

	    st.subheader("Find Book Recommendations")
	    st.write("Enter a book summary and genres to get personalized recommendations.")

	    col1, col2 = st.columns(2)

	    with col1:
	        input_summary = st.text_area("Book Summary", default_summary, height=150)

	    with col2:
	        input_genres = st.text_input("Genres (comma-separated)", default_genres)
	        num_recommendations = st.slider("Number of Recommendations",
	                                        min_value=1, max_value=20, value=5)

	    # Consume the one-shot flag up front so a failure below cannot make the
	    # example re-run on every subsequent rerun.
	    run_example = st.session_state['run_example']
	    st.session_state['run_example'] = False

	    # Always render the button; `flag or st.button(...)` would skip drawing it
	    # whenever the flag is set.
	    clicked = st.button("Get Recommendations", type="primary")
	    if not (clicked or run_example):
	        return

	    if not (input_summary.strip() or input_genres.strip()):
	        st.warning("Please enter a book summary or genres first.")
	        return

	    with st.spinner("Finding the best book matches for you..."):
	        # Use the current input values, which may come from examples or user input
	        recommendations = recommend_books_knn_out_of_dataset(
	            df_pandas, tfidf, knn_model, input_summary, input_genres, num_recommendations
	        )

	    st.subheader("📚 Your Recommended Books")

	    for position, book in enumerate(recommendations, start=1):
	        with st.expander(f"{position}. {book['title']}"):
	            st.markdown(f"Summary: {book['summary']}")
	            st.markdown(f"Genres: {book['genres']}")


	def _render_examples():
	    """Render one tab per canned example, each with a button that loads it."""
	    st.subheader("Try these examples")
	    tabs = st.tabs([label for label, _key, _summary, _genres in _EXAMPLES])

	    for tab, (_label, key, summary, genres) in zip(tabs, _EXAMPLES):
	        with tab:
	            st.write(summary)
	            st.write(f"Genres: {genres}")
	            if st.button("Use this example", key=key):
	                _set_example(summary, genres)


	def main():
	    """Entry point: build the Streamlit page for the book recommendation engine.

	    Loads the models and dataset, renders the search form with its results,
	    and renders the example tabs. A loading failure shows the list of required
	    files and stops; a failure while producing recommendations is reported
	    with a generic error message and the example tabs are still rendered.
	    """
	    st.title("📚 Book Recommendation Engine")
	    _init_session_state()

	    # Only loading problems get the "files needed" hint.
	    try:
	        tfidf, knn_model = load_models()
	        _, df_pandas = load_data()
	    except Exception as e:
	        _show_missing_files_help(e)
	        return

	    try:
	        _render_search(df_pandas, tfidf, knn_model)
	    except Exception as e:
	        st.error(f"An error occurred: {e}")

	    # NOTE(review): st.rerun() is understood to interrupt the script through an
	    # internal control-flow exception; the example buttons are rendered outside
	    # the handlers above so it can never be intercepted - confirm against the
	    # installed Streamlit version.
	    _render_examples()

	# Build the page only when this file is executed as the entry script.
	if __name__ == "__main__":
	    main()