Spaces:

AI-Manith
/

bookRecEngine

Sleeping

App Files Files Community

bookRecEngine / app.py

AI-Manith

Update app.py

1e61b24 verified about 1 year ago

raw

history blame

8.04 kB

	import streamlit as st
	import pickle
	import polars as pl
	import re
	import requests
	from io import BytesIO
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.neighbors import NearestNeighbors
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Remote artifacts: the Goodreads CSV plus the pickled KNN model and TF-IDF vectorizer
	GITHUB_TFIDF_URL = "https://raw.githubusercontent.com/Manithj/bookRecEngine/main/tfidf_vectorizer.pkl"
	GITHUB_KNN_URL = "https://media.githubusercontent.com/media/Manithj/bookRecEngine/refs/heads/main/knn_model.pkl"
	GITHUB_CSV_URL = "https://media.githubusercontent.com/media/Manithj/bookRecEngine/refs/heads/main/goodreadsV2.csv"

	# Page title, icon and wide layout for the Streamlit page
	st.set_page_config(page_title="Book Recommendation System", page_icon="📚", layout="wide")

	# Text normalisation applied to book features and to user input
	def preprocess_text(text):
	    """Lower-case ``text`` and remove everything except letters, digits and whitespace.

	    Args:
	        text: The string to normalise.

	    Returns:
	        The lower-cased string with all characters outside ``a-z``, ``A-Z``,
	        ``0-9`` and whitespace removed.
	    """
	    lowered = text.lower()
	    return re.sub(r'[^a-zA-Z0-9\s]', '', lowered)

	# Load models from GitHub - only a *successful* download is cached by st.cache_resource
	@st.cache_resource
	def _fetch_models_from_github():
	    """Download and unpickle the TF-IDF vectorizer and the KNN model.

	    Returns:
	        A ``(tfidf, knn_model)`` tuple.

	    Raises:
	        Exception: Any network, HTTP-status or unpickling error is propagated,
	            so a failed attempt is never stored in the resource cache and the
	            next rerun retries the download.
	    """
	    # SECURITY: pickle.loads can execute arbitrary code contained in the
	    # payload. These URLs must only ever point at artifacts we control.
	    loaded = []
	    for url in (GITHUB_TFIDF_URL, GITHUB_KNN_URL):
	        response = requests.get(url, timeout=60)
	        # Fail on 4xx/5xx instead of feeding an error page to pickle
	        response.raise_for_status()
	        loaded.append(pickle.loads(response.content))

	    tfidf, knn_model = loaded
	    return tfidf, knn_model


	def load_models_from_github():
	    """Return ``(tfidf, knn_model)``, or ``(None, None)`` if loading failed.

	    Errors are reported to the page with ``st.error``. The failure result is
	    produced outside the cached helper so that a transient error is not
	    remembered for the lifetime of the process.
	    """
	    try:
	        return _fetch_models_from_github()
	    except Exception as e:
	        st.error(f"Error loading models: {e}")
	        return None, None

	# Load the dataset from GitHub - only a *successful* load is cached by st.cache_data
	@st.cache_data
	def _fetch_dataset_from_github():
	    """Read the CSV with Polars and derive the text feature columns.

	    Returns:
	        A Polars DataFrame without rows whose ``name``, ``summary`` or
	        ``genres`` is null, extended with ``combined_features``
	        (summary + space + genres) and ``processed_features`` (the combined
	        text passed through ``preprocess_text``).

	    Raises:
	        Exception: Any download or parsing error is propagated, so a failed
	            attempt is never stored in the data cache.
	    """
	    # Load CSV directly using Polars
	    frame = pl.read_csv(GITHUB_CSV_URL)

	    # Clean and prepare the data
	    frame = frame.drop_nulls(subset=['name', 'summary', 'genres'])
	    frame = frame.with_columns([
	        (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
	    ])

	    # Apply preprocessing
	    return frame.with_columns([
	        pl.col('combined_features')
	        .map_elements(preprocess_text, return_dtype=pl.Utf8)
	        .alias('processed_features')
	    ])


	def load_data_from_github():
	    """Return the prepared dataset, or ``None`` if loading failed.

	    Errors are reported to the page with ``st.error``. The failure result is
	    produced outside the cached helper so that a transient error is not
	    remembered for the lifetime of the process.
	    """
	    try:
	        return _fetch_dataset_from_github()
	    except Exception as e:
	        st.error(f"Error loading dataset: {e}")
	        return None

	# Fetch the models and the dataset while showing a spinner
	with st.spinner("Loading models and data (this will only happen once)..."):
	    tfidf, knn_model = load_models_from_github()
	    df_cleaned = load_data_from_github()

	# The app is usable only when none of the three artifacts is missing
	models_loaded = not (tfidf is None or knn_model is None or df_cleaned is None)

	# Page heading and short instructions
	st.title("📚 Book Recommendation System")
	st.markdown("Enter a book summary and genres to get personalized book recommendations!")

	# Status banner reflecting the outcome of the load above
	if models_loaded:
	    st.success("Models and data loaded successfully!")
	else:
	    st.error("Failed to load models or data. Please check the GitHub URLs.")

	# Recommendation function for out-of-dataset books
	def recommend_books_knn_out_of_dataset(input_summary, input_genres, top_n=5):
	    """Recommend dataset books for a free-text summary and genre list.

	    The summary and genres are joined with a space, normalised with
	    ``preprocess_text``, vectorised with the module-level ``tfidf`` and
	    looked up in the module-level ``knn_model``. Book details are read from
	    the module-level ``df_cleaned`` at the row positions the model returns.

	    Args:
	        input_summary: Summary text describing the book of interest.
	        input_genres: Genre text for the book of interest.
	        top_n: Number of neighbours to request from the KNN model.

	    Returns:
	        A list of dicts, in the order returned by ``kneighbors``, each with
	        the keys ``title``, ``summary``, ``genres`` and ``similarity_score``
	        (computed as ``1 - distance``).
	    """
	    # Combine and preprocess the input book's features
	    processed_input = preprocess_text(f"{input_summary} {input_genres}")

	    # Transform the input book's features using the loaded TF-IDF vectorizer
	    input_vector = tfidf.transform([processed_input])

	    # Find the nearest neighbors using the loaded KNN model
	    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)

	    # Flatten once up front instead of re-flattening on every iteration
	    flat_distances = distances.ravel()
	    flat_indices = indices.ravel()

	    # Look the columns up once; they do not change inside the loop
	    names = df_cleaned['name']
	    summaries = df_cleaned['summary']
	    genres = df_cleaned['genres']

	    recommendations = []
	    for distance, idx in zip(flat_distances, flat_indices):
	        row = int(idx)  # plain int rather than a numpy integer for row lookup
	        recommendations.append({
	            "title": names[row],
	            "summary": summaries[row],
	            "genres": genres[row],
	            "similarity_score": 1 - float(distance),  # Convert distance to similarity
	        })

	    return recommendations

	# Sidebar for inputs
	# Session-state keys of the two text inputs. The example buttons write to
	# these keys so that an example ends up in the real input widgets instead of
	# in throw-away duplicate widgets.
	SUMMARY_INPUT_KEY = "input_summary"
	GENRES_INPUT_KEY = "input_genres"


	def _load_example(summary, genres):
	    """Button callback: copy an example into the summary and genres inputs."""
	    st.session_state[SUMMARY_INPUT_KEY] = summary
	    st.session_state[GENRES_INPUT_KEY] = genres


	def _render_book_cards(recommendations):
	    """Show one card per recommendation, spread over at most three columns."""
	    if not recommendations:
	        return

	    cols = st.columns(min(3, len(recommendations)))
	    for i, book in enumerate(recommendations):
	        # Wrap around the columns that were actually created
	        with cols[i % len(cols)]:
	            st.subheader(book["title"])
	            st.markdown(f"Genres: {book['genres']}")
	            st.markdown(f"Similarity Score: {book['similarity_score']:.2f}")
	            with st.expander("Summary"):
	                st.write(book["summary"])
	            st.divider()


	def _render_similarity_chart(recommendations):
	    """Draw a horizontal bar chart of the similarity scores."""
	    book_titles = [book["title"] for book in recommendations]
	    similarity_scores = [book["similarity_score"] for book in recommendations]

	    fig, ax = plt.subplots(figsize=(10, 5))
	    try:
	        # Create horizontal bar chart
	        sns.barplot(x=similarity_scores, y=book_titles, palette="viridis", ax=ax)
	        ax.set_xlabel("Similarity Score")
	        ax.set_ylabel("Book Title")
	        ax.set_title("Book Recommendation Similarity Scores")
	        st.pyplot(fig)
	    finally:
	        # Figures from plt.subplots stay registered with pyplot until closed;
	        # without this they pile up on every button press.
	        plt.close(fig)


	st.sidebar.header("Input Parameters")

	# Input fields
	input_summary = st.sidebar.text_area(
	    "Book Summary",
	    placeholder="Enter a brief summary of the book...",
	    height=150,
	    key=SUMMARY_INPUT_KEY,
	)

	input_genres = st.sidebar.text_input(
	    "Genres",
	    placeholder="E.g., fantasy, adventure, mystery",
	    key=GENRES_INPUT_KEY,
	)

	# Number of recommendations slider
	num_recommendations = st.sidebar.slider(
	    "Number of Recommendations",
	    min_value=1,
	    max_value=10,
	    value=5,
	)

	# Get recommendations button
	if st.sidebar.button("Get Recommendations") and models_loaded:
	    # Treat whitespace-only text the same as an empty field
	    if input_summary.strip() and input_genres.strip():
	        with st.spinner("Finding the perfect books for you..."):
	            # Get recommendations
	            recommendations = recommend_books_knn_out_of_dataset(
	                input_summary,
	                input_genres,
	                top_n=num_recommendations,
	            )

	            # Display recommendations
	            st.header("Recommended Books")
	            _render_book_cards(recommendations)

	            # Visualization of similarity scores
	            st.header("Similarity Scores")
	            _render_similarity_chart(recommendations)
	    else:
	        st.warning("Please enter both a summary and genres to get recommendations.")

	# Add some information about the app
	st.sidebar.markdown("---")
	st.sidebar.header("About")
	st.sidebar.info(
	    """
	    This app uses TF-IDF vectorization and K-Nearest Neighbors to recommend books
	    based on your input summary and genres.

	    The recommendations are based on textual similarity between your input and
	    our database of books from Goodreads.

	    Models and data are loaded directly from GitHub.
	    """
	)

	# Add example inputs for quick testing
	st.sidebar.markdown("---")
	st.sidebar.header("Try these examples")

	# Each button fills the real input fields above through its on_click callback;
	# the user then presses "Get Recommendations" as usual.
	st.sidebar.button(
	    "Example 1: Fantasy Adventure",
	    on_click=_load_example,
	    args=(
	        "A young wizard discovers his magical powers and embarks on a journey to defeat a dark lord threatening the world.",
	        "fantasy, adventure, magic",
	    ),
	)

	st.sidebar.button(
	    "Example 2: Mystery Thriller",
	    on_click=_load_example,
	    args=(
	        "A detective investigates a series of murders that seem to be connected to an unsolved case from decades ago.",
	        "mystery, thriller, crime",
	    ),
	)

	# Add a footer
	st.markdown("---")
	# "\\|" produces the same backslash-pipe text as before without relying on
	# "\|", which is not a valid Python escape sequence.
	st.markdown("📚 Book Recommendation System \\| Created with Streamlit")