AI-Manith committed on
Commit
989865d
·
verified ·
1 Parent(s): 27abc1f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +191 -0
app.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import polars as pl
4
+ import re
5
+ import requests
6
+ from io import BytesIO
7
+ from sklearn.feature_extraction.text import TfidfVectorizer
8
+ from sklearn.neighbors import NearestNeighbors
9
+ import matplotlib.pyplot as plt
10
+ import seaborn as sns
11
+
12
# Set page configuration: browser-tab title, icon and a full-width layout.
st.set_page_config(
    page_title="Book Recommendation System",
    page_icon="📚",
    layout="wide",
)

# App title and description
st.title("📚 Book Recommendation System")
st.markdown("Enter a book summary and genres to get personalized book recommendations!")

# GitHub URLs for model files and dataset.
# NOTE(review): all three URLs follow the `main` branch rather than a pinned
# commit, so the dataset and the pickled models can change (or drift out of
# sync with each other) without any change to this file. Consider pinning to
# a commit SHA.
GITHUB_CSV_URL = "https://media.githubusercontent.com/media/Manithj/bookRecEngine/refs/heads/main/goodreadsV2.csv"
GITHUB_KNN_URL = "https://media.githubusercontent.com/media/Manithj/bookRecEngine/refs/heads/main/knn_model.pkl"
GITHUB_TFIDF_URL = "https://raw.githubusercontent.com/Manithj/bookRecEngine/main/tfidf_vectorizer.pkl"
27
+
28
# Load models from GitHub
# Per-request timeout (seconds) so a stalled connection cannot hang the app.
_MODEL_DOWNLOAD_TIMEOUT = 60


@st.cache_resource
def _fetch_models_from_github():
    """Download and unpickle the TF-IDF vectorizer and the KNN model.

    Returns:
        A ``(tfidf, knn_model)`` tuple.

    Raises:
        Exception: any network, HTTP-status or unpickling error is allowed to
            propagate. Raising (instead of returning a sentinel) keeps a
            failed attempt out of the Streamlit resource cache, so the next
            rerun retries the download.
    """
    # SECURITY NOTE: pickle.loads can execute arbitrary code contained in the
    # payload. These files are fetched from a remote, mutable location, so
    # anyone able to change them can run code in this process. Only keep this
    # if the repository is fully trusted; otherwise verify a checksum or use
    # a safer serialization format.

    # Load TF-IDF vectorizer
    tfidf_response = requests.get(GITHUB_TFIDF_URL, timeout=_MODEL_DOWNLOAD_TIMEOUT)
    # Fail on 4xx/5xx instead of handing an error page to pickle.
    tfidf_response.raise_for_status()
    tfidf = pickle.loads(tfidf_response.content)

    # Load KNN model
    knn_response = requests.get(GITHUB_KNN_URL, timeout=_MODEL_DOWNLOAD_TIMEOUT)
    knn_response.raise_for_status()
    knn_model = pickle.loads(knn_response.content)

    return tfidf, knn_model


def load_models_from_github():
    """Return ``(tfidf, knn_model)``, or ``(None, None)`` if loading fails.

    Successful loads are served from the cached helper. Failures are reported
    in the UI with ``st.error`` and are not cached, so a transient network
    problem does not disable the app until the cache is cleared.
    """
    try:
        return _fetch_models_from_github()
    except Exception as e:  # UI boundary: report any failure, never crash the page
        st.error(f"Error loading models: {e}")
        return None, None
44
+
45
# Load the dataset from GitHub
@st.cache_data
def _read_and_prepare_dataset():
    """Read the books CSV and add the derived text-feature columns.

    Returns:
        A Polars DataFrame with rows lacking ``name``, ``summary`` or
        ``genres`` removed, plus two added columns: ``combined_features``
        (summary and genres joined by a space) and ``processed_features``
        (``combined_features`` passed through ``preprocess_text``).

    Raises:
        Exception: any download or parsing error propagates, so that a failed
            attempt is not stored in the Streamlit data cache.
    """
    # Load CSV directly using Polars
    df_cleaned = pl.read_csv(GITHUB_CSV_URL)

    # Clean and prepare the data.
    # NOTE(review): recommendations index this frame by position using the
    # indices returned by the KNN model; presumably the model was fitted on
    # rows filtered the same way -- confirm against the training code.
    df_cleaned = df_cleaned.drop_nulls(subset=['name', 'summary', 'genres'])
    df_cleaned = df_cleaned.with_columns([
        (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
    ])

    # Apply preprocessing (preprocess_text is resolved at call time, so it may
    # be defined later in the module).
    df_cleaned = df_cleaned.with_columns([
        pl.col('combined_features')
        .map_elements(preprocess_text, return_dtype=pl.Utf8)
        .alias('processed_features')
    ])

    return df_cleaned


def load_data_from_github():
    """Return the prepared books DataFrame, or ``None`` if loading fails.

    Successful loads are served from the cached helper. Failures are reported
    in the UI with ``st.error`` and are not cached, so the next rerun retries.
    """
    try:
        return _read_and_prepare_dataset()
    except Exception as e:  # UI boundary: report any failure, never crash the page
        st.error(f"Error loading dataset: {e}")
        return None
69
+
70
# Define the preprocessing function
# Compiled once at import time: preprocess_text runs once per dataset row, so
# the pattern lookup is kept out of that per-row path.
_NON_ALNUM_OR_SPACE = re.compile(r'[^a-zA-Z0-9\s]')


def preprocess_text(text: str) -> str:
    """Lowercase ``text`` and strip everything except ASCII letters, digits and whitespace.

    Whitespace (including newlines) is kept as-is; punctuation and non-ASCII
    characters are deleted, not replaced by a space.
    """
    return _NON_ALNUM_OR_SPACE.sub('', text.lower())
73
+
74
# Recommendation function for out-of-dataset books
def recommend_books_knn_out_of_dataset(input_summary, input_genres, top_n=5):
    """Recommend the ``top_n`` dataset books closest to a free-text description.

    Relies on the module-level ``tfidf``, ``knn_model`` and ``df_cleaned``
    objects having been loaded before the call.

    Args:
        input_summary: Summary text describing the book of interest.
        input_genres: Genre text, appended to the summary before vectorizing.
        top_n: Number of neighbours to return. Values below 1 yield ``[]``.

    Returns:
        A list of dicts with keys ``title``, ``summary``, ``genres`` and
        ``similarity_score``, in the order returned by the KNN model.
    """
    # Nothing to look up; also avoids asking the model for zero neighbours.
    if top_n < 1:
        return []

    # Combine and preprocess the input exactly like the dataset rows.
    combined_input = f"{input_summary} {input_genres}"
    processed_input = preprocess_text(combined_input)

    # Transform the input book's features using the loaded TF-IDF vectorizer
    input_vector = tfidf.transform([processed_input])

    # Find the nearest neighbors using the loaded KNN model
    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)

    # Flatten once (single query row) and walk both arrays in lockstep.
    recommendations = []
    for distance, idx in zip(distances.flatten(), indices.flatten()):
        row = int(idx)  # plain int for positional indexing into the frame
        recommendations.append({
            "title": df_cleaned['name'][row],
            "summary": df_cleaned['summary'][row],
            "genres": df_cleaned['genres'][row],
            # Convert distance to similarity.
            # NOTE(review): only a true similarity if the model's metric is
            # bounded by 1 (e.g. cosine) -- confirm how knn_model was fitted.
            "similarity_score": 1 - float(distance),
        })

    return recommendations
98
+
99
# Status indicator for loading data
with st.spinner("Loading models and data from GitHub..."):
    # Load models and data
    tfidf, knn_model = load_models_from_github()
    df_cleaned = load_data_from_github()

# The vectorizer, the KNN model and the dataset are all required.
models_loaded = (
    tfidf is not None and knn_model is not None and df_cleaned is not None
)
if models_loaded:
    st.success("Models and data loaded successfully!")
else:
    st.error("Failed to load models or data. Please check the GitHub URLs.")

# Sidebar for inputs
st.sidebar.header("Input Parameters")

# Input fields
input_summary = st.sidebar.text_area(
    "Book Summary",
    placeholder="Enter a brief summary of the book...",
    height=150,
)

input_genres = st.sidebar.text_input(
    "Genres",
    placeholder="E.g., fantasy, adventure, mystery",
)

# Number of recommendations slider
num_recommendations = st.sidebar.slider(
    "Number of Recommendations",
    min_value=1,
    max_value=10,
    value=5,
)

# Get recommendations button (ignored while the models are unavailable; the
# load error above already explains why).
if st.sidebar.button("Get Recommendations") and models_loaded:
    # Treat whitespace-only input as missing.
    if input_summary.strip() and input_genres.strip():
        with st.spinner("Finding the perfect books for you..."):
            # Get recommendations
            recommendations = recommend_books_knn_out_of_dataset(
                input_summary,
                input_genres,
                top_n=num_recommendations,
            )

            # Display recommendations
            st.header("Recommended Books")

            # Create columns for book cards (at most three per row)
            cols = st.columns(min(3, num_recommendations))

            for i, book in enumerate(recommendations):
                # Wrap around the columns that were actually created.
                with cols[i % len(cols)]:
                    st.subheader(book["title"])
                    st.markdown(f"**Genres:** {book['genres']}")
                    st.markdown(f"**Similarity Score:** {book['similarity_score']:.2f}")
                    with st.expander("Summary"):
                        st.write(book["summary"])
                    st.divider()

            # Visualization of similarity scores
            st.header("Similarity Scores")
            fig, ax = plt.subplots(figsize=(10, 5))

            book_titles = [book["title"] for book in recommendations]
            similarity_scores = [book["similarity_score"] for book in recommendations]

            # Create horizontal bar chart
            sns.barplot(x=similarity_scores, y=book_titles, palette="viridis", ax=ax)
            ax.set_xlabel("Similarity Score")
            ax.set_ylabel("Book Title")
            ax.set_title("Book Recommendation Similarity Scores")

            st.pyplot(fig)
            # pyplot keeps every figure alive until it is closed; without this
            # each button click would leave one more figure in memory.
            plt.close(fig)

    else:
        st.warning("Please enter both a summary and genres to get recommendations.")

# Add some information about the app
st.sidebar.markdown("---")
st.sidebar.header("About")
st.sidebar.info(
    """
    This app uses TF-IDF vectorization and K-Nearest Neighbors to recommend books
    based on your input summary and genres.

    The recommendations are based on textual similarity between your input and
    our database of books from Goodreads.

    Models and data are loaded directly from GitHub.
    """
)

# Add a footer
st.markdown("---")
st.markdown("📚 Book Recommendation System | Created with Streamlit")