CSAT committed on
Commit
20ce80b
·
verified ·
1 Parent(s): 22bd453

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -168
app.py CHANGED
@@ -1,168 +1,184 @@
1
- import streamlit as st
2
- import pickle
3
- import polars as pl
4
- import re
5
- from sklearn.feature_extraction.text import TfidfVectorizer
6
- from sklearn.neighbors import NearestNeighbors
7
- import matplotlib.pyplot as plt
8
- import seaborn as sns
9
-
10
- # Set page configuration
11
- st.set_page_config(
12
- page_title="Book Recommendation System",
13
- page_icon="πŸ“š",
14
- layout="wide"
15
- )
16
-
17
- # App title and description
18
- st.title("πŸ“š Book Recommendation System")
19
- st.markdown("Enter a book summary and genres to get personalized book recommendations!")
20
-
21
- # Load the TF-IDF vectorizer
22
- @st.cache_resource
23
- def load_models():
24
- with open('tfidf_vectorizer.pkl', 'rb') as f:
25
- tfidf = pickle.load(f)
26
-
27
- # Load the KNN model
28
- with open('knn_model.pkl', 'rb') as f:
29
- knn_model = pickle.load(f)
30
-
31
- return tfidf, knn_model
32
-
33
- # Load the dataset
34
- @st.cache_data
35
- def load_data():
36
- df_lazy = pl.scan_csv('goodreadsV5.csv')
37
- df_cleaned = (
38
- df_lazy.drop_nulls(subset=['name', 'summary', 'genres'])
39
- .with_columns([
40
- (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')
41
- ])
42
- ).collect()
43
-
44
- # Apply preprocessing to create the 'processed_features' column
45
- df_cleaned = df_cleaned.with_columns([
46
- pl.col('combined_features')
47
- .map_elements(preprocess_text, return_dtype=pl.Utf8)
48
- .alias('processed_features')
49
- ])
50
-
51
- return df_cleaned
52
-
53
- # Define the preprocessing function
54
- def preprocess_text(text):
55
- return re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
56
-
57
- # Recommendation function for out-of-dataset books
58
- def recommend_books_knn_out_of_dataset(input_summary, input_genres, top_n=5):
59
- # Combine and preprocess the input book's features
60
- combined_input = f"{input_summary} {input_genres}"
61
- processed_input = preprocess_text(combined_input)
62
-
63
- # Transform the input book's features using the loaded TF-IDF vectorizer
64
- input_vector = tfidf.transform([processed_input])
65
-
66
- # Find the nearest neighbors using the loaded KNN model
67
- distances, indices = knn_model.kneighbors(input_vector, n_neighbors=top_n)
68
-
69
- # Retrieve the recommended book titles and additional information
70
- recommendations = []
71
- for i, idx in enumerate(indices.flatten()):
72
- book_info = {
73
- "title": df_cleaned['name'][idx],
74
- "summary": df_cleaned['summary'][idx],
75
- "genres": df_cleaned['genres'][idx],
76
- "similarity_score": 1 - distances.flatten()[i] # Convert distance to similarity
77
- }
78
- recommendations.append(book_info)
79
-
80
- return recommendations
81
-
82
- # Load models and data
83
- try:
84
- tfidf, knn_model = load_models()
85
- df_cleaned = load_data()
86
- models_loaded = True
87
- except Exception as e:
88
- st.error(f"Error loading models or data: {e}")
89
- models_loaded = False
90
-
91
- # Sidebar for inputs
92
- st.sidebar.header("Input Parameters")
93
-
94
- # Input fields
95
- input_summary = st.sidebar.text_area("Book Summary",
96
- placeholder="Enter a brief summary of the book...",
97
- height=150)
98
-
99
- input_genres = st.sidebar.text_input("Genres",
100
- placeholder="E.g., fantasy, adventure, mystery")
101
-
102
- # Number of recommendations slider
103
- num_recommendations = st.sidebar.slider("Number of Recommendations",
104
- min_value=1,
105
- max_value=10,
106
- value=5)
107
-
108
- # Get recommendations button
109
- if st.sidebar.button("Get Recommendations") and models_loaded:
110
- if input_summary and input_genres:
111
- with st.spinner("Finding the perfect books for you..."):
112
- # Get recommendations
113
- recommendations = recommend_books_knn_out_of_dataset(
114
- input_summary,
115
- input_genres,
116
- top_n=num_recommendations
117
- )
118
-
119
- # Display recommendations
120
- st.header("Recommended Books")
121
-
122
- # Create columns for book cards
123
- cols = st.columns(min(3, num_recommendations))
124
-
125
- for i, book in enumerate(recommendations):
126
- col_idx = i % 3
127
- with cols[col_idx]:
128
- st.subheader(book["title"])
129
- st.markdown(f"**Genres:** {book['genres']}")
130
- st.markdown(f"**Similarity Score:** {book['similarity_score']:.2f}")
131
- with st.expander("Summary"):
132
- st.write(book["summary"])
133
- st.divider()
134
-
135
- # Visualization of similarity scores
136
- st.header("Similarity Scores")
137
- fig, ax = plt.subplots(figsize=(10, 5))
138
-
139
- book_titles = [book["title"] for book in recommendations]
140
- similarity_scores = [book["similarity_score"] for book in recommendations]
141
-
142
- # Create horizontal bar chart
143
- sns.barplot(x=similarity_scores, y=book_titles, palette="viridis", ax=ax)
144
- ax.set_xlabel("Similarity Score")
145
- ax.set_ylabel("Book Title")
146
- ax.set_title("Book Recommendation Similarity Scores")
147
-
148
- st.pyplot(fig)
149
-
150
- else:
151
- st.warning("Please enter both a summary and genres to get recommendations.")
152
-
153
- # Add some information about the app
154
- st.sidebar.markdown("---")
155
- st.sidebar.header("About")
156
- st.sidebar.info(
157
- """
158
- This app uses TF-IDF vectorization and K-Nearest Neighbors to recommend books
159
- based on your input summary and genres.
160
-
161
- The recommendations are based on textual similarity between your input and
162
- our database of books from Goodreads.
163
- """
164
- )
165
-
166
- # Add a footer
167
- st.markdown("---")
168
- st.markdown("πŸ“š Book Recommendation System | Created with Streamlit")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import polars as pl
4
+ import re
5
+ import pandas as pd
6
+ import numpy as np
7
+ from collections import Counter
8
+
9
# Page-level settings: browser tab title and the wide layout.
st.set_page_config(
    page_title="Book Recommendation Engine",
    layout="wide",
)
10
+
11
@st.cache_resource
def load_models():
    """Unpickle the fitted TF-IDF vectorizer and the KNN model.

    Both artifacts are read from the current working directory. The function
    is decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of unpickling them again on every script run.

    Returns:
        tuple: ``(tfidf, knn_model)``, in that order.
    """
    # NOTE: pickle.load executes code embedded in the file; only ship
    # artifacts that were produced by a trusted training run.
    artifacts = []
    for filename in ('tfidf_vectorizer.pkl', 'knn_model.pkl'):
        with open(filename, 'rb') as handle:
            artifacts.append(pickle.load(handle))

    vectorizer, neighbors = artifacts
    return vectorizer, neighbors
22
+
23
@st.cache_data
def load_data():
    """Read the Goodreads CSV and build the feature columns.

    Rows with a null ``name``, ``summary`` or ``genres`` are dropped, then two
    columns are added: ``combined_features`` (summary and genres joined by a
    space) and ``processed_features`` (``preprocess_text`` applied to it).

    Returns:
        tuple: ``(polars_frame, pandas_frame)`` holding the same rows; the
        pandas copy exists for positional ``iloc`` lookups of KNN indices.
    """
    required_columns = ['name', 'summary', 'genres']
    combined = (pl.col('summary') + ' ' + pl.col('genres')).alias('combined_features')

    frame = (
        pl.scan_csv('goodreadsV5.csv')
        .drop_nulls(subset=required_columns)
        .with_columns([combined])
        .collect()
    )

    # Row-wise Python callback, executed once per book after the lazy query
    # has been materialised.
    processed = (
        pl.col('combined_features')
        .map_elements(preprocess_text, return_dtype=pl.Utf8)
        .alias('processed_features')
    )
    frame = frame.with_columns([processed])

    return frame, frame.to_pandas()
45
+
46
# Everything except ASCII letters, digits and whitespace is stripped.
# Compiled once because preprocess_text runs for every dataset row.
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')


def preprocess_text(text: str) -> str:
    """Lower-case *text* and remove every non-alphanumeric, non-space character.

    Args:
        text: Raw summary/genre text. Must be a ``str``; ``None`` is not
            accepted (callers drop null rows before calling this).

    Returns:
        The lower-cased text containing only ``a-z``, ``0-9`` and whitespace.

    NOTE(review): presumably this must stay identical to the normalisation
    used when the pickled TF-IDF vectorizer was fitted -- confirm before
    changing the pattern.
    """
    return _NON_ALNUM_RE.sub('', text.lower())
49
+
50
def recommend_books_knn_out_of_dataset(df_pandas, tfidf, knn_model, input_summary, input_genres, top_n=5):
    """Recommend books similar to a free-text description that is not in the dataset.

    Args:
        df_pandas: pandas DataFrame with ``name``, ``summary`` and ``genres``
            columns. Assumes its row order matches the data the KNN model was
            fitted on -- TODO confirm against the training script.
        tfidf: Fitted vectorizer exposing ``transform``.
        knn_model: Fitted neighbours model exposing ``kneighbors``.
        input_summary: Summary text typed by the user.
        input_genres: Genre text typed by the user.
        top_n: Maximum number of recommendations to return.

    Returns:
        list[dict]: One dict per neighbour, nearest first, with the keys
        ``title``, ``summary``, ``genres`` and ``similarity_score``. The list
        is empty when ``top_n`` is below 1 or no rows are available.
    """
    # Asking for more neighbours than fitted samples makes kneighbors raise,
    # so clamp the request to what is actually available.
    available = len(df_pandas)
    fitted_samples = getattr(knn_model, 'n_samples_fit_', None)
    if fitted_samples is not None:
        available = min(available, fitted_samples)
    neighbor_count = min(top_n, available)
    if neighbor_count < 1:
        return []

    # The query goes through the same preprocessing as the dataset rows.
    processed_input = preprocess_text(f"{input_summary} {input_genres}")
    input_vector = tfidf.transform([processed_input])

    distances, indices = knn_model.kneighbors(input_vector, n_neighbors=neighbor_count)

    recommendations = []
    # Flatten once and walk index/distance pairs together.
    for idx, distance in zip(indices.flatten(), distances.flatten()):
        row = df_pandas.iloc[idx]  # single positional lookup per neighbour
        recommendations.append({
            "title": row['name'],
            "summary": row['summary'],
            "genres": row['genres'],
            # Distance -> similarity; presumably the model uses a cosine
            # metric so this lands in [0, 1] -- verify against training.
            "similarity_score": 1 - distance,
        })

    return recommendations
74
+
75
def main():
    """Render the Streamlit page: input form, recommendations and one-click examples.

    Loads the models and dataset, shows the summary/genre inputs, runs the
    KNN recommender when the button is pressed or an example is chosen, and
    lists the results in expanders. Any exception raised while building the
    page is reported in the UI together with a list of the required files.
    """
    st.title("📚 Book Recommendation Engine")

    # Initialize session state variables if they don't exist
    for state_key, initial in (
        ('example_summary', ""),
        ('example_genres', ""),
        ('run_example', False),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = initial

    try:
        # Load models and data (the polars frame is not needed on this page).
        tfidf, knn_model = load_models()
        _, df_pandas = load_data()

        # Keep the chosen example as the default on later reruns as well.
        # Switching the default back to the hard-coded text once the example
        # had run made Streamlit treat the inputs as new widgets and reset
        # them, silently discarding the example the user had selected.
        default_summary = (
            st.session_state['example_summary']
            or "A fantasy adventure about a young wizard learning magic."
        )
        default_genres = st.session_state['example_genres'] or "fantasy, adventure, magic"

        # Main content
        st.subheader("Find Book Recommendations")
        st.write("Enter a book summary and genres to get personalized recommendations.")

        col1, col2 = st.columns(2)

        with col1:
            input_summary = st.text_area("Book Summary", default_summary, height=150)

        with col2:
            input_genres = st.text_input("Genres (comma-separated)", default_genres)
            num_recommendations = st.slider("Number of Recommendations",
                                            min_value=1, max_value=20, value=5)

        # Render the button unconditionally; evaluating it behind
        # `run_example or ...` hid it on example-triggered runs.
        clicked = st.button("Get Recommendations", type="primary")

        if clicked or st.session_state['run_example']:
            # Clear the flag before doing any work so a failing lookup cannot
            # leave the page stuck re-running the example on every rerender.
            st.session_state['run_example'] = False

            if not (input_summary.strip() or input_genres.strip()):
                # An empty query has no terms to match against the dataset.
                st.warning("Please enter a summary or genres to get recommendations.")
            else:
                with st.spinner("Finding the best book matches for you..."):
                    recommendations = recommend_books_knn_out_of_dataset(
                        df_pandas, tfidf, knn_model, input_summary, input_genres, num_recommendations
                    )

                st.subheader("📚 Your Recommended Books")

                for rank, book in enumerate(recommendations, start=1):
                    with st.expander(f"{rank}. {book['title']}"):
                        st.markdown(f"**Summary:** {book['summary']}")
                        st.markdown(f"**Genres:** {book['genres']}")

        # Example tabs section
        st.subheader("Try these examples")
        examples = [
            (
                "Fantasy Adventure",
                "A magical journey through enchanted lands with dragons and wizards.",
                "fantasy, adventure, magic",
            ),
            (
                "Romance",
                "A love story between two people from different worlds who meet by chance.",
                "romance, contemporary, drama",
            ),
            (
                "Science Fiction",
                "Space explorers discover an alien civilization that challenges their understanding of humanity.",
                "science fiction, space, aliens",
            ),
            (
                "Mystery",
                "A detective investigates a series of mysterious disappearances in a small town.",
                "mystery, thriller, crime",
            ),
        ]
        example_tabs = st.tabs([label for label, _, _ in examples])

        def set_example(summary, genres):
            """Store the chosen example and rerun so it is shown and executed."""
            st.session_state['example_summary'] = summary
            st.session_state['example_genres'] = genres
            st.session_state['run_example'] = True
            st.rerun()

        for position, (tab, (_, summary, genres)) in enumerate(zip(example_tabs, examples), start=1):
            with tab:
                st.write(summary)
                st.write(f"Genres: {genres}")
                # Keys ex1..ex4 keep the four identically labelled buttons distinct.
                if st.button("Use this example", key=f"ex{position}"):
                    set_example(summary, genres)

    except Exception as e:
        st.error(f"An error occurred: {e}")
        # File names below match what load_models/load_data actually open.
        st.info("Make sure you have the required model files (tfidf_vectorizer.pkl, knn_model.pkl) and dataset (goodreadsV5.csv) in the same directory as this app.")
        st.code(
            "\n"
            "# Files needed:\n"
            "- tfidf_vectorizer.pkl: Your trained TF-IDF vectorizer\n"
            "- knn_model.pkl: Your trained KNN model\n"
            "- goodreadsV5.csv: Your dataset with book information\n"
        )


if __name__ == "__main__":
    main()