luthrabhuvan committed on
Commit
e9fff82
·
verified ·
1 Parent(s): b3a05ba

Upload 3 files

Browse files
Files changed (3) hide show
  1. DockerFile +14 -0
  2. README.md +14 -10
  3. app.py +115 -0
DockerFile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Flask book recommender, served by gunicorn on port 5000.
# NOTE(review): Debian "buster" is end-of-life; consider moving to a maintained
# base such as python:3.9-slim-bookworm after confirming the dependencies build there.
FROM python:3.9-slim-buster

WORKDIR /app

# Copy and install the dependency list before the source so this layer stays
# cached when only application code changes. --no-cache-dir keeps pip's
# download cache out of the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 5000

# NOTE(review): gunicorn has to be listed in requirements.txt (not shown here);
# the README's dependency list does not mention it.
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "app:app"]
# OR, for development/testing (not recommended for production):
# CMD ["python", "app.py"]
README.md CHANGED
@@ -1,10 +1,14 @@
1
- ---
2
- title: Book Recommender
3
- emoji: 👀
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
+ # Book Recommender (Flask Version)
2
+
3
+ This project implements a content-based book recommendation system using Python, Flask, and scikit-learn. It allows users to upload a CSV or Excel file containing book titles and summaries, and then enter a book title to receive recommendations for similar books.
4
+
5
+ ## Dependencies
6
+
7
+ - Flask: Used for creating the web application.
8
+ - pandas: Used for data loading and manipulation.
9
+ - scikit-learn: Used for TF-IDF vectorization and cosine similarity calculation.
10
+
11
+ You can install these dependencies using pip:
12
+
13
+ ```bash
14
+ pip install Flask pandas scikit-learn
15
+ ```
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request, jsonify
2
+ import pandas as pd
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+
6
# WSGI application object; the Dockerfile's gunicorn command loads it as "app:app".
+ app = Flask(__name__)
7
+
8
class BookRecommender:
    """Content-based book recommender built on TF-IDF vectors of book summaries.

    ``load_and_process_data`` populates two attributes:

    * ``df`` -- the cleaned book table, indexed 0..n-1.
    * ``similarity_matrix`` -- pairwise cosine similarities; row/column ``i``
      corresponds to row position ``i`` of ``df``.
    """

    def __init__(self):
        # Both stay None until a file has been loaded and processed successfully.
        self.df = None
        self.similarity_matrix = None

    def load_data(self, filepath):
        """Read a CSV or Excel file into a DataFrame.

        Args:
            filepath: Path ending in ``.csv``, ``.xls`` or ``.xlsx``
                (matched case-insensitively).

        Returns:
            The DataFrame produced by pandas.

        Raises:
            FileNotFoundError: The file does not exist.
            ValueError: The extension is unsupported or pandas rejected the content.
            Exception: Any other failure while reading (for example a missing
                Excel engine), wrapped with an "Error loading data" prefix.
        """
        lowered = str(filepath).lower()
        try:
            if lowered.endswith('.csv'):
                return pd.read_csv(filepath)
            if lowered.endswith(('.xls', '.xlsx')):
                return pd.read_excel(filepath)
            raise ValueError("Unsupported file format. Please provide a CSV or Excel file.")
        except FileNotFoundError as e:
            raise FileNotFoundError(f"File not found at {filepath}") from e
        except ValueError as e:
            raise ValueError(f"Error loading data: {e}") from e
        except Exception as e:
            raise Exception(f"Error loading data: {e}") from e

    def preprocess_data(self, df, summary_column='summary', title_column='title'):
        """Return a cleaned copy of ``df`` ready for vectorization.

        Missing titles/summaries become empty strings, both columns are cast to
        text, exact (title, summary) duplicates are dropped, and rows whose
        title or summary is blank are removed. The result is re-indexed
        0..n-1 so that row positions line up with the similarity matrix.

        Args:
            df: Input table; it is not modified.
            summary_column: Name of the column holding the summaries.
            title_column: Name of the column holding the titles.

        Returns:
            A new, cleaned DataFrame.

        Raises:
            KeyError: A required column is missing.
        """
        missing = [col for col in (title_column, summary_column) if col not in df.columns]
        if missing:
            raise KeyError(f"Missing required column(s): {', '.join(missing)}")

        # Work on a copy so the caller's frame is left untouched.
        df = df.copy()

        for column, label in ((summary_column, 'summary'), (title_column, 'title')):
            if df[column].isnull().any():
                print(f"Handled missing values in {label} column.")
            # Cast to text so numeric-looking titles/summaries can be vectorized
            # and compared against user input.
            df[column] = df[column].fillna('').astype(str)

        df = df.drop_duplicates(subset=[title_column, summary_column], keep='first')
        print("Removed duplicate rows.")

        # A row is unusable when it cannot be looked up (blank title) or has
        # nothing to vectorize (blank summary). The whole condition must be
        # negated; negating only the title test kept blank-summary rows.
        blank_title = df[title_column].str.strip() == ''
        blank_summary = df[summary_column].str.strip() == ''
        df = df[~(blank_title | blank_summary)]
        print("Removed rows with blank title and summary.")

        # Dropping rows leaves gaps in the index; reset it so labels equal positions.
        return df.reset_index(drop=True)

    def create_tfidf_matrix(self, df, summary_column='summary'):
        """Vectorize the summaries with TF-IDF, ignoring English stop words.

        Returns:
            A ``(tfidf_matrix, fitted_vectorizer)`` tuple.
        """
        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(df[summary_column])
        return tfidf_matrix, tfidf

    def calculate_similarity(self, tfidf_matrix):
        """Return the pairwise cosine-similarity matrix of the TF-IDF rows."""
        return cosine_similarity(tfidf_matrix)

    def recommend_books(self, book_title, top_n=5):
        """Return the titles most similar to ``book_title``.

        The title is matched exactly first; if that fails, a comparison that
        ignores case and surrounding whitespace is tried.

        Args:
            book_title: Title to look up in the loaded data.
            top_n: Maximum number of recommendations to return (default 5).

        Returns:
            A list of titles ordered from most to least similar, or a string
            with an error message when the lookup fails.
        """
        if self.df is None or self.similarity_matrix is None:
            return "An error occurred: no data has been loaded."

        try:
            titles = self.df['title']
            # Use row positions, not index labels: the similarity matrix is positional.
            positions = (titles == book_title).to_numpy().nonzero()[0]
            if len(positions) == 0:
                wanted = str(book_title).strip().casefold()
                normalized = titles.astype(str).str.strip().str.casefold()
                positions = (normalized == wanted).to_numpy().nonzero()[0]
        except Exception as e:
            return f"An error occurred: {e}"

        if len(positions) == 0:
            return "Book title not found."
        if top_n <= 0:
            return []

        book_pos = int(positions[0])
        ranked = self.similarity_matrix[book_pos].argsort()[::-1]
        # Exclude the queried book explicitly instead of assuming it sorts first;
        # another book with an identical summary can tie with it.
        top = [int(i) for i in ranked if i != book_pos][:top_n]
        return titles.iloc[top].tolist()

    def load_and_process_data(self, filepath):
        """Load ``filepath`` and rebuild the recommender state from it.

        The new table and similarity matrix are stored only when every step
        succeeds, so a failed upload leaves any previous state intact.

        Returns:
            True on success, False if loading or processing failed.
        """
        try:
            df = self.preprocess_data(self.load_data(filepath))
            tfidf_matrix, _ = self.create_tfidf_matrix(df)
            similarity_matrix = self.calculate_similarity(tfidf_matrix)
        except Exception as e:
            print(f"Error during data loading/processing: {e}")
            return False

        self.df = df
        self.similarity_matrix = similarity_matrix
        return True
77
+
78
+
79
# Single module-level recommender: the uploaded data set is shared by every
# request handled by this process.
recommender = BookRecommender()


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the main page and handle file uploads and title lookups.

    GET renders the empty page. A POST carrying a non-empty ``file`` part
    uploads and processes a data set; a POST carrying ``book_title`` returns
    recommendations from the previously uploaded data.

    Returns:
        The rendered ``index.html`` with ``message`` (status or error text)
        and ``recommendations`` (list of titles, or None).
    """
    message = ""
    recommendations = None

    if request.method == "POST":
        upload = request.files.get('file')
        book_title = request.form.get('book_title')

        if upload is not None and upload.filename:
            # Only the extension of the client-supplied name is used, and only
            # when it is on the whitelist, so nothing user-controlled can
            # steer the path the upload is written to.
            name = upload.filename
            extension = name.rsplit('.', 1)[-1].lower() if '.' in name else ''
            if extension not in ('csv', 'xls', 'xlsx'):
                message = "Unsupported file format. Please provide a CSV or Excel file."
            else:
                try:
                    filepath = "uploaded_file." + extension
                    upload.save(filepath)
                    if recommender.load_and_process_data(filepath):
                        message = "File uploaded and processed successfully!"
                    else:
                        message = "Error processing the file."
                except Exception as e:
                    message = f"File upload failed: {e}"

        elif book_title is not None and (book_title.strip() or upload is None):
            # Browsers submit an empty "file" part for an unused file input;
            # that must not prevent a title lookup sent in the same form.
            if recommender.df is None or recommender.similarity_matrix is None:
                message = "Please upload and process a file first."
            else:
                recommendations = recommender.recommend_books(book_title)
                if isinstance(recommendations, str):
                    # recommend_books reports failures as a plain string.
                    message = recommendations
                else:
                    message = ""

        elif upload is not None:
            message = "No file selected."

    return render_template("index.html", message=message, recommendations=recommendations)
112
+
113
+
114
if __name__ == "__main__":
    # Local development entry point; the container image starts the app
    # through gunicorn instead.
    import os

    # The Werkzeug debugger lets anyone who can reach the server run arbitrary
    # code, so debug mode is opt-in via FLASK_DEBUG rather than always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    app.run(debug=debug_enabled)