SurgeousJP committed on
Commit
8ffebd2
·
1 Parent(s): 15cb9bb

Convert notebook to a .py file and run it in Docker

Browse files
.ipynb_checkpoints/plot_based_recommender_supabase-checkpoint.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa1c81342bd382d45590fe490dcbfd659ec912efc57238035dec01a0d4f319b
3
+ size 36012
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.8-slim

# Set working directory
WORKDIR /app

# Install dependencies before copying the rest of the sources so this layer
# is rebuilt only when requirements.txt changes, not on every code edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code (scripts, notebooks)
COPY . .

# 5000 is the port the waitress server in plot_based_recommender_supabase.py
# binds to. NOTE(review): nothing in the script listens on 5001; it is kept
# only so existing port mappings keep working -- confirm whether it is needed.
EXPOSE 5000
EXPOSE 5001

# Start the recommender API
CMD ["python", "plot_based_recommender_supabase.py"]
plot_based_recommender_supabase.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:615747245a55b3eca4499c0f188f1a53be6a690b14debf81064606f9642a4c41
3
- size 31065263
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3df418463084db0c0a2e43de93b5286852706b6774ace11e8dcdfdd0aaf21a
3
+ size 41180
plot_based_recommender_supabase.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # In[1]:
5
+
6
+
7
+ # get_ipython().system('pip install supabase')
8
+ # get_ipython().system('pip install flask')
9
+ # get_ipython().system('pip install flask-ngrok')
10
+ # get_ipython().system('pip install waitress')
11
+
12
+
13
+ # In[2]:
14
+
15
+
16
+ # pip install --upgrade supabase
17
+
18
+
19
+ # In[3]:
20
+
21
+
22
+ # pip list
23
+
24
+
25
+ # In[4]:
26
+
27
+
28
+ import pandas as pd
29
+ import numpy as np
30
+ from supabase import create_client, Client
31
+
32
+
33
+ # In[5]:
34
+
35
+
36
import os

# Supabase project details.
# Both values can be overridden with the SUPABASE_URL / SUPABASE_KEY
# environment variables (e.g. `docker run -e SUPABASE_KEY=...`); the literals
# are kept only as defaults so existing deployments keep working.
# NOTE(review): the key below is committed to the repository -- consider
# rotating it and removing the default once the environment is configured.
URL = os.environ.get(
    "SUPABASE_URL",
    "https://oflclzbsbgkadqiagxqk.supabase.co",
)  # Supabase project URL
KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9mbGNsemJzYmdrYWRxaWFneHFrIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MDY0OTY3OTIsImV4cCI6MjAyMjA3Mjc5Mn0.2IGuSFqHbNp75vs-LskGjK0fw3ypqbiHJ9MKAAaYE8s",
)  # Supabase API key
supabase: Client = create_client(URL, KEY)
40
+
41
+
42
+ # In[6]:
43
+
44
+
45
def convert_table_to_pandas_dataframe(supabase, table_name, columns="*"):
    """Fetch rows of a Supabase table and return them as a pandas DataFrame.

    Args:
        supabase: Client object exposing ``table(name).select(cols).execute()``
            whose result carries the rows in a ``data`` attribute.
        table_name: Name of the table to read.
        columns: Column selector handed to ``select``. Defaults to ``"*"``
            (every column), which is what was previously hard-coded.

    Returns:
        A DataFrame built from the response's ``data`` payload; an empty
        DataFrame when the payload is empty or ``None``.
    """
    # A single request is issued.
    # NOTE(review): if the backend caps the number of rows per request, large
    # tables would come back truncated -- confirm against the project settings.
    response = supabase.table(table_name).select(columns).execute()

    return pd.DataFrame(response.data or [])
53
+
54
+ books_df = convert_table_to_pandas_dataframe(supabase, "books")
55
+
56
+
57
+ # In[7]:
58
+
59
+
60
+ books_df['description'].head(5)
61
+
62
+
63
+ # ## Plot-based recommender
64
+
65
+ # In[8]:
66
+
67
+
68
# Import TfidfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

# Define a TF-IDF vectorizer that removes English stop words such as 'the', 'a'
tfidf = TfidfVectorizer(stop_words='english')

# Replace NaN with an empty string *in the column that is vectorized below*.
# The previous code wrote the result to a misspelled 'descripion' column, so
# missing descriptions still reached fit_transform unfilled.
books_df['description'] = books_df['description'].fillna('')

# Construct the TF-IDF matrix by fitting and transforming the descriptions
tfidf_matrix = tfidf.fit_transform(books_df['description'])

# Shape of tfidf_matrix (only displayed when run as a notebook cell)
tfidf_matrix.shape
82
+
83
+
84
+ # In[9]:
85
+
86
+
87
+ tfidf
88
+
89
+
90
+ # In[10]:
91
+
92
+
93
+ print(tfidf_matrix[0].shape)
94
+
95
+
96
+ # In[11]:
97
+
98
+
99
+ # Import linear_kernel
100
+ from sklearn.metrics.pairwise import linear_kernel
101
+
102
+ # Compute the cosine similarity matrix
103
+ cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
104
+
105
+
106
+ # In[12]:
107
+
108
+
109
# Map each book title to its row label in books_df.
# De-duplicate on the *index* (the titles): Series.drop_duplicates() compares
# the values, which are row labels and already unique, so repeated titles were
# never removed and indices[title] could return a Series instead of a scalar.
indices = pd.Series(books_df.index, index=books_df['title'])
indices = indices[~indices.index.duplicated(keep='first')]
110
+
111
+
112
+ # In[13]:
113
+
114
+
115
def get_original_book_id(title):
    """Return the ``id`` of the first row of ``books_df`` whose title is *title*.

    Args:
        title: Exact book title to look up in the ``title`` column.

    Returns:
        The value of the ``id`` column for the first matching row.

    Raises:
        IndexError: If no row has that title. This is the exception type the
            bare ``.values[0]`` raised before; it now carries a clear message.
    """
    matches = books_df.loc[books_df['title'] == title, 'id']
    if matches.empty:
        raise IndexError(f"no book titled {title!r} in books_df")
    return matches.values[0]
117
+
118
+
119
+ # In[14]:
120
+
121
+
122
# Function that takes a book title as input and returns the ids of the most similar books
def get_top_five_recommendations(title, cosine_sim=cosine_sim, top_n=10, exclude_self=False):
    """Return the ids of the books most similar to the book called *title*.

    Despite its name, this function has always returned up to 10 ids, and the
    queried book takes part in the ranking. Both behaviours remain the
    defaults so existing callers see the same results; use ``top_n`` and
    ``exclude_self`` to change them.

    Args:
        title: Title of the book to find neighbours for; looked up in the
            module-level ``indices`` series.
        cosine_sim: Pairwise similarity matrix indexed by row position.
        top_n: Maximum number of ids to return.
        exclude_self: When true, drop the queried book from the ranking.

    Returns:
        A list of values from the ``id`` column of ``books_df``, ordered from
        most to least similar.

    Raises:
        KeyError: If *title* is not present in ``indices``.
    """
    # Row of the book that matches the title
    idx = indices[title]
    # Several rows may share one title, in which case the lookup yields a
    # Series; fall back to the first occurrence instead of failing below.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Pairwise similarity scores of every book with the queried book
    sim_scores = list(enumerate(cosine_sim[idx]))
    if exclude_self:
        sim_scores = [pair for pair in sim_scores if pair[0] != idx]

    # Sort the books by similarity, best first (stable for ties)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Keep the row positions of the top_n most similar books
    book_indices = [position for position, _score in sim_scores[:top_n]]

    # Read the ids straight from the selected rows. Going through the title
    # (as before) returned the wrong id whenever two books shared a title and
    # rescanned the whole frame for every result.
    return list(books_df['id'].iloc[book_indices].values)
146
+
147
+
148
+ # In[15]:
149
+
150
+
151
# The notebook cells here called get_top_five_recommendations('Walls of Ash')
# twice just to display the result. In a script the values are discarded, and
# a missing 'Walls of Ash' row would raise KeyError and stop the server from
# starting, so the calls are no longer executed at import time.

# Show full column contents when DataFrames are printed
pd.set_option('display.max_colwidth', None)
164
+
165
+
166
+ # In[18]:
167
+
168
+
169
+ from flask import Flask, jsonify, request
170
+ from flask_ngrok import run_with_ngrok
171
+
172
+
173
+ # In[19]:
174
+
175
+
176
+ app = Flask(__name__)
177
+ run_with_ngrok(app) # Start ngrok when app is run
178
+
179
+
180
+ # In[20]:
181
+
182
+
183
+ import json
184
+
185
+
186
+ # In[21]:
187
+
188
+
189
+ from waitress import serve
190
+
191
+
192
+ # In[23]:
193
+
194
+
195
+ # get_ipython().system('pip freeze > requirements.txt')
196
+
197
+
198
+ # In[24]:
199
+
200
+
201
+ # pip install pipdeptree
202
+
203
+
204
+ # In[25]:
205
+
206
+
207
+ # pipdeptree --output requirements.txt --graph >> requirements.txt
208
+
209
+
210
+ # In[65]:
211
+
212
+
213
@app.route('/predict/<int:id>', methods=['GET'])
def predict(id):
    """Return the recommended book ids for the book with the given id.

    Args:
        id: Book id taken from the URL. The name shadows the builtin but must
            match the ``<int:id>`` route variable, so it is left unchanged.

    Returns:
        A JSON array (as a string) of integer book ids on success, or a JSON
        error object with HTTP status 404 when no book has that id.
    """
    matches = books_df.loc[books_df['id'] == id, 'title']
    if matches.empty:
        # Previously `.values[0]` raised IndexError here and the client got a
        # generic 500; an unknown id is a client error, so report it as 404.
        return json.dumps({"error": f"book id {id} not found"}), 404

    title = matches.values[0]
    print(title)
    # Convert to plain ints so json.dumps can serialise the ids
    prediction_result = [int(x) for x in get_top_five_recommendations(title)]
    return json.dumps(prediction_result)
219
+
220
+
221
+ # In[66]:
222
+
223
+
224
+ if __name__ == '__main__':
225
+ serve(app, host="0.0.0.0", port=5000)
226
+
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ supabase==2.4.3
2
+ supafunc==0.4.5
3
+ Flask==2.2.2
4
+ Werkzeug==2.2.2
5
+ flask-ngrok==0.0.25
6
+ waitress==3.0.0
7
+ scikit-image==0.19.2
8
+ scikit-learn==1.0.2
9
+ scipy==1.7.3
10
+ pandas==1.4.2
11
+ numpy==1.21.5
12
+ numpydoc==1.2