Convert notebook to a .py file and run it in Docker

Browse files

Files changed (5) hide show

.ipynb_checkpoints/plot_based_recommender_supabase-checkpoint.ipynb +3 -0
Dockerfile +15 -0
plot_based_recommender_supabase.ipynb +2 -2
plot_based_recommender_supabase.py +226 -0
requirements.txt +12 -0

.ipynb_checkpoints/plot_based_recommender_supabase-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afa1c81342bd382d45590fe490dcbfd659ec912efc57238035dec01a0d4f319b
+size 36012

Dockerfile ADDED Viewed

	@@ -0,0 +1,15 @@

+FROM python:3.8-slim
+# Set working directory
+WORKDIR /app
+# Install dependencies first so this layer stays cached until requirements.txt changes
+COPY requirements.txt .
+# --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code (scripts, notebooks)
+COPY . .
+# Port 5000 is the port the script's waitress server listens on
+EXPOSE 5000
+# NOTE(review): nothing in the script appears to listen on 5001 - confirm it is still needed
+EXPOSE 5001
+# Start the recommender API
+CMD ["python", "plot_based_recommender_supabase.py"]

plot_based_recommender_supabase.ipynb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:615747245a55b3eca4499c0f188f1a53be6a690b14debf81064606f9642a4c41
-size 31065263

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba3df418463084db0c0a2e43de93b5286852706b6774ace11e8dcdfdd0aaf21a
+size 41180

plot_based_recommender_supabase.py ADDED Viewed

	@@ -0,0 +1,226 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+# get_ipython().system('pip install supabase')
+# get_ipython().system('pip install flask')
+# get_ipython().system('pip install flask-ngrok')
+# get_ipython().system('pip install waitress')
+# In[2]:
+# pip install --upgrade supabase
+# In[3]:
+# pip list
+# In[4]:
+import pandas as pd
+import numpy as np
+from supabase import create_client, Client
+# In[5]:
+# Your Supabase project details
+URL = "https://oflclzbsbgkadqiagxqk.supabase.co"  # Supabase project URL
+KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9mbGNsemJzYmdrYWRxaWFneHFrIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MDY0OTY3OTIsImV4cCI6MjAyMjA3Mjc5Mn0.2IGuSFqHbNp75vs-LskGjK0fw3ypqbiHJ9MKAAaYE8s"                    # Supabase API key
+supabase: Client = create_client(URL, KEY)
+# In[6]:
+def convert_table_to_pandas_dataframe(supabase, table_name):
+    # Retrieve data from Supabase
+    data = supabase.table(table_name).select("*").execute()
+    # Convert to DataFrame
+    df = pd.DataFrame(data.data)
+    return df
+books_df = convert_table_to_pandas_dataframe(supabase, "books")
+# In[7]:
+books_df['description'].head(5)
+# ## Plot-based recommender
+# In[8]:
+#Import TfIdfVectorizer from scikit-learn
+from sklearn.feature_extraction.text import TfidfVectorizer
+#Define a TF-IDF Vectorizer Object. Remove all english stop words such as 'the', 'a'
+tfidf = TfidfVectorizer(stop_words='english')
+#Replace NaN with an empty string
+books_df['descripion'] = books_df['description'].fillna('')
+#Construct the required TF-IDF matrix by fitting and transforming the data
+tfidf_matrix = tfidf.fit_transform(books_df['description'])
+#Output the shape of tfidf_matrix
+tfidf_matrix.shape
+# In[9]:
+tfidf
+# In[10]:
+print(tfidf_matrix[0].shape)
+# In[11]:
+# Import linear_kernel
+from sklearn.metrics.pairwise import linear_kernel
+# Compute the cosine similarity matrix
+cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
+# In[12]:
+indices = pd.Series(books_df.index, index=books_df['title']).drop_duplicates()
+# In[13]:
+def get_original_book_id(title):
+    return books_df.loc[books_df['title'] == title, 'id'].values[0]
+# In[14]:
+# Function that takes in movie title as input and outputs most similar movies
+def get_top_five_recommendations(title, cosine_sim=cosine_sim):
+    # Get the index of the movie that matches the title
+    idx = indices[title]
+    # Get the pairwsie similarity scores of all movies with that movie
+    sim_scores = list(enumerate(cosine_sim[idx]))
+    # Sort the movies based on the similarity scores
+    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
+    # Get the scores of the 10 most similar movies
+    sim_scores = sim_scores[:10]
+    # Get the movie indices
+    book_indices = [i[0] for i in sim_scores]
+#     # Return the top 10 most similar movies
+#     return books_df['title'].iloc[book_indices]
+    ids = []
+    for title in books_df['title'].iloc[book_indices]:
+        ids.append(get_original_book_id(title))
+    return ids
+# In[15]:
+get_top_five_recommendations('Walls of Ash')
+# In[16]:
+pd.set_option('display.max_colwidth', None)
+# In[17]:
+books_df[books_df['id'].isin(get_top_five_recommendations('Walls of Ash'))]['url']
+# In[18]:
+from flask import Flask, jsonify, request
+from flask_ngrok import run_with_ngrok
+# In[19]:
+# Flask application object; the /predict route further down is registered on it.
+app = Flask(__name__)
+# NOTE(review): the entry point at the bottom of this script starts the app
+# with waitress `serve(...)`, not `app.run()`; presumably the ngrok hook set
+# up here never fires in that mode - confirm whether it is still needed.
+run_with_ngrok(app)  # Start ngrok when app is run
+# In[20]:
+import json
+# In[21]:
+from waitress import serve
+# In[23]:
+# get_ipython().system('pip freeze > requirements.txt')
+# In[24]:
+# pip install pipdeptree
+# In[25]:
+# pipdeptree --output requirements.txt --graph >> requirements.txt
+# In[65]:
+@app.route('/predict/<int:id>', methods=['GET'])
+def predict(id):
+    title = books_df[books_df['id'] == id]['title'].values[0]
+    print(title)
+    prediction_result = [int(x) for x in get_top_five_recommendations(title)]
+    return json.dumps(prediction_result)
+# In[66]:
+if __name__ == '__main__':
+    serve(app, host="0.0.0.0", port=5000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+supabase==2.4.3
+supafunc==0.4.5
+Flask==2.2.2
+Werkzeug==2.2.2
+flask-ngrok==0.0.25
+waitress==3.0.0
+scikit-image==0.19.2
+scikit-learn==1.0.2
+scipy==1.7.3
+pandas==1.4.2
+numpy==1.21.5
+numpydoc==1.2