|
|
|
|
|
|
|
|
|
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from supabase import create_client, Client |
|
|
|
|
|
|
|
|
import os

# Connection settings for the Supabase project.
# Values are read from the SUPABASE_URL / SUPABASE_KEY environment variables
# so credentials do not have to live in source control; the literals below are
# kept only as backward-compatible fallbacks.
# NOTE(review): the fallback key is committed in plain text -- consider
# rotating it and dropping the default once deployments set the variables.
URL = os.environ.get("SUPABASE_URL", "https://oflclzbsbgkadqiagxqk.supabase.co")
KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9mbGNsemJzYmdrYWRxaWFneHFrIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MDY0OTY3OTIsImV4cCI6MjAyMjA3Mjc5Mn0.2IGuSFqHbNp75vs-LskGjK0fw3ypqbiHJ9MKAAaYE8s",
)

# Module-level client shared by the rest of the script.
supabase: Client = create_client(URL, KEY)
|
|
|
|
|
def convert_table_to_pandas_dataframe(supabase, table_name):
    """Load every column of a Supabase table into a pandas DataFrame.

    Args:
        supabase: Client object exposing ``table(name).select(cols).execute()``.
        table_name: Name of the table to read.

    Returns:
        A DataFrame built from the ``data`` attribute of the query response.
    """
    query = supabase.table(table_name).select("*")
    response = query.execute()
    return pd.DataFrame(response.data)
|
|
|
|
|
# Pull the whole "books" table into memory; everything below works on this frame.
books_df = convert_table_to_pandas_dataframe(supabase, "books")

# Notebook display settings: truncate long cells, show every column.
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.max_columns', None)

books_df.head(5)

# Text used for TF-IDF: description, title and author joined by single spaces.
# Missing fields are replaced by '' first: str + NaN yields NaN, which would
# turn the whole row's text into NaN and make the vectorizer reject it.
books_df['combined'] = (
    books_df['description'].fillna('')
    + ' ' + books_df['title'].fillna('')
    + ' ' + books_df['author_name'].fillna('')
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
|
|
|
|
|
|
|
# TF-IDF vectorizer that ignores common English stop words.
tfidf = TfidfVectorizer(stop_words='english')

# One row per book, one column per vocabulary term.
tfidf_matrix = tfidf.fit_transform(books_df['combined'])

# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back only on releases that predate get_feature_names_out().
if hasattr(tfidf, "get_feature_names_out"):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()

tfidf_matrix.shape

# Peek at a slice of the learned vocabulary.
feature_names[2000:2500]

# Inspect a single document (row 10) of the matrix.
document_tfidf_vector = tfidf_matrix[10]

# NOTE(review): this is the sum of the row's TF-IDF weights, not a count of
# terms, although the message printed below labels it "Total terms".
total_terms_in_document = document_tfidf_vector.sum()

print("Document vector: ", tfidf_matrix[10])
print("Total terms in document {}: {}".format(10, total_terms_in_document))

tfidf

print(tfidf_matrix[0].shape)
|
|
|
|
|
|
|
|
|
|
|
from sklearn.metrics.pairwise import linear_kernel |
|
|
|
|
|
|
|
|
# Pairwise dot products between all book vectors. The vectorizer above is built
# without a `norm` override, and scikit-learn's documented default is L2
# normalisation, so these dot products are cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Reverse lookup: title -> row label of books_df.
# Series.drop_duplicates() compares the *values* (the row labels, which are
# already distinct), so it never removed repeated titles; a repeated title made
# indices[title] return a Series instead of a single row label. De-duplicate on
# the title index instead, keeping the first occurrence.
indices = pd.Series(books_df.index, index=books_df['title'])
indices = indices[~indices.index.duplicated(keep='first')]
|
|
|
|
|
def get_original_book_id(title):
    """Return the ``id`` of the first row of ``books_df`` whose title equals ``title``.

    Raises:
        IndexError: If no row has that title.
    """
    title_matches = books_df['title'] == title
    matching_ids = books_df.loc[title_matches, 'id'].values
    return matching_ids[0]
|
|
|
|
|
|
|
|
def get_top_five_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the ids of the books most similar to the book named ``title``.

    Despite the function name, the original implementation returned up to ten
    ids; ``top_n`` keeps that default while letting callers request another
    count.

    Args:
        title: Title to look up in the module-level ``indices`` Series.
        cosine_sim: Similarity matrix whose row ``i`` holds the scores of book
            ``i`` against every book. The default is the module-level matrix
            as it existed when this function was defined.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of ``id`` values from ``books_df``, most similar first, never
        containing the queried book itself.

    Raises:
        KeyError: If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    # A title that occurs more than once in the lookup yields a Series rather
    # than a scalar; fall back to its first row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    scores = cosine_sim[idx]

    # Rank every row by similarity, highest first. sorted() is stable, so rows
    # with equal scores keep their original relative order.
    ranked = sorted(range(len(scores)), key=lambda row: scores[row], reverse=True)

    # Drop the queried book explicitly instead of assuming it sorts first:
    # another row can tie with it (e.g. identical text) and would otherwise be
    # discarded in its place.
    book_indices = [row for row in ranked if row != idx][:top_n]

    # Read the ids by position. Going through the title and back to an id
    # returns the wrong row whenever two books share a title.
    return books_df['id'].iloc[book_indices].tolist()
|
|
|
|
|
# Notebook-style sanity check: list the recommended ids for one known title,
# then show the URLs of the rows those ids belong to.
sample_title = 'Walls of Ash'
get_top_five_recommendations(sample_title)

recommended_mask = books_df['id'].isin(get_top_five_recommendations(sample_title))
books_df[recommended_mask]['url']
|
|
|
|
|
|
|
|
from flask import Flask, jsonify, request |
|
|
from flask_ngrok import run_with_ngrok |
|
|
|
|
|
# Flask application object that the route below is registered on.
app = Flask(import_name=__name__)

# NOTE(review): the file later serves the app through waitress; confirm the
# ngrok hook is still needed in that setup.
run_with_ngrok(app)
|
|
|
|
|
import json |
|
|
@app.route('/predict/<int:id>', methods=['GET'])
def predict(id):
    """Return recommended book ids for the book whose ``id`` is in the URL.

    Args:
        id: Book id captured from the route. The name is dictated by the URL
            rule, so it is kept even though it shadows the builtin.

    Returns:
        A JSON array of integer book ids, or a JSON error object with HTTP
        status 404 when no book has the requested id.
    """
    matching_titles = books_df.loc[books_df['id'] == id, 'title']

    # An unknown id used to raise IndexError and surface as a 500; report it
    # as a client error instead.
    if matching_titles.empty:
        return jsonify({"error": "book {} not found".format(id)}), 404

    title = matching_titles.iloc[0]
    print(title)

    # Convert to plain ints so the values are JSON-serialisable whatever
    # numeric type the recommender returns.
    prediction_result = [int(x) for x in get_top_five_recommendations(title)]

    # jsonify sets the application/json content type; a bare json.dumps string
    # is served with Flask's default HTML content type.
    return jsonify(prediction_result)
|
|
|
|
|
from waitress import serve |
|
|
|
|
|
if __name__ == '__main__':
    # Serve the Flask app with waitress on port 8080, listening on all
    # interfaces; only runs when the file is executed as a script.
    serve(app, port=8080, host="0.0.0.0")
|
|
|
|
|
|