Spaces:
Runtime error
Runtime error
Commit
·
3fbcf3c
1
Parent(s):
e2703c5
Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""HS_Surprise Module_Metacritic_Games_Recomm.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colaboratory.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1sZ9FI7bnnNBcvFa2rt78EKYeU2L4XrXi
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
# "!pip install surprise" is IPython/Colab shell syntax and raises a SyntaxError
# in a plain .py file, so the install is run through the current interpreter's
# pip instead.
# NOTE(review): declaring the package in requirements.txt is the usual
# alternative to installing at start-up — confirm which the deployment expects.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "surprise"])
|
| 11 |
+
|
| 12 |
+
from surprise import NMF, SVD, SVDpp, KNNBasic, KNNWithMeans, KNNWithZScore, CoClustering
|
| 13 |
+
from surprise.model_selection import cross_validate
|
| 14 |
+
from surprise import Reader, Dataset
|
| 15 |
+
|
| 16 |
+
# "!pip install gradio" is IPython/Colab shell syntax and raises a SyntaxError
# in a plain .py file, so the install is run through the current interpreter's
# pip instead.
# NOTE(review): declaring the package in requirements.txt is the usual
# alternative to installing at start-up — confirm which the deployment expects.
import subprocess
import sys

subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
|
| 17 |
+
|
| 18 |
+
import gradio as gr
|
| 19 |
+
|
| 20 |
+
import pandas as pd
|
| 21 |
+
import numpy as np
|
| 22 |
+
import scipy as sp
|
| 23 |
+
from scipy import sparse
|
| 24 |
+
from datetime import datetime
|
| 25 |
+
|
| 26 |
+
# The notebook read the CSV from Colab's /content directory. That path stays
# the first choice; when it does not exist, fall back to a file of the same
# name in the current working directory.
# NOTE(review): the fallback location is an assumption — confirm where the CSV
# is actually deployed.
from pathlib import Path

_csv_path = Path("/content/metacritic_critic_reviews.csv")
if not _csv_path.exists():
    _csv_path = Path(_csv_path.name)

# on_bad_lines='skip' silently drops rows the parser cannot read.
df = pd.read_csv(_csv_path, on_bad_lines='skip', encoding="utf-8")
# Discard every row that has a missing value in any column.
df.dropna(inplace=True)
|
| 29 |
+
|
| 30 |
+
#Create date column by converting the date into a datetime object then returning only the year
|
| 31 |
+
def add_year(full_date: str) -> int:
    """Return the calendar year of a date written like ``'Jan 05, 2015'``.

    Args:
        full_date: Date text in ``'%b %d, %Y'`` form (abbreviated month
            name, day of month, four-digit year).

    Returns:
        The year component as an integer.

    Raises:
        ValueError: If ``full_date`` does not match the expected format.
    """
    return datetime.strptime(full_date, '%b %d, %Y').year
|
| 34 |
+
|
| 35 |
+
# Derive the 'year' column by running add_year over every 'date' value.
df['year'] = df['date'].map(add_year)
|
| 36 |
+
#Add the year in brackets to the name of the game to avoid confusion
|
| 37 |
+
def year_game(row) -> str:
    """Return the row's game name followed by its year in parentheses.

    Args:
        row: Any mapping-like object indexable by ``'game'`` and ``'year'``
            (this file passes DataFrame rows via ``df.apply(..., axis=1)``).

    Returns:
        A string of the form ``"<game> (<year>)"``.
    """
    return f"{row['game']} ({row['year']})"
|
| 41 |
+
|
| 42 |
+
# Append the year in brackets to every game name to avoid confusion between
# entries that share a title.
df['game'] = df.apply(year_game, axis=1)

#['PC', '3DS', 'PlayStation Vita', 'Wii U', 'PlayStation 4','Xbox One', 'Switch']
# Only PlayStation 4 rows are kept from here on.
df = df[df['platform'] == 'PlayStation 4']

# Keep only rows whose year is 2015 or later.
over_2015 = df[df['year'] >= 2015]

# Mean score per game, sorted from highest to lowest.
top_recent_scorers = over_2015.groupby('game')['score'].mean().sort_values(ascending=False)
# Names of the 40 best-scoring games.
top_40_games = top_recent_scorers.index[:40]

# Ratings table used by recommender(), which renames these three columns to
# itemID, userID and rating respectively.
combined_games_data = df[['game', 'name', 'score']]
algorithms = [NMF(), SVD(), SVDpp(), KNNWithZScore(), CoClustering()]
|
| 60 |
+
|
| 61 |
+
def recommender(user_prof, user_algo="KNN", combined_games_data=combined_games_data):
    """Return the ten games with the highest predicted rating for user 1001.

    The caller's non-zero ratings are appended to the existing ratings table,
    a Surprise algorithm is fitted on the combined data, and a rating is
    predicted for every remaining game that user 1001 has not rated.

    Args:
        user_prof: DataFrame with columns ``game``, ``name`` and ``score``.
            Rows whose score is 0 (or not numeric) are treated as unrated
            and ignored.
        user_algo: Either one of the names ``'NMF'``, ``'SVD'``, ``'SVDpp'``,
            ``'KNN'`` (KNNWithZScore) or ``'CoClustering'``, or an already
            constructed algorithm instance. ``None`` and unrecognised names
            fall back to NMF.
        combined_games_data: Existing ratings with the columns
            ``game``, ``name``, ``score`` in that order.

    Returns:
        DataFrame with columns ``iid`` and ``predictions`` holding at most ten
        rows, sorted by predicted rating in descending order.
    """
    target_uid = 1001

    # Resolve the requested algorithm by name. A fresh instance is built on
    # every call so that no fitted state is shared between calls.
    algo_factories = {
        'NMF': NMF,
        'SVD': SVD,
        'SVDpp': SVDpp,
        'KNN': KNNWithZScore,
        'CoClustering': CoClustering,
    }
    if user_algo is None or isinstance(user_algo, str):
        algo = algo_factories.get(user_algo, NMF)()
    else:
        algo = user_algo

    # NOTE(review): cells edited in the UI may arrive as text — TODO confirm.
    # Coercing keeps the "!= 0" filter and the rating values numeric.
    scores = pd.to_numeric(user_prof['score'], errors='coerce')
    my_ratings = user_prof.assign(score=scores)[scores.notna() & (scores != 0)]

    ratings = pd.concat([combined_games_data, my_ratings], axis=0, ignore_index=True)
    ratings.columns = ['itemID', 'userID', 'rating']

    # Keep only games that have at least as many ratings as the number of
    # games the caller rated.
    # NOTE(review): the original comment mentions "ideally 20 or more"; the
    # threshold actually used is the size of the caller's profile.
    review_counts = ratings.groupby('itemID')['rating'].transform('count')
    ratings = ratings.loc[review_counts >= len(my_ratings), ['userID', 'itemID', 'rating']]

    data = Dataset.load_from_df(ratings, Reader(rating_scale=(1.0, 100.0)))

    # Candidate games: every remaining game the target user has not rated.
    rated_by_user = ratings.loc[ratings['userID'] == target_uid, 'itemID']
    games_to_predict = np.setdiff1d(ratings['itemID'].unique(), rated_by_user)

    algo.fit(data.build_full_trainset())

    my_recs = [
        (iid, algo.predict(uid=target_uid, iid=iid).est)
        for iid in games_to_predict
    ]
    return (
        pd.DataFrame(my_recs, columns=['iid', 'predictions'])
        .sort_values('predictions', ascending=False)
        .head(10)
    )
|
| 103 |
+
|
| 104 |
+
# Pre-fill the input table with one row per top game: [game, user id 1001, 0].
# recommender() drops rows whose score is 0, so untouched rows are not ratings.
default_entries = [[game, 1001, 0] for game in top_40_games]

iface = gr.Interface(
    recommender,
    inputs=[
        gr.inputs.Dataframe(
            headers=['game', 'name', 'score'],
            default=default_entries,
        ),
        # The selected label is passed to recommender() as user_algo.
        gr.inputs.Radio(['NMF', 'SVD', 'SVDpp', 'KNN', 'CoClustering']),
    ],
    outputs="dataframe",
)
iface.launch(debug=True)
|