Spaces:
Sleeping
Sleeping
Jarno Pohjonen committed on
Commit ·
af5cf7c
1
Parent(s): 9c3a55c
add csv data and endpoints to show user track history from it
Browse files- .gitignore +2 -0
- data/music_info.csv +3 -0
- data/user_listening_history_10k.csv +3 -0
- recommender.py +16 -0
- server.py +14 -22
- tracks.py +40 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.idea
|
| 2 |
+
__pycache__
|
data/music_info.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d930430f811ba3c77f217b3f456f2b6271c238b828d6d9ad76e889b5d725f187
|
| 3 |
+
size 14985870
|
data/user_listening_history_10k.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47a82d52ec512f00bf1a3416ecbf153aaa478266e87f6d3c0c4bff85ce4e1d4a
|
| 3 |
+
size 620427
|
recommender.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastai.learner import Learner
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
|
| 5 |
+
# TODO: Fetch list of not listened songs as entries
|
| 6 |
+
not_listened_songs = ["Revelry, Kings of Leon, 2008", "Gears, Miss May I, 2010", "Sexy Bitch, David Guetta, 2009"]
|
| 7 |
+
input_dataframe = pd.DataFrame({'user_id': ["440abe26940ae9d9268157222a4a3d5735d44ed8"] * len(not_listened_songs), 'entry': not_listened_songs})
|
| 8 |
+
test_dl = learn.dls.test_dl(input_dataframe)
|
| 9 |
+
predictions = learn.get_preds(dl=test_dl)
|
| 10 |
+
|
| 11 |
+
# TODO: Return recommendations in track format
|
| 12 |
+
return {
|
| 13 |
+
"user_id": user_id,
|
| 14 |
+
"limit": limit,
|
| 15 |
+
"recommendations": predictions[0].numpy().tolist()
|
| 16 |
+
}
|
server.py
CHANGED
|
@@ -1,12 +1,14 @@
|
|
| 1 |
from fastai.collab import load_learner
|
| 2 |
from fastapi import FastAPI, Query
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
-
from custom_models import DotProductBias
|
| 5 |
import asyncio
|
| 6 |
import uvicorn
|
| 7 |
-
import pandas as pd
|
| 8 |
import os
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
# FastAPI app
|
| 11 |
app = FastAPI()
|
| 12 |
|
|
@@ -35,28 +37,18 @@ async def startup_event():
|
|
| 35 |
tasks = [asyncio.ensure_future(setup_learner())] # assign some task
|
| 36 |
learn = (await asyncio.gather(*tasks))[0]
|
| 37 |
|
| 38 |
-
@app.get(
|
| 39 |
-
async def
|
| 40 |
-
return
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
]
|
| 47 |
-
}
|
| 48 |
|
| 49 |
@app.get("/recommend/{user_id}")
|
| 50 |
-
async def
|
| 51 |
-
|
| 52 |
-
print(user_id)
|
| 53 |
-
not_listened_songs = ["Revelry, Kings of Leon, 2008", "Gears, Miss May I, 2010", "Sexy Bitch, David Guetta, 2009"]
|
| 54 |
-
input_dataframe = pd.DataFrame({'user_id': ["440abe26940ae9d9268157222a4a3d5735d44ed8"] * len(not_listened_songs), 'entry': not_listened_songs})
|
| 55 |
-
test_dl = learn.dls.test_dl(input_dataframe)
|
| 56 |
-
predictions = learn.get_preds(dl=test_dl)
|
| 57 |
-
print(predictions)
|
| 58 |
-
#pred = learn.predict(file)
|
| 59 |
-
return {"result": predictions[0].numpy().tolist()}
|
| 60 |
|
| 61 |
if __name__ == "__main__":
|
| 62 |
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
|
|
|
|
| 1 |
from fastai.collab import load_learner
|
| 2 |
from fastapi import FastAPI, Query
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from custom_models import DotProductBias
|
| 5 |
import asyncio
|
| 6 |
import uvicorn
|
|
|
|
| 7 |
import os
|
| 8 |
|
| 9 |
+
from tracks import get_top_tracks_for_user, get_users_with_track_interactions
|
| 10 |
+
from recommender import get_recommendations_for_user
|
| 11 |
+
|
| 12 |
# FastAPI app
|
| 13 |
app = FastAPI()
|
| 14 |
|
|
|
|
| 37 |
tasks = [asyncio.ensure_future(setup_learner())] # assign some task
|
| 38 |
learn = (await asyncio.gather(*tasks))[0]
|
| 39 |
|
| 40 |
+
@app.get("/users")
async def get_users(limit: int = Query(10)):
    """List users that appear in the listening history, with interaction counts."""
    users = get_users_with_track_interactions(limit=limit)
    return users
| 43 |
+
|
| 44 |
+
@app.get('/users/{user_id}')
async def get_user_track_history(user_id: str, limit: int = Query(5)):
    """Return the user's most-played tracks from the CSV-backed history."""
    return {"user_id": user_id, "history": get_top_tracks_for_user(user_id, limit)}
|
|
|
|
|
|
| 48 |
|
| 49 |
@app.get("/recommend/{user_id}")
async def get_recommendations(user_id: str, num_recommendations: int = Query(5)):
    """Score candidate tracks for this user with the collab-filtering learner."""
    recommendations = get_recommendations_for_user(learn, user_id, num_recommendations)
    return recommendations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
if __name__ == "__main__":
|
| 54 |
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
|
tracks.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Load track metadata and user listening history and build per-user lookups.

Runs at import time: reads two CSVs from ./data, joins them on 'track_id',
and materializes `lookup_table`, mapping user_id -> list of track records.
"""
import pandas as pd

# Read the CSV files (paths are relative to the process working directory).
print("Reading music info csv ...")
tracks_df = pd.read_csv('data/music_info.csv')

print("Reading user listening history ...")
# NOTE(review): nrows=1000 reads only the first 1000 history rows — presumably
# a startup/memory cap during development; confirm before relying on coverage.
track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv', nrows=1000)

# Merge the dataframes on 'track_id'.
# 'left' keeps every track, including tracks with no listening history rows.
dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')

# Convert all NaN values to empty strings and all columns to string type,
# so downstream code can treat every field (including playcount) as a string.
dataframe.fillna('', inplace=True)
dataframe = dataframe.astype(str)

# Group by 'user_id' and then create a list of dictionaries for each group.
# NOTE(review): tracks with no listeners got user_id '' via fillna above, so
# the table gains an '' key holding all unlistened tracks — verify intended.
lookup_table = {user_id: group.drop('user_id', axis=1).to_dict('records')
                for user_id, group in dataframe.groupby('user_id')}
+
def get_users_with_track_interactions(ascending=False, limit=10):
    """Rank users by how many listening-history rows they have.

    Args:
        ascending: Sort least-active users first when True.
        limit: Cap on the number of users returned; None returns all.

    Returns:
        List of {'user_id': ..., 'track_interactions': ...} dicts.
    """
    summary = (
        track_interactions_df
        .groupby('user_id')
        .size()
        .reset_index(name='track_interactions')
        .sort_values(by='track_interactions', ascending=ascending)
    )
    if limit is not None:
        summary = summary.head(limit)
    return summary.to_dict(orient='records')
| 32 |
+
def get_top_tracks_for_user(user_id: str, limit=20):
|
| 33 |
+
# Retrieve the user's track list from the lookup table or an empty list if not found
|
| 34 |
+
track_list = lookup_table.get(user_id, [])
|
| 35 |
+
# Sort the track list by 'playcount' in descending order (assuming 'playcount' is stored as a string)
|
| 36 |
+
sorted_tracks = sorted(track_list, key=lambda x: int(x['playcount']) if 'playcount' in x and x['playcount'].isdigit() else 0, reverse=True)
|
| 37 |
+
# Apply the limit if specified
|
| 38 |
+
if limit is not None:
|
| 39 |
+
sorted_tracks = sorted_tracks[:limit]
|
| 40 |
+
return sorted_tracks
|