Upload 7 files
Browse files- B20AI049(PCS-2)Code.py +99 -0
- app.py +142 -0
- knnmodel.pkl +3 -0
- load.py +5 -0
- movies (1).csv +0 -0
- requirement.txt +5 -0
- user_ratings.csv +0 -0
B20AI049(PCS-2)Code.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reinforcement-learning-based TCP congestion control (PCC) training script:
# trains a PPO1 agent on the PccNs-v0 network-simulator environment and
# exports the learned policy as a TensorFlow SavedModel for serving.
import gym
import network_sim  # side-effect import: registers the PccNs-v0 gym environment
import tensorflow as tf

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines import PPO1

import os
import sys
import inspect

# Resolve this file's directory and put its parent on sys.path so that
# common.simple_arg_parse can be imported regardless of the working directory.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from common.simple_arg_parse import arg_or_default

# Hidden-layer widths, e.g. "--arch 32,16"; an empty string means no hidden layers.
arch_str = arg_or_default("--arch", default="32,16")
if arch_str == "":
    arch = []
else:
    arch = [int(layer_width) for layer_width in arch_str.split(",")]
print("Architecture is: %s" % str(arch))

# Captured by MyMlpPolicy.__init__ so the checkpoint saver below can reuse
# the session the policy network was built in.
training_sess = None


class MyMlpPolicy(FeedForwardPolicy):
    """MLP actor-critic policy using the architecture chosen on the command line."""

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                 reuse=False, **_kwargs):
        super(MyMlpPolicy, self).__init__(
            sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse,
            net_arch=[{"pi": arch, "vf": arch}],
            feature_extraction="mlp", **_kwargs)
        # Remember the TF session for checkpointing below.
        global training_sess
        training_sess = sess


env = gym.make('PccNs-v0')

# Discount factor for the return.
gamma = arg_or_default("--gamma", default=0.99)
print("gamma = %f" % gamma)

model = PPO1(MyMlpPolicy, env, verbose=1, schedule='constant',
             timesteps_per_actorbatch=8192, optim_batchsize=2048, gamma=gamma)

# Six training rounds; a checkpoint is written before each round so partial
# progress survives a crash.
# NOTE(review): indentation in the original paste was ambiguous — learn()
# inside the loop matches the checkpoint-per-round intent; confirm.
for i in range(0, 6):
    with model.graph.as_default():
        saver = tf.train.Saver()
        saver.save(training_sess, "./pcc_model_%d.ckpt" % i)
    model.learn(total_timesteps=(1600 * 410))

# Export the trained policy as a SavedModel.
default_export_dir = "/tmp/pcc_saved_models/model_A/"
export_dir = arg_or_default("--model-dir", default=default_export_dir)
with model.graph.as_default():
    pol = model.policy_pi  # act_model

    obs_ph = pol.obs_ph
    act = pol.deterministic_action
    sampled_act = pol.action

    obs_input = tf.saved_model.utils.build_tensor_info(obs_ph)
    outputs_tensor_info = tf.saved_model.utils.build_tensor_info(act)
    stochastic_act_tensor_info = tf.saved_model.utils.build_tensor_info(sampled_act)

    # Serving signature: observation in, deterministic and stochastic actions out.
    signature = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={"ob": obs_input},
        outputs={"act": outputs_tensor_info,
                 "stochastic_act": stochastic_act_tensor_info},
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

    signature_map = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            signature}

    # Build the SavedModel protocol buffer and persist variables/assets.
    model_builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    model_builder.add_meta_graph_and_variables(
        model.sess,
        tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map=signature_map,
        clear_devices=True)
    # Save the graph definition as text for easier inspection.
    model_builder.save(as_text=True)
|
app.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ---- Data loading and preprocessing for the movie recommender ----
import numpy as np
import pickle
import streamlit as st
import pandas as pd
import requests


# Forward slashes avoid "\m", "\c", etc. being parsed as (invalid) escape
# sequences in these Windows paths — the original mixed both styles.
movies = pd.read_csv('C:/codes/PRML Project/movies (1).csv')
ratings = pd.read_csv('C:/codes/PRML Project/user_ratings.csv')

# Drop the trailing " (YYYY)" year suffix from each title.
movies['title'] = movies['title'].str.strip().str[:-7]
# Replace the '|' genre separator with a space; regex=False makes the '|'
# literal (under regex semantics it would be an alternation metacharacter).
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

from sklearn.feature_extraction.text import CountVectorizer

# Turn each movie's space-separated genre string into a token-count vector.
cv = CountVectorizer()
genres_tokens = cv.fit_transform(movies['genres'].values)

genres_features = cv.get_feature_names_out()

genres_tokens = pd.DataFrame(genres_tokens.toarray(), columns=genres_features.tolist())

# Store each movie's whole genre vector as a single list in one column.
genres_tokens['combined'] = genres_tokens.values.tolist()

movies['genres'] = genres_tokens['combined']

# ---- Preprocessing of the ratings matrix ----

# movie x user rating matrix; missing ratings become 0.
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating')

pivot_mat.fillna(0, inplace=True)

# Count non-zero entries per movie row (user_votes) and per user column
# (vote_movie), keeping the corresponding movieId / column number alongside.
vote_movie = [[], []]
user_votes = [[], []]
sh = pivot_mat.shape
for i in range(sh[0]):
    r, c = np.unique(pivot_mat.values[i], return_counts=True)
    # NOTE(review): c[1:] assumes every row contains at least one 0 rating so
    # that c[0] is the zero count — typical for a sparse ratings matrix.
    user_votes[0].append(np.sum(c[1:]))
    user_votes[1].append(pivot_mat.index[i])
for i in range(sh[1]):
    r, c = np.unique(pivot_mat.values[:, i], return_counts=True)
    vote_movie[0].append(np.sum(c[1:]))
    vote_movie[1].append(i + 1)


vote_movie = np.array(vote_movie).T
user_votes = np.array(user_votes).T

# Keep only movies that received more than 10 ratings.
pivot_mat = pivot_mat.loc[user_votes[:, 1][user_votes[:, 0] > 10], :]

# Number of empty (zero) cells remaining — vectorized count instead of the
# original O(rows * cols) Python double loop (same result, never read later).
zc = int((pivot_mat.values == 0).sum())


from scipy.sparse import csr_matrix
# Sparse representation for the nearest-neighbour model.
csr_data = csr_matrix(pivot_mat.values)
pivot_mat.reset_index(inplace=True)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def dist_rec(movie_name, rec):
    """Recommend up to *rec* movies closest to *movie_name* in genre space.

    Computes the Euclidean distance between the multi-hot genre vectors in
    the module-level ``movies`` DataFrame.

    Returns a list of titles (closest first), or the string
    "Movie not found" when the title is not present.
    """
    # Explicit empty check instead of the original bare ``except``, which
    # silently swallowed every error (including typos in this function).
    match = movies[movies['title'] == movie_name]
    if match.empty:
        return "Movie not found"
    arr = np.array(match.values[0][2])  # column 2 holds the genre vector

    mov = movies[movies['title'] != movie_name].values
    dis = np.array([np.sqrt(np.sum((np.array(row[2]) - arr) ** 2)) for row in mov])

    # One stable argsort for the k smallest distances. The original looped
    # argmin and overwrote the winner with a magic 9999 sentinel, which also
    # breaks silently if a real distance ever reaches 9999; stable sort keeps
    # the same first-occurrence tie-breaking as repeated argmin.
    nearest = np.argsort(dis, kind='stable')[:rec]
    return [mov[:, 1][i] for i in nearest]
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# Item-based collaborative filtering: fit a brute-force k-nearest-neighbours
# model with cosine distance on the sparse movie x user rating matrix.
# (The original comment said "K-means" — this is KNN, not clustering.)
from sklearn.neighbors import NearestNeighbors

knn = NearestNeighbors(n_neighbors=20, metric='cosine', algorithm='brute', n_jobs=-1)
knn.fit(csr_data)
|
| 91 |
+
|
| 92 |
+
def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Recommend movies rated similarly to *movie_name* via the fitted KNN model.

    Returns a DataFrame with 'Title' and 'Distance' columns, or the string
    "No movies found. Please check your input" when no title matches.
    """
    # regex=False: many titles contain regex metacharacters such as '(' and
    # would otherwise be interpreted as a pattern (or raise re.error).
    movie_list = movies[movies['title'].str.contains(movie_name, regex=False)]
    if movie_list.empty:
        return "No movies found. Please check your input"

    movie_idx = movie_list.iloc[0]['movieId']
    # Translate the movieId into its row position in the pivot matrix.
    movie_idx = pivot_mat[pivot_mat['movieId'] == movie_idx].index[0]
    # +1 neighbours because the query movie itself is its own closest match.
    distances, indices = knn.kneighbors(csr_data[movie_idx],
                                        n_neighbors=n_movies_to_reccomend + 1)
    # Ascending sort, then [:0:-1] drops element 0 (the query movie, distance
    # 0) and leaves the neighbours ordered farthest-first — preserved from the
    # original so the UI output order is unchanged.
    rec_movie_indices = sorted(
        list(zip(indices.squeeze().tolist(), distances.squeeze().tolist())),
        key=lambda x: x[1])[:0:-1]

    recommend_frame = []
    for row_idx, dist in rec_movie_indices:
        neighbour_movie_id = pivot_mat.iloc[row_idx]['movieId']
        idx = movies[movies['movieId'] == neighbour_movie_id].index
        recommend_frame.append({'Title': movies.iloc[idx]['title'].values[0],
                                'Distance': dist})
    return pd.DataFrame(recommend_frame, index=range(1, n_movies_to_reccomend + 1))
|
| 108 |
+
|
| 109 |
+
# ---- Streamlit user interface ----
st.header('Movie Recommender System')

option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))

selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)

number_of_recommendations = st.number_input(
    "Type the number of recommendations to get"
)

if st.button('Show Recommendations'):

    if option == 'Genre based':
        movie_recommendations = dist_rec(selected_movie, int(number_of_recommendations))

        # dist_rec signals an unknown title with an error string; without this
        # guard the loop below would iterate over the string's characters.
        if isinstance(movie_recommendations, str):
            st.text(movie_recommendations)
        else:
            st.text(f"Here are {number_of_recommendations} recommendations for {selected_movie}")

            for i in range(int(number_of_recommendations)):
                st.text(f"{i+1}. {movie_recommendations[i]}")

    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(selected_movie, int(number_of_recommendations))

        # Same guard: a string return means no matching title was found, and
        # indexing it with ['Title'] would raise TypeError.
        if isinstance(movie_recommendations, str):
            st.text(movie_recommendations)
        else:
            st.text(f"Here are {number_of_recommendations} recommendations for {selected_movie}")

            for i in movie_recommendations['Title']:
                st.text(i)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
|
knnmodel.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9737bd78d8fa2631ba751b3671e2181f961c67c0c58eee30bfcdf7955012f6d2
|
| 3 |
+
size 884118
|
load.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import joblib

# Raw string prevents the Windows path's backslashes from being parsed as
# (invalid) escape sequences such as "\P" or "\k".
knn_reccomendation = joblib.load(r'C:\codes\PRML Project\knnmodel.pkl')

# NOTE(review): the pickled object is a fitted NearestNeighbors model per
# app.py, which has no ``recommendations`` attribute — confirm what this
# pickle actually contains before relying on this call.
knn_reccomendation.recommendations('Iron Man', 3)
|
movies (1).csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirement.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==1.22.3
streamlit==1.8.0
pandas==1.4.1
scikit-learn==1.0.2
gsheetsdb==0.1.13.1
# NOTE(review): app.py also imports scipy and requests, and load.py imports
# joblib — these should be pinned here as well so the app installs cleanly.
|
user_ratings.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|