Vageesh1 commited on
Commit
1ed1542
·
1 Parent(s): 41c91ff

Upload 7 files

Browse files
Files changed (7) hide show
  1. B20AI049(PCS-2)Code.py +99 -0
  2. app.py +142 -0
  3. knnmodel.pkl +3 -0
  4. load.py +5 -0
  5. movies (1).csv +0 -0
  6. requirement.txt +5 -0
  7. user_ratings.csv +0 -0
B20AI049(PCS-2)Code.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Reinforcement-learning based TCP congestion control — training script.
# (Reconstructed from the diff view: "+" prefixes and interleaved diff
# line numbers removed, indentation restored.)

# Import the necessary libraries.
import gym
import network_sim  # project module; importing it registers the 'PccNs-v0' env
import tensorflow as tf

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines import PPO1

import os
import sys
import inspect

# Directory containing this file.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# Parent directory of the current one.
parentdir = os.path.dirname(currentdir)
# Put the parent directory on the module search path so the shared
# `common` package (simple_arg_parse) can be imported.
sys.path.insert(0, parentdir)
from common.simple_arg_parse import arg_or_default

# Hidden-layer widths for the policy network, e.g. "--arch 32,16".
arch_str = arg_or_default("--arch", default="32,16")
# An empty argument means "no hidden layers"; otherwise parse the
# comma-separated widths into a list of ints.
if arch_str == "":
    arch = []
else:
    arch = [int(layer_width) for layer_width in arch_str.split(",")]
print("Architecture is: %s" % str(arch))

# Filled in by MyMlpPolicy.__init__ with the TF session used for training,
# so the checkpoint-saving loop below can reach it.
training_sess = None
# Feed-forward (MLP) policy whose hidden architecture comes from --arch.
class MyMlpPolicy(FeedForwardPolicy):

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                 reuse=False, **_kwargs):
        """MLP policy with identical pi/vf hidden layers taken from `arch`.

        Args:
            sess: TF session used for training.
            ob_space: observation space of the environment.
            ac_space: action space of the environment.
            n_env, n_steps, n_batch: batching configuration.
            reuse: False (the default) trains fresh variables.
        """
        super(MyMlpPolicy, self).__init__(
            sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse,
            net_arch=[{"pi": arch, "vf": arch}],
            feature_extraction="mlp", **_kwargs)
        # Publish the session so the checkpoint-saving loop below can use it.
        # (In the diff view the indentation was lost; these two lines belong
        # inside __init__.)
        global training_sess
        training_sess = sess
# Create the packet-level congestion-control environment (registered by
# the network_sim import above).
env = gym.make('PccNs-v0')

# Discount factor for the PPO objective.
gamma = arg_or_default("--gamma", default=0.99)
print("gamma = %f" % gamma)

# PPO1 learner over the custom MLP policy.
model = PPO1(MyMlpPolicy, env, verbose=1, schedule='constant',
             timesteps_per_actorbatch=8192, optim_batchsize=2048, gamma=gamma)

# Alternate checkpointing and training. Note checkpoint i captures the
# weights BEFORE the i-th learning round, so pcc_model_0 is the untrained
# policy and the final trained weights are only in pcc_model_5 + one more
# unsaved round.
for i in range(0, 6):
    with model.graph.as_default():
        saver = tf.train.Saver()
        saver.save(training_sess, "./pcc_model_%d.ckpt" % i)
    # 1600 * 410 environment timesteps per round (the original comment
    # called these "seconds"/"epochs" — they are timesteps).
    model.learn(total_timesteps=(1600 * 410))
# Export the final policy as a TensorFlow SavedModel so it can be served.
default_export_dir = "/tmp/pcc_saved_models/model_A/"
# --model-dir overrides the default export location.
export_dir = arg_or_default("--model-dir", default=default_export_dir)

with model.graph.as_default():
    pol = model.policy_pi  # act_model

    obs_ph = pol.obs_ph                     # observation placeholder (input)
    act = pol.deterministic_action          # greedy action output
    sampled_act = pol.action                # stochastic (sampled) action output

    obs_input = tf.saved_model.utils.build_tensor_info(obs_ph)
    outputs_tensor_info = tf.saved_model.utils.build_tensor_info(act)
    stochastic_act_tensor_info = tf.saved_model.utils.build_tensor_info(sampled_act)

    # SignatureDef mapping the observation input to both action outputs.
    signature = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={"ob": obs_input},
        outputs={"act": outputs_tensor_info,
                 "stochastic_act": stochastic_act_tensor_info},
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

    signature_map = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            signature}

    # Build the SavedModel protocol buffer and save variables and assets.
    model_builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    model_builder.add_meta_graph_and_variables(
        model.sess,
        tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map=signature_map,
        clear_devices=True)
    # Save the SavedModel in text form.
    model_builder.save(as_text=True)
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pickle
import streamlit as st
import pandas as pd
import requests
import os

# Load the data relative to this file so the app also works when deployed:
# the original hard-coded Windows paths (C:\codes\PRML Project\...) only
# resolved on the author's machine, and both CSVs are uploaded next to app.py.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
movies = pd.read_csv(os.path.join(_BASE_DIR, 'movies (1).csv'))
ratings = pd.read_csv(os.path.join(_BASE_DIR, 'user_ratings.csv'))

# Drop the trailing " (YYYY)" year suffix from each title.
movies['title'] = movies['title'].str.strip().str[:-7]
# Replace the '|' separators in the genre string with spaces.
# regex=False makes the literal intent explicit: '|' is regex alternation,
# and relying on pandas' single-char-literal special case is deprecated.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words over the genre tokens.
cv = CountVectorizer()
genres_tokens = cv.fit_transform(movies['genres'].values)
genres_features = cv.get_feature_names_out()

genres_tokens = pd.DataFrame(genres_tokens.toarray(),
                             columns=genres_features.tolist())
# One multi-hot genre vector (list of counts) per movie.
genres_tokens['combined'] = genres_tokens.values.tolist()
movies['genres'] = genres_tokens['combined']

# --- preprocessing of the rating column ---
# movie x user matrix of ratings; missing ratings become 0.
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating')
pivot_mat.fillna(0, inplace=True)

# user_votes: non-zero rating count per movie (rows); vote_movie: non-zero
# rating count per user (columns). c[1:] skips the count of the 0 value
# returned first by np.unique — assumes every row/column contains at least
# one 0, which holds for sparse rating data (TODO confirm on this dataset).
vote_movie = [[], []]
user_votes = [[], []]
sh = pivot_mat.shape
for i in range(sh[0]):
    r, c = np.unique(pivot_mat.values[i], return_counts=True)
    user_votes[0].append(np.sum(c[1:]))
    user_votes[1].append(pivot_mat.index[i])
for i in range(sh[1]):
    r, c = np.unique(pivot_mat.values[:, i], return_counts=True)
    vote_movie[0].append(np.sum(c[1:]))
    vote_movie[1].append(i + 1)

vote_movie = np.array(vote_movie).T
user_votes = np.array(user_votes).T

# Keep only movies rated by more than 10 users.
pivot_mat = pivot_mat.loc[user_votes[:, 1][user_votes[:, 0] > 10], :]

# Count the zero (missing) entries of the filtered matrix — it motivates
# the sparse representation below. Vectorized; the original O(rows*cols)
# double iloc loop produced the same (otherwise unused) value.
zc = int((pivot_mat.values == 0).sum())

from scipy.sparse import csr_matrix
# Sparse matrix for the KNN model; reset the index so row positions map
# back to movieId via the 'movieId' column.
csr_data = csr_matrix(pivot_mat.values)
pivot_mat.reset_index(inplace=True)
def dist_rec(movie_name, rec):
    """Genre-based recommender.

    Returns a list of `rec` titles whose multi-hot genre vectors are
    closest (Euclidean distance) to the named movie, or the string
    "Movie not found" when the title is not in the catalogue.

    Assumes the module-level `movies` frame has movieId/title/genres in
    columns 0/1/2 (as built by the preprocessing above).
    """
    try:
        arr = np.array(movies[movies['title'] == movie_name].values[0][2])
    except IndexError:
        # Narrowed from the original bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; only the empty selection can raise.
        return "Movie not found"

    mov = movies[movies['title'] != movie_name].values
    # Euclidean distance between genre vectors — the same assignment rule
    # K-means clustering uses.
    dis = [np.sqrt(np.sum((np.array(row[2]) - arr) ** 2)) for row in mov]
    recommendations = []
    for _ in range(rec):
        nearest = np.argmin(dis)  # computed once per round (was twice)
        recommendations.append(mov[:, 1][nearest])
        dis[nearest] = 9999  # sentinel: exclude this movie from later rounds
    return recommendations
# Collaborative-filtering model: cosine-distance KNN over the sparse
# movie x user rating matrix (brute-force search on all cores).
from sklearn.neighbors import NearestNeighbors
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
knn.fit(csr_data)

def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Return a DataFrame of Title/Distance rows for the nearest
    neighbours of the first catalogue title containing `movie_name`,
    or an error string when no title matches."""
    matches = movies[movies['title'].str.contains(movie_name)]
    if not len(matches):
        return "No movies found. Please check your input"
    # Map the matched movieId to its row position in the filtered pivot matrix.
    target_id = matches.iloc[0]['movieId']
    row_pos = pivot_mat[pivot_mat['movieId'] == target_id].index[0]
    distances, indices = knn.kneighbors(csr_data[row_pos],
                                        n_neighbors=n_movies_to_reccomend + 1)
    pairs = zip(indices.squeeze().tolist(), distances.squeeze().tolist())
    # Sort by distance, drop the query itself (position 0), and reverse so
    # the farthest of the selected neighbours comes first.
    neighbours = sorted(pairs, key=lambda p: p[1])[:0:-1]
    rows = []
    for pos, dist in neighbours:
        neighbour_id = pivot_mat.iloc[pos]['movieId']
        title_idx = movies[movies['movieId'] == neighbour_id].index
        rows.append({'Title': movies.iloc[title_idx]['title'].values[0],
                     'Distance': dist})
    return pd.DataFrame(rows, index=range(1, n_movies_to_reccomend + 1))
st.header('Movie Recommender System')

# Model selector, query title, and number of recommendations.
option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))

selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)

# st.number_input returns a float; cast to int wherever a count is needed.
number_of_recommendations = st.number_input(
    "Type the number of recommendations to get"
)

if st.button('Show Recommendations'):
    n = int(number_of_recommendations)

    if option == 'Genre based':
        movie_recommendations = dist_rec(selected_movie, n)

        # dist_rec returns an error string for an unknown title; the
        # original code then indexed into that string character by character.
        if isinstance(movie_recommendations, str):
            st.text(movie_recommendations)
        else:
            st.text(f"Here are {number_of_recommendations} recommendations for {selected_movie}")
            for i in range(n):
                st.text(f"{i+1}. {movie_recommendations[i]}")

    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(selected_movie, n)

        # knn_reccomendation returns an error string when nothing matches;
        # the original ['Title'] lookup on it raised TypeError.
        if isinstance(movie_recommendations, str):
            st.text(movie_recommendations)
        else:
            st.text(f"Here are {number_of_recommendations} recommendations for {selected_movie}")
            for title in movie_recommendations['Title']:
                st.text(title)
knnmodel.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9737bd78d8fa2631ba751b3671e2181f961c67c0c58eee30bfcdf7955012f6d2
3
+ size 884118
load.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
import joblib
import os

# Load the pickled KNN recommender saved next to this script; a relative
# path replaces the author-machine-only C:\codes\... location.
# NOTE(review): joblib.load deserializes arbitrary pickled code — only load
# model files you trust.
_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'knnmodel.pkl')
knn_reccomendation = joblib.load(_MODEL_PATH)

# Smoke test: top-3 recommendations for 'Iron Man'.
# TODO(review): confirm the pickled object actually exposes a
# `.recommendations` method — the NearestNeighbors model itself does not.
knn_reccomendation.recommendations('Iron Man', 3)
movies (1).csv ADDED
The diff for this file is too large to render. See raw diff
 
requirement.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy==1.22.3
2
+ streamlit==1.8.0
3
+ pandas==1.4.1
4
+ scikit-learn==1.0.2
5
+ gsheetsdb==0.1.13.1
6
+ scipy
7
+ requests
user_ratings.csv ADDED
The diff for this file is too large to render. See raw diff