Vageesh1 commited on
Commit
1ed1542
·
1 Parent(s): 41c91ff

Upload 7 files

Browse files
Files changed (7) hide show
  1. B20AI049(PCS-2)Code.py +99 -0
  2. app.py +142 -0
  3. knnmodel.pkl +3 -0
  4. load.py +5 -0
  5. movies (1).csv +0 -0
  6. requirement.txt +5 -0
  7. user_ratings.csv +0 -0
B20AI049(PCS-2)Code.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Reinforcement-learning based TCP congestion control — training script.
# (Reconstructed from the diff view: "+" prefixes and interleaved diff
# line numbers removed, indentation restored.)

# Import the necessary libraries.
import gym
import network_sim  # project module; importing it registers the 'PccNs-v0' env
import tensorflow as tf

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.policies import FeedForwardPolicy
from stable_baselines import PPO1

import os
import sys
import inspect

# Directory containing this file.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# Parent directory of the current one.
parentdir = os.path.dirname(currentdir)
# Put the parent directory on the module search path so the shared
# `common` package (simple_arg_parse) can be imported.
sys.path.insert(0, parentdir)
from common.simple_arg_parse import arg_or_default

# Hidden-layer widths for the policy network, e.g. "--arch 32,16".
arch_str = arg_or_default("--arch", default="32,16")
# An empty argument means "no hidden layers"; otherwise parse the
# comma-separated widths into a list of ints.
if arch_str == "":
    arch = []
else:
    arch = [int(layer_width) for layer_width in arch_str.split(",")]
print("Architecture is: %s" % str(arch))

# Filled in by MyMlpPolicy.__init__ with the TF session used for training,
# so the checkpoint-saving loop below can reach it.
training_sess = None
# Feed-forward (MLP) policy whose hidden architecture comes from --arch.
class MyMlpPolicy(FeedForwardPolicy):

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
                 reuse=False, **_kwargs):
        """MLP policy with identical pi/vf hidden layers taken from `arch`.

        Args:
            sess: TF session used for training.
            ob_space: observation space of the environment.
            ac_space: action space of the environment.
            n_env, n_steps, n_batch: batching configuration.
            reuse: False (the default) trains fresh variables.
        """
        super(MyMlpPolicy, self).__init__(
            sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse,
            net_arch=[{"pi": arch, "vf": arch}],
            feature_extraction="mlp", **_kwargs)
        # Publish the session so the checkpoint-saving loop below can use it.
        # (In the diff view the indentation was lost; these two lines belong
        # inside __init__.)
        global training_sess
        training_sess = sess
# Create the packet-level congestion-control environment (registered by
# the network_sim import above).
env = gym.make('PccNs-v0')

# Discount factor for the PPO objective.
gamma = arg_or_default("--gamma", default=0.99)
print("gamma = %f" % gamma)

# PPO1 learner over the custom MLP policy.
model = PPO1(MyMlpPolicy, env, verbose=1, schedule='constant',
             timesteps_per_actorbatch=8192, optim_batchsize=2048, gamma=gamma)

# Alternate checkpointing and training. Note checkpoint i captures the
# weights BEFORE the i-th learning round, so pcc_model_0 is the untrained
# policy and the final trained weights are only in pcc_model_5 + one more
# unsaved round.
for i in range(0, 6):
    with model.graph.as_default():
        saver = tf.train.Saver()
        saver.save(training_sess, "./pcc_model_%d.ckpt" % i)
    # 1600 * 410 environment timesteps per round (the original comment
    # called these "seconds"/"epochs" — they are timesteps).
    model.learn(total_timesteps=(1600 * 410))
# Export the final policy as a TensorFlow SavedModel so it can be served.
default_export_dir = "/tmp/pcc_saved_models/model_A/"
# --model-dir overrides the default export location.
export_dir = arg_or_default("--model-dir", default=default_export_dir)

with model.graph.as_default():
    pol = model.policy_pi  # act_model

    obs_ph = pol.obs_ph                     # observation placeholder (input)
    act = pol.deterministic_action          # greedy action output
    sampled_act = pol.action                # stochastic (sampled) action output

    obs_input = tf.saved_model.utils.build_tensor_info(obs_ph)
    outputs_tensor_info = tf.saved_model.utils.build_tensor_info(act)
    stochastic_act_tensor_info = tf.saved_model.utils.build_tensor_info(sampled_act)

    # SignatureDef mapping the observation input to both action outputs.
    signature = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={"ob": obs_input},
        outputs={"act": outputs_tensor_info,
                 "stochastic_act": stochastic_act_tensor_info},
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

    signature_map = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
            signature}

    # Build the SavedModel protocol buffer and save variables and assets.
    model_builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    model_builder.add_meta_graph_and_variables(
        model.sess,
        tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map=signature_map,
        clear_devices=True)
    # Save the SavedModel in text form.
    model_builder.save(as_text=True)
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pickle
import streamlit as st
import pandas as pd
import requests
import os

# Load the data relative to this file so the app also works when deployed:
# the original hard-coded Windows paths (C:\codes\PRML Project\...) only
# resolved on the author's machine, and both CSVs are uploaded next to app.py.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
movies = pd.read_csv(os.path.join(_BASE_DIR, 'movies (1).csv'))
ratings = pd.read_csv(os.path.join(_BASE_DIR, 'user_ratings.csv'))

# Drop the trailing " (YYYY)" year suffix from each title.
movies['title'] = movies['title'].str.strip().str[:-7]
# Replace the '|' separators in the genre string with spaces.
# regex=False makes the literal intent explicit: '|' is regex alternation,
# and relying on pandas' single-char-literal special case is deprecated.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words over the genre tokens.
cv = CountVectorizer()
genres_tokens = cv.fit_transform(movies['genres'].values)
genres_features = cv.get_feature_names_out()

genres_tokens = pd.DataFrame(genres_tokens.toarray(),
                             columns=genres_features.tolist())
# One multi-hot genre vector (list of counts) per movie.
genres_tokens['combined'] = genres_tokens.values.tolist()
movies['genres'] = genres_tokens['combined']

# --- preprocessing of the rating column ---
# movie x user matrix of ratings; missing ratings become 0.
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating')
pivot_mat.fillna(0, inplace=True)

# user_votes: non-zero rating count per movie (rows); vote_movie: non-zero
# rating count per user (columns). c[1:] skips the count of the 0 value
# returned first by np.unique — assumes every row/column contains at least
# one 0, which holds for sparse rating data (TODO confirm on this dataset).
vote_movie = [[], []]
user_votes = [[], []]
sh = pivot_mat.shape
for i in range(sh[0]):
    r, c = np.unique(pivot_mat.values[i], return_counts=True)
    user_votes[0].append(np.sum(c[1:]))
    user_votes[1].append(pivot_mat.index[i])
for i in range(sh[1]):
    r, c = np.unique(pivot_mat.values[:, i], return_counts=True)
    vote_movie[0].append(np.sum(c[1:]))
    vote_movie[1].append(i + 1)

vote_movie = np.array(vote_movie).T
user_votes = np.array(user_votes).T

# Keep only movies rated by more than 10 users.
pivot_mat = pivot_mat.loc[user_votes[:, 1][user_votes[:, 0] > 10], :]

# Count the zero (missing) entries of the filtered matrix — it motivates
# the sparse representation below. Vectorized; the original O(rows*cols)
# double iloc loop produced the same (otherwise unused) value.
zc = int((pivot_mat.values == 0).sum())

from scipy.sparse import csr_matrix
# Sparse matrix for the KNN model; reset the index so row positions map
# back to movieId via the 'movieId' column.
csr_data = csr_matrix(pivot_mat.values)
pivot_mat.reset_index(inplace=True)
def dist_rec(movie_name, rec):
    """Genre-based recommender.

    Returns a list of `rec` titles whose multi-hot genre vectors are
    closest (Euclidean distance) to the named movie, or the string
    "Movie not found" when the title is not in the catalogue.

    Assumes the module-level `movies` frame has movieId/title/genres in
    columns 0/1/2 (as built by the preprocessing above).
    """
    try:
        arr = np.array(movies[movies['title'] == movie_name].values[0][2])
    except IndexError:
        # Narrowed from the original bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; only the empty selection can raise.
        return "Movie not found"

    mov = movies[movies['title'] != movie_name].values
    # Euclidean distance between genre vectors — the same assignment rule
    # K-means clustering uses.
    dis = [np.sqrt(np.sum((np.array(row[2]) - arr) ** 2)) for row in mov]
    recommendations = []
    for _ in range(rec):
        nearest = np.argmin(dis)  # computed once per round (was twice)
        recommendations.append(mov[:, 1][nearest])
        dis[nearest] = 9999  # sentinel: exclude this movie from later rounds
    return recommendations
# Collaborative-filtering model: cosine-distance KNN over the sparse
# movie x user rating matrix (brute-force search on all cores).
from sklearn.neighbors import NearestNeighbors
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
knn.fit(csr_data)

def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Return a DataFrame of Title/Distance rows for the nearest
    neighbours of the first catalogue title containing `movie_name`,
    or an error string when no title matches."""
    matches = movies[movies['title'].str.contains(movie_name)]
    if not len(matches):
        return "No movies found. Please check your input"
    # Map the matched movieId to its row position in the filtered pivot matrix.
    target_id = matches.iloc[0]['movieId']
    row_pos = pivot_mat[pivot_mat['movieId'] == target_id].index[0]
    distances, indices = knn.kneighbors(csr_data[row_pos],
                                        n_neighbors=n_movies_to_reccomend + 1)
    pairs = zip(indices.squeeze().tolist(), distances.squeeze().tolist())
    # Sort by distance, drop the query itself (position 0), and reverse so
    # the farthest of the selected neighbours comes first.
    neighbours = sorted(pairs, key=lambda p: p[1])[:0:-1]
    rows = []
    for pos, dist in neighbours:
        neighbour_id = pivot_mat.iloc[pos]['movieId']
        title_idx = movies[movies['movieId'] == neighbour_id].index
        rows.append({'Title': movies.iloc[title_idx]['title'].values[0],
                     'Distance': dist})
    return pd.DataFrame(rows, index=range(1, n_movies_to_reccomend + 1))
st.header('Movie Recommender System')

# Model selector, query title, and number of recommendations.
option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))

selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)

# st.number_input returns a float; cast to int wherever a count is needed.
number_of_recommendations = st.number_input(
    "Type the number of recommendations to get"
)

if st.button('Show Recommendations'):
    n = int(number_of_recommendations)

    if option == 'Genre based':
        movie_recommendations = dist_rec(selected_movie, n)

        # dist_rec returns an error string for an unknown title; the
        # original code then indexed into that string character by character.
        if isinstance(movie_recommendations, str):
            st.text(movie_recommendations)
        else:
            st.text(f"Here are {number_of_recommendations} recommendations for {selected_movie}")
            for i in range(n):
                st.text(f"{i+1}. {movie_recommendations[i]}")

    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(selected_movie, n)

        # knn_reccomendation returns an error string when nothing matches;
        # the original ['Title'] lookup on it raised TypeError.
        if isinstance(movie_recommendations, str):
            st.text(movie_recommendations)
        else:
            st.text(f"Here are {number_of_recommendations} recommendations for {selected_movie}")
            for title in movie_recommendations['Title']:
                st.text(title)
knnmodel.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9737bd78d8fa2631ba751b3671e2181f961c67c0c58eee30bfcdf7955012f6d2
3
+ size 884118
load.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
import joblib
import os

# Load the pickled KNN recommender saved next to this script; a relative
# path replaces the author-machine-only C:\codes\... location.
# NOTE(review): joblib.load deserializes arbitrary pickled code — only load
# model files you trust.
_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'knnmodel.pkl')
knn_reccomendation = joblib.load(_MODEL_PATH)

# Smoke test: top-3 recommendations for 'Iron Man'.
# TODO(review): confirm the pickled object actually exposes a
# `.recommendations` method — the NearestNeighbors model itself does not.
knn_reccomendation.recommendations('Iron Man', 3)
movies (1).csv ADDED
The diff for this file is too large to render. See raw diff
 
requirement.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy==1.22.3
2
+ streamlit==1.8.0
3
+ pandas==1.4.1
4
+ scikit-learn==1.0.2
5
+ gsheetsdb==0.1.13.1
6
+ scipy
7
+ requests
user_ratings.csv ADDED
The diff for this file is too large to render. See raw diff