# -*- coding: utf-8 -*-
"""Recommendation_paper.ipynb — arXiv paper-title recommender.

Originally auto-generated by Colab:
https://colab.research.google.com/drive/16l0DYZhK4q7tjYvRqpa1IMyIatp1Gtd1

Pipeline: load arXiv titles -> embed them with a SentenceTransformer ->
persist embeddings/model/titles with pickle -> reload the artifacts ->
recommend the top-k most similar titles for a user-supplied query.

NOTE(review): the original notebook relied on IPython shell magics,
which are invalid in a plain .py file. Install dependencies up front:

    pip install -U sentence-transformers huggingface_hub datasets

In Colab, authenticate for the Hub push with
`from huggingface_hub import notebook_login; notebook_login()`.
"""

import pickle

import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util

# --- Load data ------------------------------------------------------------
# assumes the CSV has a "titles" column — TODO confirm against the dataset
data = pd.read_csv("/content/arxiv_data.csv")
print(data.head(10))

titles = data["titles"]
print(titles.head(5))

# --- Embed the titles -----------------------------------------------------
# Pick the GPU only when one exists; the original hard-coded device="cuda"
# and failed on CPU-only machines.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = SentenceTransformer("all-MiniLM-L6-v2")
encoded_titles = model.encode(titles, device=device)

# Preview the first five title/embedding pairs.
# (The original counter incremented after the break check and printed six.)
for i, (title, vector) in enumerate(zip(titles, encoded_titles)):
    if i == 5:
        break
    print("Titles", title)
    print("Encoded Data", vector)
    print("length of Encoded Data", len(vector))

print(type(titles), type(encoded_titles))
# checking the shape of embedding: (num_titles, embedding_dim)
print(encoded_titles.shape)

# --- Optional: publish the model to the Hugging Face Hub ------------------
# Best-effort: requires prior authentication; network/auth failures must not
# abort the rest of the pipeline.
try:
    model.push_to_hub("1998Shubham007/ModelRecomm")
except Exception as exc:
    print("Skipping push_to_hub:", exc)

# --- Persist embeddings, model and titles ---------------------------------
with open("embedding.pkl", "wb") as f:
    pickle.dump(encoded_titles, f)
with open("ModelRec.pkl", "wb") as f:
    pickle.dump(model, f)
with open("Titles.pkl", "wb") as f:
    pickle.dump(titles, f)

# --- Reload the saved artifacts -------------------------------------------
# SECURITY: pickle.load executes arbitrary code on load — only ever unpickle
# files you created yourself, never untrusted downloads.
# (Paths now match the save paths above; the original saved to the working
# directory but loaded from /content/, which only agreed inside Colab.)
with open("embedding.pkl", "rb") as f:
    loaded_embeddings = pickle.load(f)
with open("ModelRec.pkl", "rb") as f:
    loaded_model = pickle.load(f)
with open("Titles.pkl", "rb") as f:
    # Fix: the original reloaded embeddings/model but kept indexing the live
    # `titles` variable; use the persisted titles for a self-contained loader.
    loaded_titles = pickle.load(f)


def recomm(inputPaper, k=4):
    """Return the `k` stored paper titles most similar to `inputPaper`.

    Parameters
    ----------
    inputPaper : str
        Free-text paper title / query to match against the corpus.
    k : int, default 4
        Number of recommendations (generalized from the hard-coded 4).

    Returns
    -------
    list of str
        Titles ranked by cosine similarity, most similar first.
    """
    query_vector = loaded_model.encode(inputPaper)
    # cos_sim((N, d), (d,)) -> (N, 1) column of similarity scores.
    cosine_score = util.cos_sim(loaded_embeddings, query_vector)
    # topk over dim=0 selects the k best-matching corpus rows.
    top_score = torch.topk(cosine_score, dim=0, k=k)
    return [loaded_titles[idx.item()] for idx in top_score.indices]


if __name__ == "__main__":
    value = input("enter the paper name")
    papers = recomm(value)
    print(papers)