File size: 1,920 Bytes
a44dd82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# -*- coding: utf-8 -*-
"""Recommendation_paper.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/16l0DYZhK4q7tjYvRqpa1IMyIatp1Gtd1
"""

# Install the embedding library (Colab shell magic; quiet, upgrade-if-present).
!pip install -q -U sentence-transformers

import torch
import pandas as pd

from  sentence_transformers import SentenceTransformer,util

# Load the arXiv metadata CSV from the Colab runtime's local filesystem.
# NOTE(review): path assumes the file was uploaded to /content — confirm.
data=pd.read_csv("/content/arxiv_data.csv")

# Quick look at the first rows (notebook cell output only).
data.head(10)

# Column of paper titles; this is the corpus we embed and search over.
titles=data["titles"]

titles.head(5)

# Small, fast general-purpose sentence-embedding model (384-dim output).
model=SentenceTransformer("all-MiniLM-L6-v2")

# Embed every title. NOTE(review): device="cuda" is hard-coded — this fails
# on a CPU-only runtime; confirm a GPU runtime is selected.
Encoded_titles=model.encode(titles,device="cuda")

# Sanity-check the encoder output: print the first six (title, embedding)
# pairs. enumerate() replaces the original hand-rolled counter; the break
# fires after index 5, so six items are shown — same as the original loop.
for idx, (title, encoded) in enumerate(zip(titles, Encoded_titles)):
  print("Titles", title)
  print("Encoded Data", encoded)
  print("length of Encoded Data", len(encoded))
  if idx == 5:
    break

# Cell-output inspection of the container types (pandas Series vs ndarray).
type(titles), type(Encoded_titles)

# checking the shape of the embedding matrix: (num_titles, embedding_dim)
Encoded_titles.shape

# NOTE(review): `huggingface-cli` is likely the wrong package name — the CLI
# ships inside `huggingface_hub` (installed on the next line); confirm intent.
!pip install huggingface-cli

!pip install huggingface_hub

# Interactive login so push_to_hub below can authenticate.
from huggingface_hub import notebook_login
notebook_login()

# Read a stored Colab secret. NOTE(review): 'secretName' looks like the
# placeholder default, and the returned value is discarded — verify this
# cell is actually needed.
from google.colab import userdata
userdata.get('secretName')



!pip install datasets

# Upload the fine-tuned/loaded model to the Hugging Face Hub under this repo id.
model.push_to_hub("1998Shubham007/ModelRecomm")



# Now save the embeddings, titles, and model so the recommender can be
# reloaded later without re-encoding the whole corpus.
import pickle

# saving the precomputed title embeddings
with open("embedding.pkl","wb") as f:
  pickle.dump(Encoded_titles,f)

# saving the SentenceTransformer model object itself
with open("ModelRec.pkl","wb") as f:
  pickle.dump(model,f)

# saving the titles Series (index-aligned with the embeddings)
with open("Titles.pkl","wb") as f:
  pickle.dump(titles,f)



# Loading the saved Embedding.
# NOTE(review): pickle.load executes arbitrary code — only load files you
# created yourself; never load pickles from untrusted sources.
with open("/content/embedding.pkl","rb") as f:
  Lencode=pickle.load(f)

# Loading the saved Model
with open("/content/ModelRec.pkl","rb") as f:
  lModelRec=pickle.load(f)

#Prediction

def recomm(inputPaper):
  """Return the titles of the 4 papers most similar to ``inputPaper``.

  Embeds the query with the reloaded SentenceTransformer (``lModelRec``),
  scores it against the precomputed title embeddings (``Lencode``) with
  cosine similarity, and returns the top-4 matching titles from ``titles``.

  NOTE(review): if the query title is already in the corpus, it will be
  its own top match — consider k=5 and dropping the first hit.
  """
  # Embed the query with the same model that produced the corpus embeddings.
  encodePaper = lModelRec.encode(inputPaper)
  # Shape (num_titles, 1): one cosine score per stored title embedding.
  cosine_score = util.cos_sim(Lencode, encodePaper)
  # Indices of the 4 highest-scoring titles along the corpus axis.
  Top_score = torch.topk(cosine_score, dim=0, k=4)
  # Map tensor indices back to title strings (comprehension replaces the
  # original append loop; same order, same result).
  return [titles[i.item()] for i in Top_score.indices]

# Interactive driver: prompt for a paper title and print its recommendations.
value=input("enter the paper name")

papers=recomm(value)

print(papers)