kevinkyi committed on
Commit
c35e65d
·
verified ·
1 Parent(s): 8b38fa9

Add selection.py

Browse files
Files changed (1) hide show
  1. selection.py +15 -0
selection.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.feature_extraction.text import TfidfVectorizer
3
+ from sklearn.metrics.pairwise import cosine_similarity
4
+
5
class TfidfRetriever:
    """Retrieve the training texts most similar to a query using TF-IDF.

    The training texts are vectorized once at construction time with
    unigram and bigram TF-IDF features; queries are projected into the same
    feature space and ranked by cosine similarity.
    """

    def __init__(self, train_texts):
        """Fit the TF-IDF vectorizer on *train_texts*.

        Args:
            train_texts: Iterable of strings forming the retrieval corpus.
                It may be a one-shot iterable (e.g. a generator); it is
                copied into a list before fitting.
        """
        # Materialize first: fitting consumes the iterable, so copying it
        # afterwards would leave ``self.texts`` empty for generators and
        # out of sync with the rows of ``self.X``.
        self.texts = list(train_texts)
        self.vec = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
        # Row i of X is the TF-IDF vector of self.texts[i].
        self.X = self.vec.fit_transform(self.texts)

    def topk(self, query_text, k=1):
        """Return the *k* training texts most similar to *query_text*.

        Args:
            query_text: The query string.
            k: Maximum number of results. Values larger than the corpus
                size return every text; values <= 0 return an empty list.

        Returns:
            A list of ``(text, similarity)`` tuples ordered by decreasing
            cosine similarity, with similarities as plain Python floats.
            Equal scores keep their original corpus order.
        """
        # A negative k would otherwise slice "all but the last |k|" results.
        if k <= 0:
            return []
        q = self.vec.transform([query_text])
        # cosine_similarity yields a (1, n_texts) matrix; take its only row.
        sims = cosine_similarity(q, self.X)[0]
        # Stable sort makes tie-breaking deterministic (lowest index first).
        idxs = np.argsort(-sims, kind="stable")[:k]
        return [(self.texts[i], float(sims[i])) for i in idxs]