Update pipeline.py
#7
by
alighadami77
- opened
- pipeline.py +13 -56
pipeline.py
CHANGED
|
@@ -42,64 +42,21 @@ class PreTrainedPipeline():
|
|
| 42 |
similarities = distance.cdist(embeddings.reshape((1,300)), self.comparisons, "cosine")[0]
|
| 43 |
top_indices = similarities.argsort()[:10]
|
| 44 |
top_words = [[self.id2h[str(top_indices[i])]] for i in range(10)]
|
| 45 |
-
|
|
|
|
|
|
|
| 46 |
|
| 47 |
return [
|
| 48 |
[
|
| 49 |
-
{'label': top_words[0], 'score': 0},
|
| 50 |
-
{'label': top_words[1], 'score':
|
| 51 |
-
{'label': top_words[2], 'score':
|
| 52 |
-
{'label': top_words[3], 'score':
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
]
|
| 54 |
]
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
# return [
|
| 58 |
-
# [ # Sample output, call the model here TODO
|
| 59 |
-
# {'label': 'POSITIVE', 'score': 0.05},
|
| 60 |
-
# {'label': 'NEGATIVE', 'score': 0.03},
|
| 61 |
-
# {'label': 'معنی', 'score': 0.92},
|
| 62 |
-
# {'label': f'{inputs}', 'score': 0},
|
| 63 |
-
# ]
|
| 64 |
-
# ]
|
| 65 |
-
|
| 66 |
-
# def RevDict(sent,flag,model):
|
| 67 |
-
# """
|
| 68 |
-
# This function receives a sentence from the user and prints the top 10 (for flag=0) or top 100 (for flag=1) predictions.
|
| 69 |
-
# the input sentence will be normalized, and stop words will be removed
|
| 70 |
-
# """
|
| 71 |
-
|
| 72 |
-
# normalizer = Normalizer()
|
| 73 |
-
# X_Normalized = normalizer.normalize(sent)
|
| 74 |
-
# X_Tokens = word_tokenize(X_Normalized)
|
| 75 |
-
# stopwords = [normalizer.normalize(x.strip()) for x in codecs.open(r"stopwords.txt",'r','utf-8').readlines()]
|
| 76 |
-
# X_Tokens = [t for t in X_Tokens if t not in stopwords]
|
| 77 |
-
# preprocessed = [' '.join(X_Tokens)][0]
|
| 78 |
-
# sent_ids = sent2id([preprocessed])
|
| 79 |
-
# output=np.array((model.predict(sent_ids.reshape((1,20))).tolist()[0]))
|
| 80 |
-
# distances=distance.cdist(output.reshape((1,300)), comparison_matrix, "cosine")[0]
|
| 81 |
-
# min_index_100 = distances.argsort()[:100]
|
| 82 |
-
# min_index_10 = distances.argsort()[:10]
|
| 83 |
-
|
| 84 |
-
# temp=[]
|
| 85 |
-
# if flag == 0:
|
| 86 |
-
# for i in range(10):
|
| 87 |
-
# temp.append(id2h[str(min_index_10[i])])
|
| 88 |
-
# elif flag == 1:
|
| 89 |
-
# for i in range(100):
|
| 90 |
-
# temp.append(id2h[str(min_index_100[i])])
|
| 91 |
-
|
| 92 |
-
# for i in range(len(temp)):
|
| 93 |
-
# print(temp[i])
|
| 94 |
-
|
| 95 |
-
# def sent2id(sents):
|
| 96 |
-
# sents_id=np.zeros((len(sents),20))
|
| 97 |
-
# for j in tqdm(range(len(sents))):
|
| 98 |
-
# for i,word in enumerate(sents[j].split()):
|
| 99 |
-
# try:
|
| 100 |
-
# sents_id[j,i] = t2id[word]
|
| 101 |
-
# except:
|
| 102 |
-
# sents_id[j,i] = t2id['UNK']
|
| 103 |
-
# if i==19:
|
| 104 |
-
# break
|
| 105 |
-
# return sents_id
|
|
|
|
| 42 |
similarities = distance.cdist(embeddings.reshape((1,300)), self.comparisons, "cosine")[0]
|
| 43 |
top_indices = similarities.argsort()[:10]
|
| 44 |
top_words = [[self.id2h[str(top_indices[i])]] for i in range(10)]
|
| 45 |
+
logits = np.exp(-10*np.array(similarities[top_indices]))
|
| 46 |
+
softmax_probs = tf.nn.softmax(logits).numpy()
|
| 47 |
+
top_scores = [round(float(softmax_probs[i]), 3) for i in range(10)]
|
| 48 |
|
| 49 |
return [
|
| 50 |
[
|
| 51 |
+
{'label': top_words[0], 'score': top_scores[0]},
|
| 52 |
+
{'label': top_words[1], 'score': top_scores[1]},
|
| 53 |
+
{'label': top_words[2], 'score': top_scores[2]},
|
| 54 |
+
{'label': top_words[3], 'score': top_scores[3]},
|
| 55 |
+
{'label': top_words[4], 'score': top_scores[4]},
|
| 56 |
+
{'label': top_words[5], 'score': top_scores[5]},
|
| 57 |
+
{'label': top_words[6], 'score': top_scores[6]},
|
| 58 |
+
{'label': top_words[7], 'score': top_scores[7]},
|
| 59 |
+
{'label': top_words[8], 'score': top_scores[8]},
|
| 60 |
+
{'label': top_words[9], 'score': top_scores[9]},
|
| 61 |
]
|
| 62 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|