Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Zero-shot text classification by embedding similarity.

Embeds a sentence and a set of candidate label names with a sentence
encoder, then ranks the labels by cosine similarity to the sentence.

Requires the ``transformers`` and ``torch`` packages
(``pip install transformers torch``).
"""
import torch
from torch.nn import functional as F

MODEL_NAME = 'deepset/sentence_bert'


def mean_pool(token_embeddings, attention_mask):
    """Average token embeddings over the sequence, ignoring padding.

    Args:
        token_embeddings: float tensor of shape (batch, seq_len, hidden).
        attention_mask: tensor of shape (batch, seq_len); non-zero entries
            mark real tokens, zeros mark padding.

    Returns:
        Tensor of shape (batch, hidden) holding one vector per sequence.
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # clamp guards against division by zero for a row that is all padding
    token_counts = mask.sum(dim=1).clamp(min=1)
    return summed / token_counts


def label_similarities(sentence, labels, tokenizer, model):
    """Return the cosine similarity between ``sentence`` and each label.

    Args:
        sentence: the text to classify.
        labels: sequence of candidate label names.
        tokenizer: a Hugging Face tokenizer matching ``model``.
        model: a Hugging Face encoder whose first output holds the
            per-token hidden states.

    Returns:
        1-D tensor with one similarity score per entry of ``labels``,
        in the same order.
    """
    # The sentence and the labels go through the model as a single batch;
    # padding=True pads every item to the longest one in that batch.
    inputs = tokenizer([sentence] + list(labels),
                       return_tensors='pt',
                       padding=True)
    attention_mask = inputs['attention_mask']

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        output = model(inputs['input_ids'], attention_mask=attention_mask)[0]

    # Labels are much shorter than the sentence, so most of their positions
    # are padding; a masked mean keeps those positions out of the average.
    reps = mean_pool(output, attention_mask)
    sentence_rep = reps[:1]
    label_reps = reps[1:]
    return F.cosine_similarity(sentence_rep, label_reps)


def main():
    """Load the model and print candidate labels from most to least similar."""
    # Imported here so that importing this module (e.g. to reuse mean_pool)
    # does not require transformers to be installed.
    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModel.from_pretrained(MODEL_NAME)

    sentence = 'Who are you voting for in 2020?'
    labels = ['business', 'art & culture', 'politics']

    # now find the labels with the highest cosine similarities to
    # the sentence
    similarities = label_similarities(sentence, labels, tokenizer, model)
    closest = similarities.argsort(descending=True)
    for ind in closest:
        print(f'label: {labels[ind]} \t similarity: {similarities[ind]}')


if __name__ == '__main__':
    main()
|