Uan Sholanbayev
committed on
Commit
·
8f18779
1
Parent(s):
0efa6b7
add custom handler
Browse files- handler.py +36 -0
- main_test.py +57 -0
- requirements.txt +4 -0
handler.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import pickle
from typing import Any, Dict, List

import numpy as np
import torch
from transformers import BertModel, BertTokenizer
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def unpickle_obj(filepath):
    """Load and return the object pickled in the file at *filepath*.

    Prints a confirmation line to stdout once the object has been loaded.

    SECURITY: ``pickle.load`` can execute arbitrary code embedded in the
    file. Only call this on files from a trusted source.

    Raises whatever ``open`` / ``pickle.load`` raise (e.g.
    ``FileNotFoundError`` for a missing file).
    """
    with open(filepath, 'rb') as f_in:
        data = pickle.load(f_in)
    print(f"unpickled {filepath}")
    return data
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class EndpointHandler:
    """Custom inference handler that scores every (text, query) pair.

    Texts and queries are embedded with ``bert-base-uncased`` (mean of the
    last hidden state along dim 1). For every pair the difference
    ``text_embedding - query_embedding`` is passed to the unpickled model's
    ``predict_proba``.
    """

    # File looked up when ``path`` is a directory. The name is taken from the
    # local test script; presumably it is the file shipped in the model
    # repository -- TODO confirm.
    MODEL_FILENAME = "bert_lr.pkl"

    def __init__(self, path=""):
        """Load the pickled classifier plus the BERT tokenizer and encoder.

        Args:
            path: Either the pickle file itself, or a directory containing
                ``MODEL_FILENAME``. An empty string means the current
                working directory.
        """
        self.model = unpickle_obj(self._resolve_model_path(path))
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.bert = BertModel.from_pretrained('bert-base-uncased').to(self.device)

    @classmethod
    def _resolve_model_path(cls, path):
        """Return the pickle file path for *path* (a file or a directory)."""
        if not path or os.path.isdir(path):
            return os.path.join(path, cls.MODEL_FILENAME)
        return path

    @staticmethod
    def _as_list(value):
        """Wrap a bare string in a list; turn any other iterable into a list."""
        if isinstance(value, str):
            return [value]
        return list(value)

    @staticmethod
    def _pairwise_diff(texts_vec, queries_vec):
        """Return ``text - query`` for every pair as a 2-D array.

        For T text rows and Q query rows of width D the result has shape
        ``(T * Q, D)``; row ``t * Q + q`` holds ``texts_vec[t] - queries_vec[q]``.
        """
        texts_vec = np.asarray(texts_vec)
        queries_vec = np.asarray(queries_vec)
        # (T, 1, D) - (Q, D) broadcasts to (T, Q, D), then flatten the pairs.
        stacked = texts_vec[:, np.newaxis] - queries_vec
        return stacked.reshape(-1, queries_vec.shape[-1])

    def get_embeddings(self, texts: List[str]):
        """Embed *texts* with BERT and return one numpy row per input text.

        Inputs are padded to the longest item in the batch and truncated to
        512 tokens. Each row is the mean of ``last_hidden_state`` over dim 1.
        """
        inputs = self.tokenizer(texts, return_tensors='pt', truncation=True,
                                padding=True, max_length=512).to(self.device)
        with torch.no_grad():
            outputs = self.bert(**inputs)
        # NOTE(review): the mean is not weighted by the attention mask, so
        # padded positions contribute to it. Left unchanged because the
        # pickled classifier was presumably fitted on embeddings computed
        # this way -- confirm before altering.
        return outputs.last_hidden_state.mean(dim=1).cpu().numpy()

    def __call__(self, data: Dict[str, Any]) -> Any:
        """Score every (text, query) pair found in *data*.

        Args:
            data: Mapping with keys ``'queries'`` and ``'texts'``; each value
                is a string or a sequence of strings.

        Returns:
            The result of ``self.model.predict_proba`` on the pairwise
            difference matrix (one row per pair, ordered text-major).

        Raises:
            KeyError: If ``'queries'`` or ``'texts'`` is missing.
            ValueError: If either collection is empty.
        """
        queries = self._as_list(data['queries'])
        texts = self._as_list(data['texts'])
        if not queries or not texts:
            raise ValueError(
                "'queries' and 'texts' must each contain at least one string")
        queries_vec = self.get_embeddings(queries)
        texts_vec = self.get_embeddings(texts)
        diff = self._pairwise_diff(texts_vec, queries_vec)
        return self.model.predict_proba(diff)
|
main_test.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Manual smoke test: run EndpointHandler on a relevant and a less relevant query."""
from handler import EndpointHandler


summary = (
    "The article discusses the principles and process of doing "
    "great work, emphasizing the importance of choosing the "
    "right field, developing a deep interest, and working hard."
    " It also highlights the significance of curiosity, delight,"
    " and the desire to do something impressive. The author "
    "further discusses the challenges of figuring out what to"
    " work on, the dangers of procrastination, and the importance"
    " of consistency and exponential growth. The article "
    "concludes by highlighting the importance of intellectual "
    "honesty and avoiding affectation.\n\n• Choosing the right "
    "field\n • The author suggests that the work one chooses "
    "should be something they have a natural aptitude for, a deep"
    " interest in, and offers scope to do great work. The author"
    " also emphasizes the importance of working on one's own "
    "projects and following one's curiosity.\n\n• Figuring out what"
    " to work on\n • The author discusses the challenges of "
    "figuring out what to work on, especially when young and "
    "inexperienced. The author suggests trying lots of things, "
    "meeting lots of people, reading lots of books, and asking"
    " lots of questions to discover one's interests.\n\n• The "
    "dangers of procrastination\n • The author warns against "
    "procrastination, especially per-project procrastination, which"
    " can lead to significant delays in starting ambitious "
    "projects. The author suggests regularly asking oneself"
    " if they are working on what they most want to work on.\n\n•"
    " The importance of consistency and exponential growth\n • "
    "The author emphasizes the importance of consistency in work"
    " and the potential for exponential growth. The author "
    "suggests that work that compounds can lead to exponential "
    "growth, but warns that the early stages of exponential growth"
    " can feel flat and be undervalued.\n\n• Intellectual honesty "
    "and avoiding affectation\n • The author concludes by "
    "highlighting the importance of intellectual honesty and "
    "avoiding affectation. The author suggests that being earnest"
    " and intellectually honest can help one see new ideas and truths."
)
query = "What are the principles of doing great work according to the article?"
# NOTE(review): `ir_query` is defined but never used; the "irrelevant" payload
# below is built from `semi_relevant_query`. Confirm which one was intended.
ir_query = "What are the best recipes for a vegan diet?"
semi_relevant_query = "Did Cristiano Ronaldo did a great work in Saudi league?"


def main():
    """Build the handler, score both sample payloads and print the results."""
    # init handler
    my_handler = EndpointHandler(path="./bert_lr.pkl")

    # prepare sample payloads; "queries" is a list in both, matching "texts"
    relevant_payload = {"queries": [query], "texts": [summary]}
    irrelevant_payload = {"queries": [semi_relevant_query], "texts": [summary]}

    # test the handler
    relevant_pred = my_handler(relevant_payload)
    irrelevant_pred = my_handler(irrelevant_payload)

    # show results
    print("relevant_pred", relevant_pred)
    print("irrelevant_pred", irrelevant_pred)


# Guarded so that importing this file (e.g. pytest collecting *_test.py)
# does not load the model and run inference.
if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy==1.26.0
|
| 2 |
+
torch==2.1.0
|
| 3 |
+
transformers==4.34.0
|
| 4 |
+
scikit-learn==1.3.1
|