hylee
committed on
Commit
·
d0d0944
1
Parent(s):
813a1db
add CPU checks
Browse files — handler.py (+16 −1)
handler.py
CHANGED
|
@@ -9,6 +9,8 @@ from utils import MultiHeadModel, BertInputBuilder, get_num_words
|
|
| 9 |
|
| 10 |
import transformers
|
| 11 |
from transformers import BertTokenizer, BertForSequenceClassification
|
|
|
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
transformers.logging.set_verbosity_debug()
|
|
@@ -241,21 +243,34 @@ class EndpointHandler():
|
|
| 241 |
transcript.add_utterance(Utterance(**utt))
|
| 242 |
|
| 243 |
print("Running inference on %d examples..." % transcript.length())
|
| 244 |
-
|
|
|
|
| 245 |
# Uptake
|
| 246 |
uptake_model = UptakeModel(
|
| 247 |
self.device, self.tokenizer, self.input_builder)
|
| 248 |
uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
|
| 249 |
uptake_speaker=params.pop("uptake_speaker", None))
|
|
|
|
|
|
|
| 250 |
del uptake_model
|
|
|
|
|
|
|
| 251 |
# Reasoning
|
| 252 |
reasoning_model = ReasoningModel(
|
| 253 |
self.device, self.tokenizer, self.input_builder)
|
| 254 |
reasoning_model.run_inference(transcript)
|
|
|
|
|
|
|
| 255 |
del reasoning_model
|
|
|
|
|
|
|
| 256 |
# Question
|
| 257 |
question_model = QuestionModel(
|
| 258 |
self.device, self.tokenizer, self.input_builder)
|
| 259 |
question_model.run_inference(transcript)
|
|
|
|
|
|
|
| 260 |
del question_model
|
|
|
|
|
|
|
| 261 |
return transcript.to_dict()
|
|
|
|
| 9 |
|
| 10 |
import transformers
|
| 11 |
from transformers import BertTokenizer, BertForSequenceClassification
|
| 12 |
+
import psutil
|
| 13 |
+
import time
|
| 14 |
|
| 15 |
|
| 16 |
transformers.logging.set_verbosity_debug()
|
|
|
|
| 243 |
transcript.add_utterance(Utterance(**utt))
|
| 244 |
|
| 245 |
print("Running inference on %d examples..." % transcript.length())
|
| 246 |
+
cpu_percent = psutil.cpu_percent()
|
| 247 |
+
print(f"CPU Usage before models loaded: {cpu_percent}%")
|
| 248 |
# Uptake
|
| 249 |
uptake_model = UptakeModel(
|
| 250 |
self.device, self.tokenizer, self.input_builder)
|
| 251 |
uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
|
| 252 |
uptake_speaker=params.pop("uptake_speaker", None))
|
| 253 |
+
cpu_percent = psutil.cpu_percent()
|
| 254 |
+
print(f"CPU Usage after model 1 loaded: {cpu_percent}%")
|
| 255 |
del uptake_model
|
| 256 |
+
cpu_percent = psutil.cpu_percent()
|
| 257 |
+
print(f"CPU Usage after model 1 deleted: {cpu_percent}%")
|
| 258 |
# Reasoning
|
| 259 |
reasoning_model = ReasoningModel(
|
| 260 |
self.device, self.tokenizer, self.input_builder)
|
| 261 |
reasoning_model.run_inference(transcript)
|
| 262 |
+
cpu_percent = psutil.cpu_percent()
|
| 263 |
+
print(f"CPU Usage after model 2 loaded: {cpu_percent}%")
|
| 264 |
del reasoning_model
|
| 265 |
+
cpu_percent = psutil.cpu_percent()
|
| 266 |
+
print(f"CPU Usage after model 2 deleted: {cpu_percent}%")
|
| 267 |
# Question
|
| 268 |
question_model = QuestionModel(
|
| 269 |
self.device, self.tokenizer, self.input_builder)
|
| 270 |
question_model.run_inference(transcript)
|
| 271 |
+
cpu_percent = psutil.cpu_percent()
|
| 272 |
+
print(f"CPU Usage after model 3 loaded: {cpu_percent}%")
|
| 273 |
del question_model
|
| 274 |
+
cpu_percent = psutil.cpu_percent()
|
| 275 |
+
print(f"CPU Usage after model 3 deleted: {cpu_percent}%")
|
| 276 |
return transcript.to_dict()
|