hylee committed · Commit 7841567
1 Parent(s): 1f758f7
add virtual mem logging
Files changed: handler.py +32 -4
handler.py CHANGED
@@ -246,36 +246,64 @@ class EndpointHandler():
         logging.set_verbosity_info()
         logger = logging.get_logger("transformers")
         logger.info(f"CPU Usage before models loaded: {cpu_percent}%")
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory before models loaded: {used_mem:.2f} GB, Total RAM: {total_mem:.2f} GB")
         # Uptake
         uptake_model = UptakeModel(
             self.device, self.tokenizer, self.input_builder)
         uptake_model.run_inference(transcript, min_prev_words=params['uptake_min_num_words'],
                                    uptake_speaker=params.pop("uptake_speaker", None))
         cpu_percent = psutil.cpu_percent()
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory after model 1 loaded: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
         logger.info(f"CPU Usage after model 1 loaded: {cpu_percent}%")
         del uptake_model
         cpu_percent = psutil.cpu_percent()
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory after model 1 deleted: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
         logger.info(f"CPU Usage after model 1 deleted: {cpu_percent}%")
         # Reasoning
         reasoning_model = ReasoningModel(
             self.device, self.tokenizer, self.input_builder)
         reasoning_model.run_inference(transcript)
         cpu_percent = psutil.cpu_percent()
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory after model 2 loaded: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
         logger.info(f"CPU Usage after model 2 loaded: {cpu_percent}%")
-        print(f"CPU Usage after model 2 loaded: {cpu_percent}%")
+        # print(f"CPU Usage after model 2 loaded: {cpu_percent}%")
         del reasoning_model
         cpu_percent = psutil.cpu_percent()
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory after model 2 deleted: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
         logger.info(f"CPU Usage after model 2 deleted: {cpu_percent}%")
-        print(f"CPU Usage after model 2 deleted: {cpu_percent}%")
+        # print(f"CPU Usage after model 2 deleted: {cpu_percent}%")
         # Question
         question_model = QuestionModel(
             self.device, self.tokenizer, self.input_builder)
         question_model.run_inference(transcript)
         cpu_percent = psutil.cpu_percent()
         logger.info(f"CPU Usage after model 3 loaded: {cpu_percent}%")
-        print(f"CPU Usage after model 3 loaded: {cpu_percent}%")
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory after model 3 loaded: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
+        # print(f"CPU Usage after model 3 loaded: {cpu_percent}%")
         del question_model
         cpu_percent = psutil.cpu_percent()
         logger.info(f"CPU Usage after model 3 deleted: {cpu_percent}%")
-        print(f"CPU Usage after model 3 deleted: {cpu_percent}%")
+        mem_info = psutil.virtual_memory()
+        used_mem = mem_info.used / (1024 ** 3)  # Convert to gigabytes
+        total_mem = mem_info.total / (1024 ** 3)  # Convert to gigabytes
+        logger.info(f"Used Memory after model 3 deleted: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")
+        # print(f"CPU Usage after model 3 deleted: {cpu_percent}%")
         return transcript.to_dict()
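The same four lines (a psutil.virtual_memory() call, two gigabyte conversions, and a logger.info call) are repeated at every checkpoint in the diff above. A minimal sketch of how they could be folded into one helper follows; the helper name log_memory_usage and its label argument are hypothetical and not part of this commit. Note also that psutil.cpu_percent() called without an interval reports utilization since the previous call (the very first call returns 0.0), so the CPU figures logged here are averages over the span between checkpoints.

# Hypothetical refactor sketch -- not part of the commit above.
import psutil
from transformers.utils import logging

logger = logging.get_logger("transformers")

def log_memory_usage(label):
    """Log used and total virtual memory in GB at a named checkpoint."""
    mem_info = psutil.virtual_memory()
    used_mem = mem_info.used / (1024 ** 3)    # bytes -> GB
    total_mem = mem_info.total / (1024 ** 3)  # bytes -> GB
    logger.info(f"Used Memory {label}: {used_mem:.2f} GB, Total Mem: {total_mem:.2f} GB")

# Usage at each checkpoint, e.g.:
# log_memory_usage("before models loaded")
# log_memory_usage("after model 1 loaded")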