Luis J Camargo committed on
Commit
68fabc6
·
1 Parent(s): f36c0ac

perf: Optimize CPU usage by setting PyTorch threads to 1, enabling low-memory model loading, and removing an unnecessary return.

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -8,6 +8,8 @@ import torch.nn as nn
8
  import psutil
9
  import gc
10
 
 
 
11
  # === CUSTOM MODEL CLASSES ===
12
  class WhisperEncoderOnlyConfig(WhisperConfig):
13
  model_type = "whisper_encoder_classifier"
@@ -78,7 +80,7 @@ MODEL_REPO = "tachiwin/language_classification_enconly_model_2"
78
 
79
  print("Loading model on CPU...")
80
  processor = WhisperProcessor.from_pretrained(MODEL_REPO)
81
- model = WhisperEncoderOnlyForClassification.from_pretrained(MODEL_REPO)
82
  model.eval()
83
 
84
  print("Model loaded successfully!")
@@ -131,7 +133,7 @@ def predict_language(audio):
131
  return_tensors="pt",
132
  )
133
  print("[LOG] DID EXTRACT")
134
- return None
135
  # Delete raw audio array immediately as it's now in 'inputs'
136
  del audio_array
137
  gc.collect()
 
8
  import psutil
9
  import gc
10
 
11
+ torch.set_num_threads(1)
12
+
13
  # === CUSTOM MODEL CLASSES ===
14
  class WhisperEncoderOnlyConfig(WhisperConfig):
15
  model_type = "whisper_encoder_classifier"
 
80
 
81
  print("Loading model on CPU...")
82
  processor = WhisperProcessor.from_pretrained(MODEL_REPO)
83
+ model = WhisperEncoderOnlyForClassification.from_pretrained(MODEL_REPO, low_cpu_mem_usage=True)
84
  model.eval()
85
 
86
  print("Model loaded successfully!")
 
133
  return_tensors="pt",
134
  )
135
  print("[LOG] DID EXTRACT")
136
+
137
  # Delete raw audio array immediately as it's now in 'inputs'
138
  del audio_array
139
  gc.collect()