Mustafa Öztürk committed on
Commit
c399765
·
1 Parent(s): 7a29d91

Collect garbage after quantization to reduce peak RAM

Browse files
Files changed (1) hide show
  1. app/ml/model_loader.py +5 -0
app/ml/model_loader.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import torch
2
  from detoxify import Detoxify
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
@@ -33,6 +35,7 @@ def load_system():
33
  dtype=torch.qint8,
34
  )
35
  model_o.eval()
 
36
  except Exception:
37
  pass
38
 
@@ -55,6 +58,7 @@ def load_system():
55
  {torch.nn.Linear},
56
  dtype=torch.qint8,
57
  )
 
58
  except Exception:
59
  pass
60
  try:
@@ -63,6 +67,7 @@ def load_system():
63
  {torch.nn.Linear},
64
  dtype=torch.qint8,
65
  )
 
66
  except Exception:
67
  pass
68
 
 
1
+ import gc
2
+
3
  import torch
4
  from detoxify import Detoxify
5
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 
35
  dtype=torch.qint8,
36
  )
37
  model_o.eval()
38
+ gc.collect()
39
  except Exception:
40
  pass
41
 
 
58
  {torch.nn.Linear},
59
  dtype=torch.qint8,
60
  )
61
+ gc.collect()
62
  except Exception:
63
  pass
64
  try:
 
67
  {torch.nn.Linear},
68
  dtype=torch.qint8,
69
  )
70
+ gc.collect()
71
  except Exception:
72
  pass
73