jena-shreyas committed on
Commit
2a63fd4
·
1 Parent(s): 5644567

Add CUDA cache freeing before model re-loading

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import sys
3
  from pathlib import Path
 
 
4
  import gradio as gr
5
 
6
  # Allow importing your models package
@@ -31,6 +33,14 @@ def load_model_with_quantization(quantization_mode: str):
31
  """Load or reload the model with specified quantization"""
32
  global model, current_quantization
33
 
 
 
 
 
 
 
 
 
34
  load_8bit = False
35
  load_4bit = False
36
 
 
1
  import os
2
  import sys
3
  from pathlib import Path
4
+ import gc
5
+ import torch
6
  import gradio as gr
7
 
8
  # Allow importing your models package
 
33
  """Load or reload the model with specified quantization"""
34
  global model, current_quantization
35
 
36
+ # Free GPU memory if model already exists
37
+ if model is not None:
38
+ print("Unloading existing model and freeing GPU memory...")
39
+ del model
40
+ gc.collect()
41
+ torch.cuda.empty_cache()
42
+ print("GPU memory cleared.")
43
+
44
  load_8bit = False
45
  load_4bit = False
46