jena-shreyas committed on
Commit
2a63fd4
·
1 Parent(s): 5644567

Add CUDA cache freeing before model re-loading

Browse files
Files changed (1) hide show
  1. app.py +10 -0
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import sys
3
  from pathlib import Path
 
 
4
  import gradio as gr
5
 
6
  # Allow importing your models package
@@ -31,6 +33,14 @@ def load_model_with_quantization(quantization_mode: str):
31
  """Load or reload the model with specified quantization"""
32
  global model, current_quantization
33
 
 
 
 
 
 
 
 
 
34
  load_8bit = False
35
  load_4bit = False
36
 
 
1
  import os
2
  import sys
3
  from pathlib import Path
4
+ import gc
5
+ import torch
6
  import gradio as gr
7
 
8
  # Allow importing your models package
 
33
  """Load or reload the model with specified quantization"""
34
  global model, current_quantization
35
 
36
+ # Free GPU memory if model already exists
37
+ if model is not None:
38
+ print("Unloading existing model and freeing GPU memory...")
39
+ del model
40
+ gc.collect()
41
+ torch.cuda.empty_cache()
42
+ print("GPU memory cleared.")
43
+
44
  load_8bit = False
45
  load_4bit = False
46