Sahil Seemant committed on
Commit
e10f558
·
1 Parent(s): 68a7bfa

Add offload_folder to support low-memory loading on Hugging Face

Browse files
Files changed (1) hide show
  1. chat_gui.py +2 -1
chat_gui.py CHANGED
@@ -317,7 +317,8 @@ if st.session_state.messages and st.session_state.messages[-1]["role"] == "user"
317
  load_kwargs = {
318
  "device_map": "auto",
319
  "token": hf_token,
320
- "trust_remote_code": True
 
321
  }
322
 
323
  # Only apply 4-bit quantization if NOT natively quantized (Mistral is FP8)
 
317
  load_kwargs = {
318
  "device_map": "auto",
319
  "token": hf_token,
320
+ "trust_remote_code": True,
321
+ "offload_folder": "/tmp/offload"
322
  }
323
 
324
  # Only apply 4-bit quantization if NOT natively quantized (Mistral is FP8)