Update app.py
Browse files
app.py
CHANGED
|
@@ -97,13 +97,14 @@ def load_model_and_tokenizer():
|
|
| 97 |
#"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
| 98 |
#"facebook/opt-125m"
|
| 99 |
# Load tokenizer and model from disk (without trust_remote_code)
|
| 100 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 101 |
if torch.cuda.is_available():
|
| 102 |
# Load model on GPU if CUDA is available
|
| 103 |
model = AutoModelForCausalLM.from_pretrained(
|
| 104 |
model_name,
|
| 105 |
torch_dtype=torch.float16,
|
| 106 |
device_map="auto" # Automatically map model layers to GPU
|
|
|
|
| 107 |
)
|
| 108 |
else:
|
| 109 |
# Load model on CPU if no GPU is available
|
|
@@ -111,6 +112,7 @@ def load_model_and_tokenizer():
|
|
| 111 |
model_name,
|
| 112 |
torch_dtype=torch.float32, # Use float32 for compatibility with CPU
|
| 113 |
low_cpu_mem_usage=True # Reduce memory usage on CPU
|
|
|
|
| 114 |
)
|
| 115 |
return model, tokenizer
|
| 116 |
|
|
|
|
| 97 |
#"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
| 98 |
#"facebook/opt-125m"
|
| 99 |
# Load tokenizer and model from disk (without trust_remote_code)
|
| 100 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name,cache_dir="/app/hf_cache" )
|
| 101 |
if torch.cuda.is_available():
|
| 102 |
# Load model on GPU if CUDA is available
|
| 103 |
model = AutoModelForCausalLM.from_pretrained(
|
| 104 |
model_name,
|
| 105 |
torch_dtype=torch.float16,
|
| 106 |
device_map="auto" # Automatically map model layers to GPU
|
| 107 |
+
,cache_dir="/app/hf_cache"
|
| 108 |
)
|
| 109 |
else:
|
| 110 |
# Load model on CPU if no GPU is available
|
|
|
|
| 112 |
model_name,
|
| 113 |
torch_dtype=torch.float32, # Use float32 for compatibility with CPU
|
| 114 |
low_cpu_mem_usage=True # Reduce memory usage on CPU
|
| 115 |
+
, cache_dir="/app/hf_cache"
|
| 116 |
)
|
| 117 |
return model, tokenizer
|
| 118 |
|