Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -315,7 +315,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 315 |
model_path,
|
| 316 |
quantization_config=quant_config,
|
| 317 |
device_map="auto",
|
| 318 |
-
|
| 319 |
trust_remote_code=True,
|
| 320 |
low_cpu_mem_usage=True,
|
| 321 |
token=hf_token
|
|
@@ -343,7 +343,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 343 |
"""Optimized model loading for Phi-3-mini."""
|
| 344 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 345 |
model_path,
|
| 346 |
-
|
| 347 |
device_map="auto", # Let transformers decide placement
|
| 348 |
trust_remote_code=True,
|
| 349 |
low_cpu_mem_usage=True,
|
|
@@ -407,7 +407,7 @@ class Phi3MiniEducationalLLM(Runnable):
|
|
| 407 |
repetition_penalty=1.1,
|
| 408 |
pad_token_id=self.tokenizer.eos_token_id,
|
| 409 |
early_stopping=True,
|
| 410 |
-
use_cache=
|
| 411 |
)
|
| 412 |
|
| 413 |
# Decode only new tokens
|
|
|
|
| 315 |
model_path,
|
| 316 |
quantization_config=quant_config,
|
| 317 |
device_map="auto",
|
| 318 |
+
torch_dtype=torch.float16,
|
| 319 |
trust_remote_code=True,
|
| 320 |
low_cpu_mem_usage=True,
|
| 321 |
token=hf_token
|
|
|
|
| 343 |
"""Optimized model loading for Phi-3-mini."""
|
| 344 |
self.model = AutoModelForCausalLM.from_pretrained(
|
| 345 |
model_path,
|
| 346 |
+
torch_dtype=torch.float16, # Use float16 to save memory
|
| 347 |
device_map="auto", # Let transformers decide placement
|
| 348 |
trust_remote_code=True,
|
| 349 |
low_cpu_mem_usage=True,
|
|
|
|
| 407 |
repetition_penalty=1.1,
|
| 408 |
pad_token_id=self.tokenizer.eos_token_id,
|
| 409 |
early_stopping=True,
|
| 410 |
+
use_cache=False # Disable cache to avoid compatibility issues
|
| 411 |
)
|
| 412 |
|
| 413 |
# Decode only new tokens
|