jdesiree committed on
Commit
b02e1c2
·
verified ·
1 Parent(s): 22e6f1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -315,7 +315,7 @@ class Phi3MiniEducationalLLM(Runnable):
315
  model_path,
316
  quantization_config=quant_config,
317
  device_map="auto",
318
- dtype=torch.float16,
319
  trust_remote_code=True,
320
  low_cpu_mem_usage=True,
321
  token=hf_token
@@ -343,7 +343,7 @@ class Phi3MiniEducationalLLM(Runnable):
343
  """Optimized model loading for Phi-3-mini."""
344
  self.model = AutoModelForCausalLM.from_pretrained(
345
  model_path,
346
- dtype=torch.float16, # Use float16 to save memory
347
  device_map="auto", # Let transformers decide placement
348
  trust_remote_code=True,
349
  low_cpu_mem_usage=True,
@@ -407,7 +407,7 @@ class Phi3MiniEducationalLLM(Runnable):
407
  repetition_penalty=1.1,
408
  pad_token_id=self.tokenizer.eos_token_id,
409
  early_stopping=True,
410
- use_cache=True
411
  )
412
 
413
  # Decode only new tokens
 
315
  model_path,
316
  quantization_config=quant_config,
317
  device_map="auto",
318
+ torch_dtype=torch.float16,
319
  trust_remote_code=True,
320
  low_cpu_mem_usage=True,
321
  token=hf_token
 
343
  """Optimized model loading for Phi-3-mini."""
344
  self.model = AutoModelForCausalLM.from_pretrained(
345
  model_path,
346
+ torch_dtype=torch.float16, # Use float16 to save memory
347
  device_map="auto", # Let transformers decide placement
348
  trust_remote_code=True,
349
  low_cpu_mem_usage=True,
 
407
  repetition_penalty=1.1,
408
  pad_token_id=self.tokenizer.eos_token_id,
409
  early_stopping=True,
410
+ use_cache=False # Disable cache to avoid compatibility issues
411
  )
412
 
413
  # Decode only new tokens