hamxaameer committed on
Commit
7980cb3
·
verified ·
1 Parent(s): 25c4058

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -123,12 +123,12 @@ def initialize_llm():
123
  model.eval()
124
 
125
  # Create pipeline for generation
 
126
  logger.info(" Creating text-generation pipeline...")
127
  llm_client = pipeline(
128
  "text-generation",
129
  model=model,
130
  tokenizer=tokenizer,
131
- device=0 if device == "cuda" else -1,
132
  max_new_tokens=512,
133
  pad_token_id=tokenizer.eos_token_id
134
  )
 
123
  model.eval()
124
 
125
  # Create pipeline for generation
126
+ # NOTE: When using accelerate/quantization, do NOT specify device parameter
127
  logger.info(" Creating text-generation pipeline...")
128
  llm_client = pipeline(
129
  "text-generation",
130
  model=model,
131
  tokenizer=tokenizer,
 
132
  max_new_tokens=512,
133
  pad_token_id=tokenizer.eos_token_id
134
  )