Update app.py
Browse files
app.py
CHANGED
|
@@ -123,12 +123,12 @@ def initialize_llm():
     model.eval()

     # Create pipeline for generation
+    # NOTE: When using accelerate/quantization, do NOT specify device parameter
     logger.info(" Creating text-generation pipeline...")
     llm_client = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        device=0 if device == "cuda" else -1,
         max_new_tokens=512,
         pad_token_id=tokenizer.eos_token_id
     )