Update app.py
Browse files
app.py
CHANGED
@@ -159,6 +159,7 @@ def initialize_llm():
     logger.info(" Model ready for inference")

     # Create pipeline for generation
+    # CRITICAL: Do NOT specify device when using device_map="auto"
     logger.info(" Creating text-generation pipeline...")
     llm_client = pipeline(
         "text-generation",
@@ -166,8 +167,7 @@ def initialize_llm():
         tokenizer=tokenizer,
         max_new_tokens=200,  # Reduced for faster generation
         pad_token_id=tokenizer.eos_token_id,
-        eos_token_id=tokenizer.eos_token_id,
-        device=0 if device == "cuda" else -1  # -1 for CPU
+        eos_token_id=tokenizer.eos_token_id
     )

     CONFIG["llm_model"] = LOCAL_PHI_MODEL