hamxaameer committed on
Commit
7a3d769
·
verified ·
1 Parent(s): 5e4b481

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -159,6 +159,7 @@ def initialize_llm():
159
  logger.info(" Model ready for inference")
160
 
161
  # Create pipeline for generation
 
162
  logger.info(" Creating text-generation pipeline...")
163
  llm_client = pipeline(
164
  "text-generation",
@@ -166,8 +167,7 @@ def initialize_llm():
166
  tokenizer=tokenizer,
167
  max_new_tokens=200, # Reduced for faster generation
168
  pad_token_id=tokenizer.eos_token_id,
169
- eos_token_id=tokenizer.eos_token_id,
170
- device=0 if device == "cuda" else -1 # -1 for CPU
171
  )
172
 
173
  CONFIG["llm_model"] = LOCAL_PHI_MODEL
 
159
  logger.info(" Model ready for inference")
160
 
161
  # Create pipeline for generation
162
+ # CRITICAL: Do NOT specify device when using device_map="auto"
163
  logger.info(" Creating text-generation pipeline...")
164
  llm_client = pipeline(
165
  "text-generation",
 
167
  tokenizer=tokenizer,
168
  max_new_tokens=200, # Reduced for faster generation
169
  pad_token_id=tokenizer.eos_token_id,
170
+ eos_token_id=tokenizer.eos_token_id
 
171
  )
172
 
173
  CONFIG["llm_model"] = LOCAL_PHI_MODEL