developer-lunark committed on
Commit
f383abd
·
verified ·
1 Parent(s): a29ae54

Add error info to mock response for debugging

Browse files
Files changed (1) hide show
  1. app.py +21 -4
app.py CHANGED
@@ -163,10 +163,12 @@ class ModelManager:
163
  self.current_model = None
164
  self.current_model_name = None
165
  self.tokenizer = None
 
166
 
167
  def load_model(self, model_name: str):
168
  """Load model with 4-bit quantization and LoRA adapter"""
169
  if not GPU_AVAILABLE:
 
170
  return False
171
 
172
  if self.current_model_name == model_name:
@@ -177,10 +179,13 @@ class ModelManager:
177
 
178
  model_info = MODELS.get(model_name)
179
  if not model_info:
 
180
  return False
181
 
182
  try:
183
  print(f"Loading {model_name}...")
 
 
184
 
185
  # 4-bit quantization config
186
  bnb_config = BitsAndBytesConfig(
@@ -191,35 +196,46 @@ class ModelManager:
191
  )
192
 
193
  # Load base model
 
194
  base_model = AutoModelForCausalLM.from_pretrained(
195
  model_info["base_model"],
196
  quantization_config=bnb_config,
197
  device_map="auto",
198
  trust_remote_code=True,
199
  )
 
200
 
201
  # Load LoRA adapter
 
202
  self.current_model = PeftModel.from_pretrained(
203
  base_model,
204
  model_info["hf_repo"],
205
  trust_remote_code=True,
206
  )
207
  self.current_model.eval()
 
208
 
209
  # Load tokenizer
 
210
  self.tokenizer = AutoTokenizer.from_pretrained(
211
  model_info["base_model"],
212
  trust_remote_code=True,
213
  )
214
  if self.tokenizer.pad_token is None:
215
  self.tokenizer.pad_token = self.tokenizer.eos_token
 
216
 
217
  self.current_model_name = model_name
218
- print(f"Loaded {model_name} successfully")
 
219
  return True
220
 
221
  except Exception as e:
222
- print(f"Error loading {model_name}: {e}")
 
 
 
 
223
  self.unload_model()
224
  return False
225
 
@@ -269,8 +285,9 @@ class ModelManager:
269
  return self._mock_response(model_name)
270
 
271
  def _mock_response(self, model_name: str) -> str:
272
- """Fallback mock response"""
273
- return f"<think>\n모델 {model_name} 응답을 생성 중...\n</think>\n\n안녕~ 반가워!"
 
274
 
275
  # Global model manager
276
  model_manager = ModelManager()
 
163
  self.current_model = None
164
  self.current_model_name = None
165
  self.tokenizer = None
166
+ self.last_error = None
167
 
168
  def load_model(self, model_name: str):
169
  """Load model with 4-bit quantization and LoRA adapter"""
170
  if not GPU_AVAILABLE:
171
+ self.last_error = "GPU not available"
172
  return False
173
 
174
  if self.current_model_name == model_name:
 
179
 
180
  model_info = MODELS.get(model_name)
181
  if not model_info:
182
+ self.last_error = f"Model {model_name} not found in registry"
183
  return False
184
 
185
  try:
186
  print(f"Loading {model_name}...")
187
+ print(f" Base model: {model_info['base_model']}")
188
+ print(f" LoRA adapter: {model_info['hf_repo']}")
189
 
190
  # 4-bit quantization config
191
  bnb_config = BitsAndBytesConfig(
 
196
  )
197
 
198
  # Load base model
199
+ print(" Loading base model...")
200
  base_model = AutoModelForCausalLM.from_pretrained(
201
  model_info["base_model"],
202
  quantization_config=bnb_config,
203
  device_map="auto",
204
  trust_remote_code=True,
205
  )
206
+ print(" Base model loaded!")
207
 
208
  # Load LoRA adapter
209
+ print(" Loading LoRA adapter...")
210
  self.current_model = PeftModel.from_pretrained(
211
  base_model,
212
  model_info["hf_repo"],
213
  trust_remote_code=True,
214
  )
215
  self.current_model.eval()
216
+ print(" LoRA adapter loaded!")
217
 
218
  # Load tokenizer
219
+ print(" Loading tokenizer...")
220
  self.tokenizer = AutoTokenizer.from_pretrained(
221
  model_info["base_model"],
222
  trust_remote_code=True,
223
  )
224
  if self.tokenizer.pad_token is None:
225
  self.tokenizer.pad_token = self.tokenizer.eos_token
226
+ print(" Tokenizer loaded!")
227
 
228
  self.current_model_name = model_name
229
+ self.last_error = None
230
+ print(f"Loaded {model_name} successfully!")
231
  return True
232
 
233
  except Exception as e:
234
+ import traceback
235
+ error_msg = f"{type(e).__name__}: {str(e)}"
236
+ print(f"Error loading {model_name}: {error_msg}")
237
+ traceback.print_exc()
238
+ self.last_error = error_msg
239
  self.unload_model()
240
  return False
241
 
 
285
  return self._mock_response(model_name)
286
 
287
  def _mock_response(self, model_name: str) -> str:
288
+ """Fallback mock response with error info"""
289
+ error_info = f"\nError: {self.last_error}" if self.last_error else ""
290
+ return f"<think>\n[Mock Mode] 모델 로딩 실패{error_info}\n</think>\n\n안녕~ 반가워!"
291
 
292
  # Global model manager
293
  model_manager = ModelManager()