Nada committed on
Commit
9b24e41
·
1 Parent(s): 5499036
Files changed (1) hide show
  1. chatbot.py +17 -14
chatbot.py CHANGED
@@ -318,27 +318,26 @@ Response:"""
318
 
319
  def _load_emotion_model(self):
320
  try:
321
- # Use safetensors explicitly for the primary model
322
  return pipeline(
323
  "text-classification",
324
  model="SamLowe/roberta-base-go_emotions",
325
  top_k=None,
326
- device_map="auto" if self.device == "cuda" else None,
327
  cache_dir=CACHE_DIR,
328
- use_safetensors=True # Explicitly use safetensors
329
  )
330
  except Exception as e:
331
  logger.error(f"Error loading emotion model: {e}")
332
- # Fallback to a simpler model with safetensors
333
  try:
334
  return pipeline(
335
  "text-classification",
336
  model="j-hartmann/emotion-english-distilroberta-base",
337
  return_all_scores=True,
338
- device_map="auto" if self.device == "cuda" else None,
339
  cache_dir=CACHE_DIR,
340
- use_safetensors=True, # Explicitly use safetensors
341
- from_tf=True # Use TensorFlow weights if available
342
  )
343
  except Exception as e:
344
  logger.error(f"Error loading fallback emotion model: {e}")
@@ -347,8 +346,8 @@ Response:"""
347
 
348
  def _initialize_llm(self, model_name: str, use_4bit: bool):
349
  try:
350
- # Configure quantization if needed
351
- if use_4bit:
352
  quantization_config = BitsAndBytesConfig(
353
  load_in_4bit=True,
354
  bnb_4bit_compute_dtype=torch.float16,
@@ -357,22 +356,25 @@ Response:"""
357
  )
358
  else:
359
  quantization_config = None
 
360
 
361
  # Load base model
362
  logger.info(f"Loading base model: {model_name}")
363
  base_model = AutoModelForCausalLM.from_pretrained(
364
  model_name,
365
  quantization_config=quantization_config,
366
- device_map="auto" if self.device == "cuda" else None,
367
  trust_remote_code=True,
368
- cache_dir=CACHE_DIR
 
369
  )
370
 
371
  # Load tokenizer
372
  logger.info("Loading tokenizer")
373
  tokenizer = AutoTokenizer.from_pretrained(
374
  model_name,
375
- cache_dir=CACHE_DIR
 
376
  )
377
  tokenizer.pad_token = tokenizer.eos_token
378
 
@@ -381,7 +383,8 @@ Response:"""
381
  model = PeftModel.from_pretrained(
382
  base_model,
383
  self.peft_model_path,
384
- cache_dir=CACHE_DIR
 
385
  )
386
  logger.info("Successfully loaded PEFT model")
387
 
@@ -395,7 +398,7 @@ Response:"""
395
  top_p=0.95,
396
  repetition_penalty=1.1,
397
  do_sample=True,
398
- device_map="auto" if self.device == "cuda" else None
399
  )
400
 
401
  # Create LangChain wrapper
 
318
 
319
  def _load_emotion_model(self):
320
  try:
321
+ # Load emotion model directly from Hugging Face
322
  return pipeline(
323
  "text-classification",
324
  model="SamLowe/roberta-base-go_emotions",
325
  top_k=None,
326
+ device_map="auto" if torch.cuda.is_available() else None,
327
  cache_dir=CACHE_DIR,
328
+ local_files_only=False # Ensure we download from Hugging Face
329
  )
330
  except Exception as e:
331
  logger.error(f"Error loading emotion model: {e}")
332
+ # Fallback to a simpler model
333
  try:
334
  return pipeline(
335
  "text-classification",
336
  model="j-hartmann/emotion-english-distilroberta-base",
337
  return_all_scores=True,
338
+ device_map="auto" if torch.cuda.is_available() else None,
339
  cache_dir=CACHE_DIR,
340
+ local_files_only=False # Ensure we download from Hugging Face
 
341
  )
342
  except Exception as e:
343
  logger.error(f"Error loading fallback emotion model: {e}")
 
346
 
347
  def _initialize_llm(self, model_name: str, use_4bit: bool):
348
  try:
349
+ # Configure quantization only if CUDA is available
350
+ if use_4bit and torch.cuda.is_available():
351
  quantization_config = BitsAndBytesConfig(
352
  load_in_4bit=True,
353
  bnb_4bit_compute_dtype=torch.float16,
 
356
  )
357
  else:
358
  quantization_config = None
359
+ logger.info("CUDA not available, running without quantization")
360
 
361
  # Load base model
362
  logger.info(f"Loading base model: {model_name}")
363
  base_model = AutoModelForCausalLM.from_pretrained(
364
  model_name,
365
  quantization_config=quantization_config,
366
+ device_map="auto" if torch.cuda.is_available() else None,
367
  trust_remote_code=True,
368
+ cache_dir=CACHE_DIR,
369
+ use_auth_token=os.environ.get('HF_TOKEN') # Add auth token for gated models
370
  )
371
 
372
  # Load tokenizer
373
  logger.info("Loading tokenizer")
374
  tokenizer = AutoTokenizer.from_pretrained(
375
  model_name,
376
+ cache_dir=CACHE_DIR,
377
+ use_auth_token=os.environ.get('HF_TOKEN') # Add auth token for gated models
378
  )
379
  tokenizer.pad_token = tokenizer.eos_token
380
 
 
383
  model = PeftModel.from_pretrained(
384
  base_model,
385
  self.peft_model_path,
386
+ cache_dir=CACHE_DIR,
387
+ use_auth_token=os.environ.get('HF_TOKEN') # Add auth token for gated models
388
  )
389
  logger.info("Successfully loaded PEFT model")
390
 
 
398
  top_p=0.95,
399
  repetition_penalty=1.1,
400
  do_sample=True,
401
+ device_map="auto" if torch.cuda.is_available() else None
402
  )
403
 
404
  # Create LangChain wrapper