Spaces:
Paused
Paused
Nada committed on
Commit ·
9b24e41
1
Parent(s): 5499036
up
Browse files - chatbot.py +17 -14
chatbot.py
CHANGED
|
@@ -318,27 +318,26 @@ Response:"""
|
|
| 318 |
|
| 319 |
def _load_emotion_model(self):
|
| 320 |
try:
|
| 321 |
-
#
|
| 322 |
return pipeline(
|
| 323 |
"text-classification",
|
| 324 |
model="SamLowe/roberta-base-go_emotions",
|
| 325 |
top_k=None,
|
| 326 |
-
device_map="auto" if
|
| 327 |
cache_dir=CACHE_DIR,
|
| 328 |
-
|
| 329 |
)
|
| 330 |
except Exception as e:
|
| 331 |
logger.error(f"Error loading emotion model: {e}")
|
| 332 |
-
# Fallback to a simpler model
|
| 333 |
try:
|
| 334 |
return pipeline(
|
| 335 |
"text-classification",
|
| 336 |
model="j-hartmann/emotion-english-distilroberta-base",
|
| 337 |
return_all_scores=True,
|
| 338 |
-
device_map="auto" if
|
| 339 |
cache_dir=CACHE_DIR,
|
| 340 |
-
|
| 341 |
-
from_tf=True # Use TensorFlow weights if available
|
| 342 |
)
|
| 343 |
except Exception as e:
|
| 344 |
logger.error(f"Error loading fallback emotion model: {e}")
|
|
@@ -347,8 +346,8 @@ Response:"""
|
|
| 347 |
|
| 348 |
def _initialize_llm(self, model_name: str, use_4bit: bool):
|
| 349 |
try:
|
| 350 |
-
# Configure quantization if
|
| 351 |
-
if use_4bit:
|
| 352 |
quantization_config = BitsAndBytesConfig(
|
| 353 |
load_in_4bit=True,
|
| 354 |
bnb_4bit_compute_dtype=torch.float16,
|
|
@@ -357,22 +356,25 @@ Response:"""
|
|
| 357 |
)
|
| 358 |
else:
|
| 359 |
quantization_config = None
|
|
|
|
| 360 |
|
| 361 |
# Load base model
|
| 362 |
logger.info(f"Loading base model: {model_name}")
|
| 363 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 364 |
model_name,
|
| 365 |
quantization_config=quantization_config,
|
| 366 |
-
device_map="auto" if
|
| 367 |
trust_remote_code=True,
|
| 368 |
-
cache_dir=CACHE_DIR
|
|
|
|
| 369 |
)
|
| 370 |
|
| 371 |
# Load tokenizer
|
| 372 |
logger.info("Loading tokenizer")
|
| 373 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 374 |
model_name,
|
| 375 |
-
cache_dir=CACHE_DIR
|
|
|
|
| 376 |
)
|
| 377 |
tokenizer.pad_token = tokenizer.eos_token
|
| 378 |
|
|
@@ -381,7 +383,8 @@ Response:"""
|
|
| 381 |
model = PeftModel.from_pretrained(
|
| 382 |
base_model,
|
| 383 |
self.peft_model_path,
|
| 384 |
-
cache_dir=CACHE_DIR
|
|
|
|
| 385 |
)
|
| 386 |
logger.info("Successfully loaded PEFT model")
|
| 387 |
|
|
@@ -395,7 +398,7 @@ Response:"""
|
|
| 395 |
top_p=0.95,
|
| 396 |
repetition_penalty=1.1,
|
| 397 |
do_sample=True,
|
| 398 |
-
device_map="auto" if
|
| 399 |
)
|
| 400 |
|
| 401 |
# Create LangChain wrapper
|
|
|
|
| 318 |
|
| 319 |
def _load_emotion_model(self):
|
| 320 |
try:
|
| 321 |
+
# Load emotion model directly from Hugging Face
|
| 322 |
return pipeline(
|
| 323 |
"text-classification",
|
| 324 |
model="SamLowe/roberta-base-go_emotions",
|
| 325 |
top_k=None,
|
| 326 |
+
device_map="auto" if torch.cuda.is_available() else None,
|
| 327 |
cache_dir=CACHE_DIR,
|
| 328 |
+
local_files_only=False # Ensure we download from Hugging Face
|
| 329 |
)
|
| 330 |
except Exception as e:
|
| 331 |
logger.error(f"Error loading emotion model: {e}")
|
| 332 |
+
# Fallback to a simpler model
|
| 333 |
try:
|
| 334 |
return pipeline(
|
| 335 |
"text-classification",
|
| 336 |
model="j-hartmann/emotion-english-distilroberta-base",
|
| 337 |
return_all_scores=True,
|
| 338 |
+
device_map="auto" if torch.cuda.is_available() else None,
|
| 339 |
cache_dir=CACHE_DIR,
|
| 340 |
+
local_files_only=False # Ensure we download from Hugging Face
|
|
|
|
| 341 |
)
|
| 342 |
except Exception as e:
|
| 343 |
logger.error(f"Error loading fallback emotion model: {e}")
|
|
|
|
| 346 |
|
| 347 |
def _initialize_llm(self, model_name: str, use_4bit: bool):
|
| 348 |
try:
|
| 349 |
+
# Configure quantization only if CUDA is available
|
| 350 |
+
if use_4bit and torch.cuda.is_available():
|
| 351 |
quantization_config = BitsAndBytesConfig(
|
| 352 |
load_in_4bit=True,
|
| 353 |
bnb_4bit_compute_dtype=torch.float16,
|
|
|
|
| 356 |
)
|
| 357 |
else:
|
| 358 |
quantization_config = None
|
| 359 |
+
logger.info("CUDA not available, running without quantization")
|
| 360 |
|
| 361 |
# Load base model
|
| 362 |
logger.info(f"Loading base model: {model_name}")
|
| 363 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 364 |
model_name,
|
| 365 |
quantization_config=quantization_config,
|
| 366 |
+
device_map="auto" if torch.cuda.is_available() else None,
|
| 367 |
trust_remote_code=True,
|
| 368 |
+
cache_dir=CACHE_DIR,
|
| 369 |
+
use_auth_token=os.environ.get('HF_TOKEN') # Add auth token for gated models
|
| 370 |
)
|
| 371 |
|
| 372 |
# Load tokenizer
|
| 373 |
logger.info("Loading tokenizer")
|
| 374 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 375 |
model_name,
|
| 376 |
+
cache_dir=CACHE_DIR,
|
| 377 |
+
use_auth_token=os.environ.get('HF_TOKEN') # Add auth token for gated models
|
| 378 |
)
|
| 379 |
tokenizer.pad_token = tokenizer.eos_token
|
| 380 |
|
|
|
|
| 383 |
model = PeftModel.from_pretrained(
|
| 384 |
base_model,
|
| 385 |
self.peft_model_path,
|
| 386 |
+
cache_dir=CACHE_DIR,
|
| 387 |
+
use_auth_token=os.environ.get('HF_TOKEN') # Add auth token for gated models
|
| 388 |
)
|
| 389 |
logger.info("Successfully loaded PEFT model")
|
| 390 |
|
|
|
|
| 398 |
top_p=0.95,
|
| 399 |
repetition_penalty=1.1,
|
| 400 |
do_sample=True,
|
| 401 |
+
device_map="auto" if torch.cuda.is_available() else None
|
| 402 |
)
|
| 403 |
|
| 404 |
# Create LangChain wrapper
|