Spaces:
Running
Running
Comprehensive fix: Update transformers for Qwen3 support and improve LoRA adapter compatibility
Browse files- backend/config.py +3 -2
- backend/models/character_manager.py +69 -14
- requirements.txt +4 -4
backend/config.py
CHANGED
|
@@ -18,8 +18,9 @@ class Settings(BaseSettings):
|
|
| 18 |
API_PORT: int = int(os.getenv("API_PORT", "8000"))
|
| 19 |
DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 20 |
|
| 21 |
-
# Model Configuration -
|
| 22 |
-
BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/
|
|
|
|
| 23 |
DEVICE: str = os.getenv("DEVICE", "cpu") # CPU for Spaces (you use cuda locally)
|
| 24 |
MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "1024"))
|
| 25 |
TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.8"))
|
|
|
|
| 18 |
API_PORT: int = int(os.getenv("API_PORT", "8000"))
|
| 19 |
DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 20 |
|
| 21 |
+
# Model Configuration - Try Qwen3 first, fallback to compatible model
|
| 22 |
+
BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/Qwen2.5-0.5B-Instruct") # Use compatible model for now
|
| 23 |
+
QWEN3_MODEL: str = "Qwen/Qwen3-0.6B" # Your original training model (will try first)
|
| 24 |
DEVICE: str = os.getenv("DEVICE", "cpu") # CPU for Spaces (you use cuda locally)
|
| 25 |
MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "1024"))
|
| 26 |
TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.8"))
|
backend/models/character_manager.py
CHANGED
|
@@ -35,16 +35,47 @@ class CharacterManager:
|
|
| 35 |
logger.info(f"Working from directory: {parent_dir}")
|
| 36 |
logger.info(f"LoRA adapters path: {settings.LORA_ADAPTERS_PATH}")
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
try:
|
| 39 |
-
|
| 40 |
-
logger.info(f"Loading tokenizer from: {settings.BASE_MODEL}")
|
| 41 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 42 |
-
|
| 43 |
trust_remote_code=True,
|
| 44 |
-
use_fast=True,
|
| 45 |
-
cache_dir=None
|
| 46 |
)
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
# Smart GPU/CPU loading
|
| 49 |
cuda_available = torch.cuda.is_available()
|
| 50 |
use_gpu = settings.DEVICE == "cuda" and cuda_available
|
|
@@ -178,7 +209,9 @@ class CharacterManager:
|
|
| 178 |
# Remove problematic parameters that cause LoraConfig errors
|
| 179 |
problematic_params = [
|
| 180 |
'alora_invocation_tokens', 'arrow_config',
|
| 181 |
-
'ensure_weight_tying', 'peft_version'
|
|
|
|
|
|
|
| 182 |
]
|
| 183 |
|
| 184 |
for param in problematic_params:
|
|
@@ -199,15 +232,37 @@ class CharacterManager:
|
|
| 199 |
temp_model_file = os.path.join(temp_dir, "adapter_model.safetensors")
|
| 200 |
shutil.copy2(os.path.join(adapter_path, "adapter_model.safetensors"), temp_model_file)
|
| 201 |
|
| 202 |
-
# Load with cleaned config
|
| 203 |
logger.info(f"Loading LoRA adapter with cleaned config for {character_id}")
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
|
| 212 |
self.character_models[character_id] = model_with_adapter
|
| 213 |
logger.info(f"✅ Successfully loaded LoRA adapter for {character_id} with cleaned config")
|
|
|
|
| 35 |
logger.info(f"Working from directory: {parent_dir}")
|
| 36 |
logger.info(f"LoRA adapters path: {settings.LORA_ADAPTERS_PATH}")
|
| 37 |
|
| 38 |
+
# First try your original Qwen3-0.6B model
|
| 39 |
+
qwen3_model = getattr(settings, 'QWEN3_MODEL', 'Qwen/Qwen3-0.6B')
|
| 40 |
+
|
| 41 |
try:
|
| 42 |
+
logger.info(f"Attempting to load original training model: {qwen3_model}")
|
|
|
|
| 43 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 44 |
+
qwen3_model,
|
| 45 |
trust_remote_code=True,
|
| 46 |
+
use_fast=True,
|
| 47 |
+
cache_dir=None
|
| 48 |
)
|
| 49 |
|
| 50 |
+
# If tokenizer works, try the model
|
| 51 |
+
if settings.DEVICE == "cuda" and torch.cuda.is_available():
|
| 52 |
+
self.base_model = AutoModelForCausalLM.from_pretrained(
|
| 53 |
+
qwen3_model,
|
| 54 |
+
torch_dtype=torch.float16,
|
| 55 |
+
device_map="auto",
|
| 56 |
+
trust_remote_code=True
|
| 57 |
+
)
|
| 58 |
+
else:
|
| 59 |
+
self.base_model = AutoModelForCausalLM.from_pretrained(
|
| 60 |
+
qwen3_model,
|
| 61 |
+
torch_dtype=torch.float32,
|
| 62 |
+
trust_remote_code=True
|
| 63 |
+
)
|
| 64 |
+
logger.info(f"✅ Successfully loaded original model: {qwen3_model}")
|
| 65 |
+
|
| 66 |
+
except Exception as e:
|
| 67 |
+
logger.warning(f"Original model {qwen3_model} failed: {e}")
|
| 68 |
+
|
| 69 |
+
# Fallback to compatible model
|
| 70 |
+
try:
|
| 71 |
+
logger.info(f"Loading compatible fallback model: {settings.BASE_MODEL}")
|
| 72 |
+
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 73 |
+
settings.BASE_MODEL,
|
| 74 |
+
trust_remote_code=True,
|
| 75 |
+
use_fast=True,
|
| 76 |
+
cache_dir=None
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
# Smart GPU/CPU loading
|
| 80 |
cuda_available = torch.cuda.is_available()
|
| 81 |
use_gpu = settings.DEVICE == "cuda" and cuda_available
|
|
|
|
| 209 |
# Remove problematic parameters that cause LoraConfig errors
|
| 210 |
problematic_params = [
|
| 211 |
'alora_invocation_tokens', 'arrow_config',
|
| 212 |
+
'ensure_weight_tying', 'peft_version', 'corda_config',
|
| 213 |
+
'eva_config', 'megatron_config', 'megatron_core',
|
| 214 |
+
'loftq_config', 'qalora_group_size'
|
| 215 |
]
|
| 216 |
|
| 217 |
for param in problematic_params:
|
|
|
|
| 232 |
temp_model_file = os.path.join(temp_dir, "adapter_model.safetensors")
|
| 233 |
shutil.copy2(os.path.join(adapter_path, "adapter_model.safetensors"), temp_model_file)
|
| 234 |
|
| 235 |
+
# Load with cleaned config - try different approaches
|
| 236 |
logger.info(f"Loading LoRA adapter with cleaned config for {character_id}")
|
| 237 |
+
|
| 238 |
+
try:
|
| 239 |
+
# First attempt: Standard loading
|
| 240 |
+
model_with_adapter = PeftModel.from_pretrained(
|
| 241 |
+
self.base_model,
|
| 242 |
+
temp_dir,
|
| 243 |
+
adapter_name=character_id,
|
| 244 |
+
is_trainable=False,
|
| 245 |
+
torch_dtype=torch.float32,
|
| 246 |
+
)
|
| 247 |
+
except Exception as inner_e:
|
| 248 |
+
logger.warning(f"Standard LoRA loading failed: {inner_e}")
|
| 249 |
+
|
| 250 |
+
# Second attempt: Force compatibility mode
|
| 251 |
+
logger.info("Trying compatibility mode for LoRA loading")
|
| 252 |
+
|
| 253 |
+
# Update config to match current model architecture
|
| 254 |
+
config_data['base_model_name_or_path'] = self.base_model.config._name_or_path
|
| 255 |
+
|
| 256 |
+
with open(temp_config_file, 'w') as f:
|
| 257 |
+
json.dump(config_data, f, indent=2)
|
| 258 |
+
|
| 259 |
+
model_with_adapter = PeftModel.from_pretrained(
|
| 260 |
+
self.base_model,
|
| 261 |
+
temp_dir,
|
| 262 |
+
adapter_name=character_id,
|
| 263 |
+
is_trainable=False,
|
| 264 |
+
torch_dtype=torch.float32,
|
| 265 |
+
)
|
| 266 |
|
| 267 |
self.character_models[character_id] = model_with_adapter
|
| 268 |
logger.info(f"✅ Successfully loaded LoRA adapter for {character_id} with cleaned config")
|
requirements.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
-
# Core ML Libraries -
|
| 2 |
-
torch>=2.0.0
|
| 3 |
-
transformers>=4.
|
| 4 |
-
peft>=0.8.0
|
| 5 |
accelerate>=0.24.0
|
| 6 |
datasets>=2.14.0
|
| 7 |
huggingface-hub>=0.19.0
|
|
|
|
| 1 |
+
# Core ML Libraries - Latest versions for Qwen3 support
|
| 2 |
+
torch>=2.0.0
|
| 3 |
+
transformers>=4.45.0
|
| 4 |
+
peft>=0.8.0
|
| 5 |
accelerate>=0.24.0
|
| 6 |
datasets>=2.14.0
|
| 7 |
huggingface-hub>=0.19.0
|