ButterM40 committed on
Commit
bf6aead
·
1 Parent(s): aaf38b0

Comprehensive fix: Update transformers for Qwen3 support and improve LoRA adapter compatibility

Browse files
backend/config.py CHANGED
@@ -18,8 +18,9 @@ class Settings(BaseSettings):
18
  API_PORT: int = int(os.getenv("API_PORT", "8000"))
19
  DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
20
 
21
- # Model Configuration - MUST match your LoRA training base model
22
- BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/Qwen3-0.6B") # Your LoRA adapters are trained on this exact model
 
23
  DEVICE: str = os.getenv("DEVICE", "cpu") # CPU for Spaces (you use cuda locally)
24
  MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "1024"))
25
  TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.8"))
 
18
  API_PORT: int = int(os.getenv("API_PORT", "8000"))
19
  DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
20
 
21
+ # Model Configuration - Try Qwen3 first, fallback to compatible model
22
+ BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/Qwen2.5-0.5B-Instruct") # Use compatible model for now
23
+ QWEN3_MODEL: str = "Qwen/Qwen3-0.6B" # Your original training model (will try first)
24
  DEVICE: str = os.getenv("DEVICE", "cpu") # CPU for Spaces (you use cuda locally)
25
  MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "1024"))
26
  TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.8"))
backend/models/character_manager.py CHANGED
@@ -35,16 +35,47 @@ class CharacterManager:
35
  logger.info(f"Working from directory: {parent_dir}")
36
  logger.info(f"LoRA adapters path: {settings.LORA_ADAPTERS_PATH}")
37
 
 
 
 
38
  try:
39
- # Load tokenizer from HuggingFace Hub (for Spaces deployment)
40
- logger.info(f"Loading tokenizer from: {settings.BASE_MODEL}")
41
  self.tokenizer = AutoTokenizer.from_pretrained(
42
- settings.BASE_MODEL,
43
  trust_remote_code=True,
44
- use_fast=True, # Use fast tokenizer
45
- cache_dir=None # Use default HF cache
46
  )
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  # Smart GPU/CPU loading
49
  cuda_available = torch.cuda.is_available()
50
  use_gpu = settings.DEVICE == "cuda" and cuda_available
@@ -178,7 +209,9 @@ class CharacterManager:
178
  # Remove problematic parameters that cause LoraConfig errors
179
  problematic_params = [
180
  'alora_invocation_tokens', 'arrow_config',
181
- 'ensure_weight_tying', 'peft_version'
 
 
182
  ]
183
 
184
  for param in problematic_params:
@@ -199,15 +232,37 @@ class CharacterManager:
199
  temp_model_file = os.path.join(temp_dir, "adapter_model.safetensors")
200
  shutil.copy2(os.path.join(adapter_path, "adapter_model.safetensors"), temp_model_file)
201
 
202
- # Load with cleaned config
203
  logger.info(f"Loading LoRA adapter with cleaned config for {character_id}")
204
- model_with_adapter = PeftModel.from_pretrained(
205
- self.base_model,
206
- temp_dir,
207
- adapter_name=character_id,
208
- is_trainable=False,
209
- torch_dtype=torch.float32,
210
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
  self.character_models[character_id] = model_with_adapter
213
  logger.info(f"✅ Successfully loaded LoRA adapter for {character_id} with cleaned config")
 
35
  logger.info(f"Working from directory: {parent_dir}")
36
  logger.info(f"LoRA adapters path: {settings.LORA_ADAPTERS_PATH}")
37
 
38
+ # First try your original Qwen3-0.6B model
39
+ qwen3_model = getattr(settings, 'QWEN3_MODEL', 'Qwen/Qwen3-0.6B')
40
+
41
  try:
42
+ logger.info(f"Attempting to load original training model: {qwen3_model}")
 
43
  self.tokenizer = AutoTokenizer.from_pretrained(
44
+ qwen3_model,
45
  trust_remote_code=True,
46
+ use_fast=True,
47
+ cache_dir=None
48
  )
49
 
50
+ # If tokenizer works, try the model
51
+ if settings.DEVICE == "cuda" and torch.cuda.is_available():
52
+ self.base_model = AutoModelForCausalLM.from_pretrained(
53
+ qwen3_model,
54
+ torch_dtype=torch.float16,
55
+ device_map="auto",
56
+ trust_remote_code=True
57
+ )
58
+ else:
59
+ self.base_model = AutoModelForCausalLM.from_pretrained(
60
+ qwen3_model,
61
+ torch_dtype=torch.float32,
62
+ trust_remote_code=True
63
+ )
64
+ logger.info(f"✅ Successfully loaded original model: {qwen3_model}")
65
+
66
+ except Exception as e:
67
+ logger.warning(f"Original model {qwen3_model} failed: {e}")
68
+
69
+ # Fallback to compatible model
70
+ try:
71
+ logger.info(f"Loading compatible fallback model: {settings.BASE_MODEL}")
72
+ self.tokenizer = AutoTokenizer.from_pretrained(
73
+ settings.BASE_MODEL,
74
+ trust_remote_code=True,
75
+ use_fast=True,
76
+ cache_dir=None
77
+ )
78
+
79
  # Smart GPU/CPU loading
80
  cuda_available = torch.cuda.is_available()
81
  use_gpu = settings.DEVICE == "cuda" and cuda_available
 
209
  # Remove problematic parameters that cause LoraConfig errors
210
  problematic_params = [
211
  'alora_invocation_tokens', 'arrow_config',
212
+ 'ensure_weight_tying', 'peft_version', 'corda_config',
213
+ 'eva_config', 'megatron_config', 'megatron_core',
214
+ 'loftq_config', 'qalora_group_size'
215
  ]
216
 
217
  for param in problematic_params:
 
232
  temp_model_file = os.path.join(temp_dir, "adapter_model.safetensors")
233
  shutil.copy2(os.path.join(adapter_path, "adapter_model.safetensors"), temp_model_file)
234
 
235
+ # Load with cleaned config - try different approaches
236
  logger.info(f"Loading LoRA adapter with cleaned config for {character_id}")
237
+
238
+ try:
239
+ # First attempt: Standard loading
240
+ model_with_adapter = PeftModel.from_pretrained(
241
+ self.base_model,
242
+ temp_dir,
243
+ adapter_name=character_id,
244
+ is_trainable=False,
245
+ torch_dtype=torch.float32,
246
+ )
247
+ except Exception as inner_e:
248
+ logger.warning(f"Standard LoRA loading failed: {inner_e}")
249
+
250
+ # Second attempt: Force compatibility mode
251
+ logger.info("Trying compatibility mode for LoRA loading")
252
+
253
+ # Update config to match current model architecture
254
+ config_data['base_model_name_or_path'] = self.base_model.config._name_or_path
255
+
256
+ with open(temp_config_file, 'w') as f:
257
+ json.dump(config_data, f, indent=2)
258
+
259
+ model_with_adapter = PeftModel.from_pretrained(
260
+ self.base_model,
261
+ temp_dir,
262
+ adapter_name=character_id,
263
+ is_trainable=False,
264
+ torch_dtype=torch.float32,
265
+ )
266
 
267
  self.character_models[character_id] = model_with_adapter
268
  logger.info(f"✅ Successfully loaded LoRA adapter for {character_id} with cleaned config")
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
- # Core ML Libraries - Fixed versions for compatibility
2
- torch>=2.0.0,<2.5.0
3
- transformers>=4.36.0,<4.50.0
4
- peft>=0.8.0,<0.14.0
5
  accelerate>=0.24.0
6
  datasets>=2.14.0
7
  huggingface-hub>=0.19.0
 
1
+ # Core ML Libraries - Latest versions for Qwen3 support
2
+ torch>=2.0.0
3
+ transformers>=4.45.0
4
+ peft>=0.8.0
5
  accelerate>=0.24.0
6
  datasets>=2.14.0
7
  huggingface-hub>=0.19.0