Spaces:
Running
Running
CRITICAL FIX: Use Qwen/Qwen3-0.6B to match LoRA adapter training base model
Browse files- backend/config.py +8 -8
- backend/models/character_manager.py +3 -2
backend/config.py
CHANGED
|
@@ -18,19 +18,19 @@ class Settings(BaseSettings):
|
|
| 18 |
API_PORT: int = int(os.getenv("API_PORT", "8000"))
|
| 19 |
DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 20 |
|
| 21 |
-
# Model Configuration -
|
| 22 |
-
BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/
|
| 23 |
-
DEVICE: str = os.getenv("DEVICE", "cpu") #
|
| 24 |
-
MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "
|
| 25 |
-
TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.
|
| 26 |
TOP_P: float = float(os.getenv("TOP_P", "0.9"))
|
| 27 |
|
| 28 |
-
# Audio Configuration
|
| 29 |
SAMPLE_RATE: int = int(os.getenv("SAMPLE_RATE", "22050"))
|
| 30 |
AUDIO_FORMAT: str = os.getenv("AUDIO_FORMAT", "wav")
|
| 31 |
-
ENABLE_VOICE: bool = os.getenv("ENABLE_VOICE", "False").lower() == "true" #
|
| 32 |
|
| 33 |
-
# Character Configuration
|
| 34 |
DEFAULT_CHARACTER: str = os.getenv("DEFAULT_CHARACTER", "moses")
|
| 35 |
|
| 36 |
@property
|
|
|
|
| 18 |
API_PORT: int = int(os.getenv("API_PORT", "8000"))
|
| 19 |
DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 20 |
|
| 21 |
+
# Model Configuration - MUST match your LoRA training base model
|
| 22 |
+
BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/Qwen3-0.6B") # Your LoRA adapters are trained on this exact model
|
| 23 |
+
DEVICE: str = os.getenv("DEVICE", "cpu") # CPU for Spaces (you use cuda locally)
|
| 24 |
+
MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "1024"))
|
| 25 |
+
TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.8"))
|
| 26 |
TOP_P: float = float(os.getenv("TOP_P", "0.9"))
|
| 27 |
|
| 28 |
+
# Audio Configuration - Match your .env settings
|
| 29 |
SAMPLE_RATE: int = int(os.getenv("SAMPLE_RATE", "22050"))
|
| 30 |
AUDIO_FORMAT: str = os.getenv("AUDIO_FORMAT", "wav")
|
| 31 |
+
ENABLE_VOICE: bool = os.getenv("ENABLE_VOICE", "False").lower() == "true" # You use True locally, False for deployment
|
| 32 |
|
| 33 |
+
# Character Configuration - Match your .env
|
| 34 |
DEFAULT_CHARACTER: str = os.getenv("DEFAULT_CHARACTER", "moses")
|
| 35 |
|
| 36 |
@property
|
backend/models/character_manager.py
CHANGED
|
@@ -76,8 +76,9 @@ class CharacterManager:
|
|
| 76 |
logger.error(f"Failed to load base model {settings.BASE_MODEL}: {e}")
|
| 77 |
logger.info("Trying alternative Qwen models...")
|
| 78 |
try:
|
| 79 |
-
# Try Qwen2.5
|
| 80 |
-
fallback_model = "Qwen/Qwen2.5-0.5B-Instruct"
|
|
|
|
| 81 |
self.tokenizer = AutoTokenizer.from_pretrained(fallback_model, trust_remote_code=True)
|
| 82 |
if settings.DEVICE == "cuda" and torch.cuda.is_available():
|
| 83 |
self.base_model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 76 |
logger.error(f"Failed to load base model {settings.BASE_MODEL}: {e}")
|
| 77 |
logger.info("Trying alternative Qwen models...")
|
| 78 |
try:
|
| 79 |
+
# Fall back to Qwen2.5-0.5B-Instruct when the primary Qwen3 base model fails to load
|
| 80 |
+
fallback_model = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 81 |
+
logger.warning(f"Primary model {settings.BASE_MODEL} failed, trying fallback: {fallback_model}")
|
| 82 |
self.tokenizer = AutoTokenizer.from_pretrained(fallback_model, trust_remote_code=True)
|
| 83 |
if settings.DEVICE == "cuda" and torch.cuda.is_available():
|
| 84 |
self.base_model = AutoModelForCausalLM.from_pretrained(
|