Spaces:
Running
Running
CRITICAL FIX: Use Qwen/Qwen3-0.6B to match LoRA adapter training base model
Browse files- backend/config.py +8 -8
- backend/models/character_manager.py +3 -2
backend/config.py
CHANGED
|
@@ -18,19 +18,19 @@ class Settings(BaseSettings):
|
|
| 18 |
API_PORT: int = int(os.getenv("API_PORT", "8000"))
|
| 19 |
DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 20 |
|
| 21 |
-
# Model Configuration -
|
| 22 |
-
BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/
|
| 23 |
-
DEVICE: str = os.getenv("DEVICE", "cpu") #
|
| 24 |
-
MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "
|
| 25 |
-
TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.
|
| 26 |
TOP_P: float = float(os.getenv("TOP_P", "0.9"))
|
| 27 |
|
| 28 |
-
# Audio Configuration
|
| 29 |
SAMPLE_RATE: int = int(os.getenv("SAMPLE_RATE", "22050"))
|
| 30 |
AUDIO_FORMAT: str = os.getenv("AUDIO_FORMAT", "wav")
|
| 31 |
-
ENABLE_VOICE: bool = os.getenv("ENABLE_VOICE", "False").lower() == "true" #
|
| 32 |
|
| 33 |
-
# Character Configuration
|
| 34 |
DEFAULT_CHARACTER: str = os.getenv("DEFAULT_CHARACTER", "moses")
|
| 35 |
|
| 36 |
@property
|
|
|
|
| 18 |
API_PORT: int = int(os.getenv("API_PORT", "8000"))
|
| 19 |
DEBUG: bool = os.getenv("DEBUG", "True").lower() == "true"
|
| 20 |
|
| 21 |
+
# Model Configuration - MUST match your LoRA training base model
|
| 22 |
+
BASE_MODEL: str = os.getenv("BASE_MODEL", "Qwen/Qwen3-0.6B") # Your LoRA adapters are trained on this exact model
|
| 23 |
+
DEVICE: str = os.getenv("DEVICE", "cpu") # CPU for Spaces (you use cuda locally)
|
| 24 |
+
MAX_LENGTH: int = int(os.getenv("MAX_LENGTH", "1024"))
|
| 25 |
+
TEMPERATURE: float = float(os.getenv("TEMPERATURE", "0.8"))
|
| 26 |
TOP_P: float = float(os.getenv("TOP_P", "0.9"))
|
| 27 |
|
| 28 |
+
# Audio Configuration - Match your .env settings
|
| 29 |
SAMPLE_RATE: int = int(os.getenv("SAMPLE_RATE", "22050"))
|
| 30 |
AUDIO_FORMAT: str = os.getenv("AUDIO_FORMAT", "wav")
|
| 31 |
+
ENABLE_VOICE: bool = os.getenv("ENABLE_VOICE", "False").lower() == "true" # You use True locally, False for deployment
|
| 32 |
|
| 33 |
+
# Character Configuration - Match your .env
|
| 34 |
DEFAULT_CHARACTER: str = os.getenv("DEFAULT_CHARACTER", "moses")
|
| 35 |
|
| 36 |
@property
|
backend/models/character_manager.py
CHANGED
|
@@ -76,8 +76,9 @@ class CharacterManager:
|
|
| 76 |
logger.error(f"Failed to load base model {settings.BASE_MODEL}: {e}")
|
| 77 |
logger.info("Trying alternative Qwen models...")
|
| 78 |
try:
|
| 79 |
-
# Try Qwen2.5
|
| 80 |
-
fallback_model = "Qwen/Qwen2.5-0.5B-Instruct"
|
|
|
|
| 81 |
self.tokenizer = AutoTokenizer.from_pretrained(fallback_model, trust_remote_code=True)
|
| 82 |
if settings.DEVICE == "cuda" and torch.cuda.is_available():
|
| 83 |
self.base_model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 76 |
logger.error(f"Failed to load base model {settings.BASE_MODEL}: {e}")
|
| 77 |
logger.info("Trying alternative Qwen models...")
|
| 78 |
try:
|
| 79 |
+
# Fall back to Qwen2.5-0.5B-Instruct when the primary Qwen3 base model fails to load
|
| 80 |
+
fallback_model = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 81 |
+
logger.warning(f"Primary model {settings.BASE_MODEL} failed, trying fallback: {fallback_model}")
|
| 82 |
self.tokenizer = AutoTokenizer.from_pretrained(fallback_model, trust_remote_code=True)
|
| 83 |
if settings.DEVICE == "cuda" and torch.cuda.is_available():
|
| 84 |
self.base_model = AutoModelForCausalLM.from_pretrained(
|