Fix: Use Qwen3-0.6B (correct model) with proper PEFT adapter switching via set_adapter()
backend/models/lightweight_character_manager.py
CHANGED
@@ -1,6 +1,6 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from peft import PeftModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel
 import logging
 from typing import Dict, List
 import os
@@ -11,20 +11,22 @@ from config import settings
 logger = logging.getLogger(__name__)
 
 class CharacterManager:
-    """Lightweight character manager
+    """Lightweight character manager using PEFT adapter switching"""
 
     def __init__(self):
         self.base_model = None
         self.tokenizer = None
+        self.peft_model = None # Single PeftModel with multiple adapters
         self.current_character = None
-        self.character_adapters = {} # Store adapter weights, not full models
         self.character_prompts = {}
+        self.available_adapters = []
 
     async def initialize(self):
        """Initialize base model ONCE and load all character LoRA adapters"""
        logger.info("🔄 Loading base model (ONE instance for all characters)...")
 
-
+        # MUST use Qwen3-0.6B - this is what the LoRA adapters were trained on!
+        model_name = "Qwen/Qwen3-0.6B"
 
         try:
             self.tokenizer = AutoTokenizer.from_pretrained(
@@ -33,7 +35,7 @@ class CharacterManager:
                 use_fast=True
             )
 
-            # Load base model ONCE
+            # Load base model ONCE
             self.base_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype=torch.float32,
@@ -53,9 +55,46 @@
         # Load character prompts
         self._load_character_prompts()
 
-        #
-
-
+        # Load first character's adapter to create PeftModel, then add others
+        characters = ["moses", "samsung_employee", "jinx"]
+        first_loaded = False
+
+        for idx, character_id in enumerate(characters):
+            adapter_path = os.path.join(settings.LORA_ADAPTERS_PATH, character_id)
+            adapter_model_path = os.path.join(adapter_path, "adapter_model.safetensors")
+
+            if not os.path.exists(adapter_model_path):
+                logger.warning(f"⚠️ No LoRA adapter for {character_id}")
+                continue
+
+            try:
+                if not first_loaded:
+                    # Load first adapter to create PeftModel
+                    logger.info(f"Loading first adapter: {character_id}...")
+                    self.peft_model = PeftModel.from_pretrained(
+                        self.base_model,
+                        adapter_path,
+                        adapter_name=character_id
+                    )
+                    first_loaded = True
+                    self.current_character = character_id
+                    self.available_adapters.append(character_id)
+                    logger.info(f"✅ Loaded {character_id} adapter (base)")
+                else:
+                    # Add additional adapters to existing PeftModel
+                    logger.info(f"Adding adapter: {character_id}...")
+                    self.peft_model.load_adapter(adapter_path, adapter_name=character_id)
+                    self.available_adapters.append(character_id)
+                    logger.info(f"✅ Added {character_id} adapter")
+
+            except Exception as e:
+                logger.warning(f"⚠️ Could not load LoRA for {character_id}: {e}")
+
+        if not first_loaded:
+            logger.warning("⚠️ No LoRA adapters loaded - using base model with prompts only")
+            self.peft_model = self.base_model
+        else:
+            logger.info(f"✅ Loaded {len(self.available_adapters)} character adapters: {self.available_adapters}")
 
         logger.info("✅ Character manager initialized")
 
@@ -93,54 +132,22 @@ Speak with:
 NEVER mention biblical things or Samsung products."""
         }
 
-    async def _load_character_adapter(self, character_id: str):
-        """Try to load LoRA adapter weights (graceful failure if missing)"""
-        adapter_path = os.path.join(settings.LORA_ADAPTERS_PATH, character_id)
-        adapter_model_path = os.path.join(adapter_path, "adapter_model.safetensors")
-
-        if not os.path.exists(adapter_model_path):
-            logger.warning(f"⚠️ No LoRA adapter for {character_id} - will use prompts only")
-            return
-
-        try:
-            logger.info(f"Loading LoRA adapter for {character_id}...")
-
-            # Load adapter onto base model temporarily
-            model_with_adapter = PeftModel.from_pretrained(
-                self.base_model,
-                adapter_path,
-                adapter_name=character_id
-            )
-
-            # Extract and store just the adapter weights (tiny!)
-            self.character_adapters[character_id] = get_peft_model_state_dict(model_with_adapter)
-
-            # Clean up - we only need the weights
-            del model_with_adapter
-            torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-            logger.info(f"✅ Loaded LoRA adapter for {character_id}")
-
-        except Exception as e:
-            logger.warning(f"⚠️ Could not load LoRA for {character_id}: {e}")
-            logger.info(f"Will use system prompts only for {character_id}")
-
     def _switch_to_character(self, character_id: str):
-        """Switch
+        """Switch active LoRA adapter to the specified character"""
         if self.current_character == character_id:
-            return # Already
+            return # Already active
 
-
-        if character_id in self.character_adapters:
+        if character_id in self.available_adapters and self.peft_model is not None:
             try:
-                #
-                self.
-
-                logger.info(f"✅ Switched to {character_id}
-            except:
-                logger.warning(f"⚠️
-
-
+                # Switch to this character's adapter
+                self.peft_model.set_adapter(character_id)
+                self.current_character = character_id
+                logger.info(f"✅ Switched to {character_id} adapter")
+            except Exception as e:
+                logger.warning(f"⚠️ Could not switch to {character_id}: {e}")
+        else:
+            logger.info(f"Using base model for {character_id} (no adapter)")
+            self.current_character = character_id
 
     def generate_response(
         self,
@@ -150,7 +157,7 @@ NEVER mention biblical things or Samsung products."""
     ) -> str:
         """Generate response as specific character"""
 
-        # Switch to character
+        # Switch to character's adapter
        self._switch_to_character(character_id)
 
         # Build conversation with character prompt
@@ -175,10 +182,13 @@ NEVER mention biblical things or Samsung products."""
             truncation=True
         )
 
+        # Use the correct model (PeftModel if adapters loaded, base model otherwise)
+        model = self.peft_model if self.peft_model is not None else self.base_model
+
         # Generate
         try:
             with torch.no_grad():
-                outputs =
+                outputs = model.generate(
                     **inputs,
                     max_new_tokens=100,
                     temperature=0.8,
@@ -230,3 +240,4 @@ NEVER mention biblical things or Samsung products."""
             "jinx": "*grins mischievously* Hey there! Ready for some chaos?"
         }
         return fallbacks.get(character_id, "Hello! How can I help you?")
+
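
For context, the pattern this commit adopts (one base model wrapped in a single PeftModel, each character registered as a named adapter and activated via set_adapter()) can be exercised on its own. The sketch below is illustrative rather than part of the repository: the adapter directories adapters/moses and adapters/jinx and the prompt text are made-up stand-ins for whatever settings.LORA_ADAPTERS_PATH contains, and it assumes the adapters were trained against Qwen/Qwen3-0.6B, as the commit message states.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftConfig, PeftModel

base_id = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float32)

# Sanity check: an adapter only fits the base model it was trained on.
print(PeftConfig.from_pretrained("adapters/moses").base_model_name_or_path)  # expect Qwen/Qwen3-0.6B

# The first adapter wraps the base model into a PeftModel; later adapters are
# registered on the same wrapper, so the base weights exist in memory only once.
peft_model = PeftModel.from_pretrained(base_model, "adapters/moses", adapter_name="moses")
peft_model.load_adapter("adapters/jinx", adapter_name="jinx")

# Switching characters just activates a different named adapter.
peft_model.set_adapter("jinx")
inputs = tokenizer("Hello there!", return_tensors="pt")
with torch.no_grad():
    outputs = peft_model.generate(**inputs, max_new_tokens=50, do_sample=True, temperature=0.8)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

set_adapter() only changes which LoRA weights are active; nothing is copied or reloaded, so the per-character cost is the size of the adapter weights rather than another full copy of the model.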