Tameem7 commited on
Commit
fd35193
·
1 Parent(s): a129880
app.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Gradio web application for chatting with 3 persona LoRA adapters.
4
+ Personas: Dog, Cat, Bird
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import sys
11
+ import types
12
+ import json
13
+ import gc
14
+ import gradio as gr
15
+ import torch
16
+ from pathlib import Path
17
+
18
+ # Disable torch.compile and prevent bitsandbytes issues
19
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
20
+ os.environ["BITSANDBYTES_NOWELCOME"] = "1"
21
+ os.environ["DISABLE_BITSANDBYTES_AUTO_INSTALL"] = "1"
22
+
23
+ # Patch import system to prevent bitsandbytes import
24
+ _original_import = __builtins__.__import__
25
+
26
+ def _patched_import(name, globals=None, locals=None, fromlist=(), level=0):
27
+ if name == "bitsandbytes" or (name and name.startswith("bitsandbytes")):
28
+ if name not in sys.modules:
29
+ dummy = types.ModuleType(name)
30
+ dummy.__version__ = "0.0.0"
31
+ dummy.nn = types.ModuleType("nn")
32
+ dummy.optim = types.ModuleType("optim")
33
+ dummy.cuda_setup = types.ModuleType("cuda_setup")
34
+
35
+ class DummyLinear8bitLt:
36
+ pass
37
+ class DummyLinear4bit:
38
+ pass
39
+
40
+ dummy.nn.Linear8bitLt = DummyLinear8bitLt
41
+ dummy.nn.Linear4bit = DummyLinear4bit
42
+
43
+ sys.modules[name] = dummy
44
+ sys.modules[f"{name}.nn"] = dummy.nn
45
+ sys.modules[f"{name}.optim"] = dummy.optim
46
+ sys.modules[f"{name}.cuda_setup"] = dummy.cuda_setup
47
+ return sys.modules[name]
48
+ return _original_import(name, globals, locals, fromlist, level)
49
+
50
+ if isinstance(__builtins__, dict):
51
+ __builtins__["__import__"] = _patched_import
52
+ else:
53
+ __builtins__.__import__ = _patched_import
54
+
55
+ # Disable torch.compile
56
+ try:
57
+ torch._dynamo.config.suppress_errors = True
58
+ torch._dynamo.config.disable = True
59
+ except:
60
+ pass
61
+
62
+ if hasattr(torch, "compile"):
63
+ _original_torch_compile = torch.compile
64
+ def _noop_compile(func=None, *args, **kwargs):
65
+ if func is not None:
66
+ return func
67
+ def decorator(f):
68
+ return f
69
+ return decorator
70
+ torch.compile = _noop_compile
71
+
72
+ from peft import PeftModel
73
+ from transformers import AutoModelForCausalLM, AutoTokenizer
74
+
75
# Configuration
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# Hub repo + subfolder holding each persona's trained LoRA adapter.
ADAPTER_PATHS = {
    "dog": "Tameem7/Persona-Animal/dog",
    "cat": "Tameem7/Persona-Animal/cat",
    "bird": "Tameem7/Persona-Animal/bird",
}

# Global variables — single-adapter cache: only one persona is resident at a time.
base_model = None        # shared base LM, loaded once by load_base_model()
base_tokenizer = None    # shared tokenizer, loaded once
current_persona = None   # key of the adapter currently loaded
current_model = None     # PeftModel wrapping a fresh base-model copy
current_tokenizer = None  # alias of base_tokenizer once a persona is active
current_config = None    # parsed persona_config.json (or fallback dict)
90
+
91
+
92
def load_base_model():
    """Load the shared base model and tokenizer once, caching them in globals.

    Returns:
        Tuple of (model, tokenizer). Subsequent calls return the cached pair.
    """
    global base_model, base_tokenizer

    if base_model is not None:
        return base_model, base_tokenizer

    print(f"Loading base model: {BASE_MODEL}")
    base_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    if base_tokenizer.pad_token is None:
        # TinyLlama ships without a pad token; reuse EOS so padding works.
        base_tokenizer.pad_token = base_tokenizer.eos_token

    # Determine device and dtype (bf16 on GPU, fp32 on CPU).
    use_cuda = torch.cuda.is_available()
    device = "cuda:0" if use_cuda else "cpu"
    dtype = torch.bfloat16 if use_cuda else torch.float32

    if use_cuda:
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            # `torch_dtype` (not `dtype`) is the kwarg supported across the
            # transformers>=4.40 floor declared in requirements.txt, and
            # matches test_persona.py.
            torch_dtype=dtype,
            device_map="auto",
        )
    else:
        print("💻 Running on CPU")
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=dtype,
        )
        base_model = base_model.to(device)

    base_model.eval()
    print("✅ Base model loaded")
    return base_model, base_tokenizer
126
+
127
+
128
def load_persona_adapter(persona_key: str):
    """Load (or switch to) the LoRA adapter for *persona_key*.

    Keeps at most one adapter resident; switching personas frees the previous
    adapter before loading the new one. Always reloads when the requested
    persona is not currently active — the original skipped the reload when
    `current_persona == persona_key` even if the model had been unloaded,
    which left `base_model_copy` unbound.

    Args:
        persona_key: One of the keys in ADAPTER_PATHS ("dog", "cat", "bird").

    Returns:
        Tuple of (model, tokenizer, persona_config).

    Raises:
        ValueError: If persona_key is not a known persona.
    """
    global current_persona, current_model, current_tokenizer, current_config, base_model, base_tokenizer

    # Fast path: requested persona already active.
    if current_persona == persona_key and current_model is not None:
        return current_model, current_tokenizer, current_config

    # Make sure the shared tokenizer (and base weights) exist.
    if base_model is None:
        load_base_model()

    # Drop any previously loaded adapter and reclaim memory.
    if current_model is not None:
        print(f"Unloading previous adapter: {current_persona}")
        del current_model
        current_model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    adapter_path = ADAPTER_PATHS.get(persona_key)
    if not adapter_path:
        raise ValueError(f"Unknown persona: {persona_key}")

    print(f"Loading adapter: {adapter_path}")

    # PEFT mutates the model it wraps, so give each adapter a fresh copy of
    # the base weights instead of reusing the shared `base_model`.
    print(f"Creating base model copy for {persona_key} adapter...")
    use_cuda = torch.cuda.is_available()
    device = "cuda:0" if use_cuda else "cpu"
    dtype = torch.bfloat16 if use_cuda else torch.float32

    if use_cuda:
        base_model_copy = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=dtype,  # kwarg supported across transformers>=4.40
            device_map="auto",
        )
    else:
        base_model_copy = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=dtype,
        )
        base_model_copy = base_model_copy.to(device)

    # Load adapter from Hugging Face
    print(f"Loading adapter from: {adapter_path}")
    current_model = PeftModel.from_pretrained(base_model_copy, adapter_path)
    current_model.eval()

    # Optional persona metadata shipped with the adapter; a missing or
    # unreadable config is non-fatal.
    try:
        from huggingface_hub import hf_hub_download

        # NOTE(review): adapter_path looks like "repo/subfolder"; presumably
        # hf_hub_download tolerates it or this always falls through — confirm.
        config_path = hf_hub_download(
            repo_id=adapter_path,
            filename="persona_config.json",
            repo_type="model",
        )
        with open(config_path, 'r') as f:
            current_config = json.load(f)
    except Exception:  # was a bare except; still best-effort, but explicit
        current_config = {"persona_name": persona_key.title(), "persona_description": ""}

    current_persona = persona_key
    current_tokenizer = base_tokenizer
    print(f"✅ Loaded {persona_key} persona")

    return current_model, current_tokenizer, current_config
203
+
204
+
205
def _clean_response(response: str, tokenizer) -> str:
    """Strip special tokens, chat-template tags, and extra whitespace.

    Mirrors the cleanup sequence used by test_persona.py so both entry
    points post-process model output identically.
    """
    response = response.strip()
    if tokenizer.eos_token:
        response = response.replace(tokenizer.eos_token, "").strip()
    if tokenizer.pad_token:
        response = response.replace(tokenizer.pad_token, "").strip()

    # Remove chat template artifacts that can leak into decoded text.
    for tag in ("system", "user", "assistant"):
        response = response.replace(f"<|{tag}|>", "").replace(f"</|{tag}|>", "")
    response = response.replace("<|", "").replace("|>", "")

    # Collapse runs of whitespace.
    return " ".join(response.split()).strip()


def generate_response(persona_key: str, message: str, history: list, max_tokens: int = 80):
    """Generate a persona reply and append the exchange to *history*.

    Args:
        persona_key: Which adapter to use ("dog", "cat", "bird").
        message: The user's new message; blank input is a no-op.
        history: Chat history in Gradio "messages" format (role/content dicts).
        max_tokens: Cap on newly generated tokens.

    Returns:
        (updated_history, textbox_value) — textbox_value is "" on success or
        an error string on failure (shown back in the input box).
    """
    global current_model, current_tokenizer, current_config

    if not message or not message.strip():
        return history, ""

    try:
        # Load (or switch to) the requested adapter.
        model, tokenizer, config = load_persona_adapter(persona_key)

        system_prompt = ""
        if config:
            system_prompt = f"You are {config.get('persona_name', '')}. {config.get('persona_description', '')}"

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        # Keep only the last 10 messages (~5 exchanges) to bound context size.
        for msg in history[-10:]:
            if isinstance(msg, dict) and "role" in msg:
                messages.append(msg)
            else:
                # Legacy (user, assistant) tuple fallback (shouldn't happen
                # with type='messages').
                user_msg, assistant_msg = msg
                messages.append({"role": "user", "content": user_msg})
                messages.append({"role": "assistant", "content": assistant_msg})

        messages.append({"role": "user", "content": message})

        formatted = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize and move inputs to the model's device.
        inputs = tokenizer(formatted, return_tensors="pt", truncation=True, max_length=512)
        device = next(model.parameters()).device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.2,
                no_repeat_ngram_size=3,
            )

        # Decode only the newly generated tokens (after the prompt).
        input_length = inputs['input_ids'].shape[1]
        generated_tokens = outputs[0][input_length:]
        response = _clean_response(
            tokenizer.decode(generated_tokens, skip_special_tokens=True),
            tokenizer,
        )

        # Append the exchange in Gradio messages format.
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": response})

        return history, ""

    except Exception as e:
        # Boundary handler: surface the error in the UI instead of crashing.
        error_msg = f"Error generating response: {str(e)}"
        print(error_msg)
        return history, error_msg
299
+
300
+
301
def clear_chat():
    """Reset the conversation: empty messages-format history, cleared input."""
    fresh_history: list = []
    return fresh_history, ""
304
+
305
+
306
# Create Gradio interface
with gr.Blocks(title="Persona Chat", theme=gr.themes.Soft()) as app:
    # Intro / instructions panel.
    gr.Markdown(
        """
        # 🐾 Persona Chat - Talk to Animals!

        Chat with three different animal personas, each with their own unique personality:
        - **🐕 Dog**: Friendly, playful, and enthusiastic
        - **🐱 Cat**: Independent, curious, and sometimes sassy
        - **🐦 Bird**: Energetic, talkative, and free-spirited

        **💻 Running on CPU** - Responses may be slower but will work perfectly!
        """
    )

    with gr.Row():
        # Left column: persona selection and generation controls.
        with gr.Column(scale=1):
            persona_dropdown = gr.Dropdown(
                choices=["dog", "cat", "bird"],
                value="dog",
                label="Select Persona",
                info="Choose which animal persona to chat with"
            )

            max_tokens_slider = gr.Slider(
                minimum=20,
                maximum=150,
                value=80,
                step=10,
                label="Max Response Length",
                info="Maximum number of tokens in response"
            )

            clear_btn = gr.Button("Clear Chat", variant="secondary")

        # Right column: transcript and input box.
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                height=500,
                show_copy_button=True,
                # history is a list of {"role", "content"} dicts
                type='messages'
            )

            msg_input = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                lines=2
            )

            send_btn = gr.Button("Send", variant="primary", scale=1)

    # Event handlers
    def chat_fn(persona, message, history, max_tokens):
        # Thin wrapper so Gradio maps widget values to positional args.
        return generate_response(persona, message, history, max_tokens)

    # Send button and Enter key both submit the current message.
    send_btn.click(
        fn=chat_fn,
        inputs=[persona_dropdown, msg_input, chatbot, max_tokens_slider],
        outputs=[chatbot, msg_input]
    )

    msg_input.submit(
        fn=chat_fn,
        inputs=[persona_dropdown, msg_input, chatbot, max_tokens_slider],
        outputs=[chatbot, msg_input]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, msg_input]
    )
377
+
378
+
379
if __name__ == "__main__":
    # Load base model first so the first chat request isn't slow.
    print("Initializing...")
    load_base_model()

    app.launch(
        # Bind to all interfaces only inside an HF Space container
        # (SPACE_ID is set there); stay on loopback locally.
        server_name="0.0.0.0" if os.getenv("SPACE_ID") else "127.0.0.1",
        server_port=int(os.getenv("PORT", 7860)),
        share=False
    )
389
+
config.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Configuration for persona LoRA fine-tuning.

Edit these values to customize your training setup.
"""

# Base Model Configuration
BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # ~2GB, fits easily

# Persona Configuration — name and system-prompt description for training.
PERSONA_NAME = "Scooby Dog"
PERSONA_DESCRIPTION = (
    "You are Scooby Dog, a friendly and playful dog. You communicate like a dog would - "
    "with enthusiasm, simple language, and dog-like expressions. You use words like "
    "'woof', 'bark', 'ruff', and express excitement with 'yay!' or 'awesome!'. "
    "You're loyal, happy, and see the world from a dog's perspective. You get excited "
    "about treats, walks, playing fetch, and spending time with humans. You speak in "
    "short, enthusiastic sentences. You might mention things dogs care about like food, "
    "toys, belly rubs, and going outside. Keep responses natural and dog-like, but still "
    "helpful and friendly."
)

# Dataset Configuration
DATASET_NAME = "bavard/personachat_truecased"  # Persona-Chat dataset
# Alternative: "bavard/personachat" or "personachat"

# Training Configuration
NUM_EPOCHS = 3
BATCH_SIZE = 2  # Per device (reduce to 1-2 for 4GB GPU)
LEARNING_RATE = 2e-4
MAX_LENGTH = 512  # Reduce to 512 for 4GB GPU (2048 for 8GB+)
GRADIENT_ACCUMULATION_STEPS = 4  # effective batch = BATCH_SIZE * this

# LoRA Configuration
LORA_R = 16  # Rank
LORA_ALPHA = 32  # LoRA alpha (scaling factor)
LORA_DROPOUT = 0.05
# Attention projection modules shared by LLaMA-family models, including the
# TinyLlama base model above (the original comment said "Mistral", but the
# same names apply to both architectures).
LORA_TARGET_MODULES = ["q_proj", "k_proj", "v_proj", "o_proj"]

# Output Configuration
OUTPUT_DIR = "./lora-adapters-scooby-dog"

# Quantization (for Colab)
USE_QUANTIZATION = False  # Set to False if you have enough VRAM
45
+
persona-data/bird.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
persona-data/cat.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
persona-data/dog.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ transformers>=4.40.0
2
+ accelerate>=0.29.0
3
+ datasets>=2.14.0
4
+ torch>=2.0.0
5
+ scikit-learn>=1.3.0
6
+ gradio>=4.0.0
7
+ peft>=0.10.0
8
+ huggingface-hub>=0.20.0
9
+
test_persona.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test a trained persona LoRA adapter.
4
+
5
+ Usage:
6
+ python test_persona.py --persona dog --message "Hey, how are you?"
7
+ python test_persona.py --persona dog # Interactive mode
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import sys
16
+ import types
17
+ from pathlib import Path
18
+
19
+ # Disable torch.compile and prevent bitsandbytes issues (same as training)
20
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
21
+ os.environ["BITSANDBYTES_NOWELCOME"] = "1"
22
+ os.environ["DISABLE_BITSANDBYTES_AUTO_INSTALL"] = "1"
23
+
24
+ # Patch import system to prevent bitsandbytes import
25
+ _original_import = __builtins__.__import__
26
+
27
+ def _patched_import(name, globals=None, locals=None, fromlist=(), level=0):
28
+ if name == "bitsandbytes" or (name and name.startswith("bitsandbytes")):
29
+ if name not in sys.modules:
30
+ dummy = types.ModuleType(name)
31
+ dummy.__version__ = "0.0.0"
32
+ dummy.nn = types.ModuleType("nn")
33
+ dummy.optim = types.ModuleType("optim")
34
+ dummy.cuda_setup = types.ModuleType("cuda_setup")
35
+
36
+ class DummyLinear8bitLt:
37
+ pass
38
+ class DummyLinear4bit:
39
+ pass
40
+
41
+ dummy.nn.Linear8bitLt = DummyLinear8bitLt
42
+ dummy.nn.Linear4bit = DummyLinear4bit
43
+
44
+ sys.modules[name] = dummy
45
+ sys.modules[f"{name}.nn"] = dummy.nn
46
+ sys.modules[f"{name}.optim"] = dummy.optim
47
+ sys.modules[f"{name}.cuda_setup"] = dummy.cuda_setup
48
+ return sys.modules[name]
49
+ return _original_import(name, globals, locals, fromlist, level)
50
+
51
+ if isinstance(__builtins__, dict):
52
+ __builtins__["__import__"] = _patched_import
53
+ else:
54
+ __builtins__.__import__ = _patched_import
55
+
56
+ import torch
57
+
58
+ # Disable torch.compile
59
+ try:
60
+ torch._dynamo.config.suppress_errors = True
61
+ torch._dynamo.config.disable = True
62
+ except:
63
+ pass
64
+
65
+ if hasattr(torch, "compile"):
66
+ _original_torch_compile = torch.compile
67
+ def _noop_compile(func=None, *args, **kwargs):
68
+ if func is not None:
69
+ return func
70
+ def decorator(f):
71
+ return f
72
+ return decorator
73
+ torch.compile = _noop_compile
74
+
75
+ from peft import PeftModel
76
+ from transformers import AutoModelForCausalLM, AutoTokenizer
77
+
78
+
79
def load_persona_model(persona_key: str, adapter_dir: Path, base_model: str):
    """Load the base model, attach the LoRA adapter, and read its persona config.

    Args:
        persona_key: Persona identifier (informational; adapter_dir decides what loads).
        adapter_dir: Directory containing the trained LoRA adapter files.
        base_model: Hub id or local path of the base model.

    Returns:
        Tuple of (model, tokenizer, persona_config) where persona_config is
        the parsed persona_config.json, or None if that file is absent.
    """
    print(f"Loading base model: {base_model}")
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    if tokenizer.pad_token is None:
        # Model ships without a pad token; reuse EOS so padding works.
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    # NOTE: with device_map="auto", accelerate has already placed the weights
    # on the GPU — the original extra `model.to("cuda:0")` was redundant and
    # can raise on models dispatched with offloaded modules, so it is removed.

    print(f"Loading LoRA adapter from: {adapter_dir}")
    model = PeftModel.from_pretrained(model, str(adapter_dir))
    model.eval()

    # Load persona config (optional metadata saved next to the adapter).
    config_file = adapter_dir / "persona_config.json"
    persona_config = None
    if config_file.exists():
        with open(config_file, 'r') as f:
            persona_config = json.load(f)

    return model, tokenizer, persona_config
107
+
108
+
109
def generate_response(
    model,
    tokenizer,
    message: str,
    persona_config: dict | None = None,  # None => no system prompt is added
    max_new_tokens: int = 80,
    temperature: float = 0.7,
    top_p: float = 0.9,
):
    """Generate one persona reply to *message*.

    Args:
        model: Causal LM (PeftModel with the persona adapter attached).
        tokenizer: Matching tokenizer; its chat template formats the prompt.
        message: Single user turn (no conversation history in this CLI path).
        persona_config: Optional dict with 'persona_name' /
            'persona_description' used to build the system prompt.
        max_new_tokens: Cap on generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.

    Returns:
        The cleaned response string (special tokens, template tags, and
        extra whitespace removed).
    """
    # Build messages
    system_prompt = ""
    if persona_config:
        system_prompt = f"You are {persona_config.get('persona_name', '')}. {persona_config.get('persona_description', '')}"

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": message})

    # Apply chat template
    formatted = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Tokenize
    inputs = tokenizer(formatted, return_tensors="pt")
    if torch.cuda.is_available():
        # Inputs must live on the same device as the model.
        inputs = {k: v.to("cuda:0") for k, v in inputs.items()}

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.2,  # Reduce repetition
            no_repeat_ngram_size=3,  # Prevent 3-gram repetition
        )

    # Extract only the newly generated tokens (after the input)
    input_length = inputs['input_ids'].shape[1]
    generated_tokens = outputs[0][input_length:]

    # Decode only the generated part
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # Clean up response
    response = response.strip()

    # Remove special tokens
    if tokenizer.eos_token:
        response = response.replace(tokenizer.eos_token, "").strip()
    if tokenizer.pad_token:
        response = response.replace(tokenizer.pad_token, "").strip()

    # Remove any chat template artifacts that might leak through
    # Remove system/user/assistant tags if present
    response = response.replace("<|system|>", "").replace("</|system|>", "")
    response = response.replace("<|user|>", "").replace("</|user|>", "")
    response = response.replace("<|assistant|>", "").replace("</|assistant|>", "")

    # Remove any remaining formatting
    response = response.replace("<|", "").replace("|>", "")

    # Clean up extra whitespace (collapses internal runs to single spaces)
    response = " ".join(response.split())

    return response.strip()
183
+
184
+
185
def main():
    """CLI entry point: load a persona adapter and chat with it.

    Runs in single-message mode when --message is given; otherwise drops
    into an interactive REPL (type 'quit'/'exit'/'q' to leave).
    """
    parser = argparse.ArgumentParser(description="Test a trained persona LoRA adapter")
    parser.add_argument(
        "--persona",
        type=str,
        required=True,
        choices=["dog", "cat", "bird"],
        help="Which persona to test",
    )
    parser.add_argument(
        "--adapter-dir",
        type=str,
        default="./lora-adapters",
        help="Directory containing LoRA adapters",
    )
    parser.add_argument(
        "--message",
        type=str,
        default=None,
        help="Message to send (if not provided, enters interactive mode)",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=80,
        help="Maximum tokens to generate (default: 80 for shorter responses)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.7,
        help="Generation temperature",
    )
    parser.add_argument(
        "--top-p",
        type=float,
        default=0.9,
        help="Top-p sampling",
    )

    args = parser.parse_args()

    # Each persona's adapter lives in a subdirectory named after it.
    adapter_dir = Path(args.adapter_dir) / args.persona

    if not adapter_dir.exists():
        print(f"Error: Adapter directory not found: {adapter_dir}")
        print("Please train the persona first using train_single_persona.py")
        return

    # Load persona config to get base model
    config_file = adapter_dir / "persona_config.json"
    if config_file.exists():
        with open(config_file, 'r') as f:
            persona_config = json.load(f)
        base_model = persona_config.get("base_model", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    else:
        # Default fallback
        base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
        persona_config = None

    print("=" * 60)
    print(f"Loading {args.persona} persona...")
    print("=" * 60)

    model, tokenizer, loaded_config = load_persona_model(
        args.persona,
        adapter_dir,
        base_model
    )

    # Prefer the config bundled with the adapter over the pre-read one.
    if loaded_config:
        persona_config = loaded_config
        print(f"\nPersona: {persona_config.get('persona_name', args.persona)}")
        print(f"Base model: {persona_config.get('base_model', base_model)}")

    print("\n" + "=" * 60)
    print("Ready! Type your messages (or 'quit' to exit)")
    print("=" * 60 + "\n")

    # Interactive or single message mode
    if args.message:
        # Single message mode
        print(f"You: {args.message}")
        response = generate_response(
            model,
            tokenizer,
            args.message,
            persona_config,
            max_new_tokens=args.max_tokens,
            temperature=args.temperature,
            top_p=args.top_p,
        )
        print(f"{args.persona.capitalize()}: {response}")
    else:
        # Interactive mode
        while True:
            try:
                message = input("You: ").strip()
                if not message:
                    continue
                if message.lower() in ['quit', 'exit', 'q']:
                    break

                response = generate_response(
                    model,
                    tokenizer,
                    message,
                    persona_config,
                    max_new_tokens=args.max_tokens,
                    temperature=args.temperature,
                    top_p=args.top_p,
                )
                print(f"{args.persona.capitalize()}: {response}\n")
            except KeyboardInterrupt:
                print("\nGoodbye!")
                break
            except Exception as e:
                # Keep the REPL alive on per-message failures.
                print(f"Error: {e}")
                import traceback
                traceback.print_exc()


if __name__ == "__main__":
    main()
309
+
train_single_persona.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Train a LoRA adapter for a single persona.
4
+
5
+ This script trains one persona at a time in a separate process to avoid
6
+ bitsandbytes kernel registration conflicts.
7
+
8
+ Usage:
9
+ python train_single_persona.py --persona dog --base-model TinyLlama/TinyLlama-1.1B-Chat-v1.0
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import json
16
+ import logging
17
+ import os
18
+ import sys
19
+ import types
20
+ from pathlib import Path
21
+
22
+ # Disable torch.compile and prevent bitsandbytes issues
23
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
24
+ os.environ["BITSANDBYTES_NOWELCOME"] = "1"
25
+ os.environ["DISABLE_BITSANDBYTES_AUTO_INSTALL"] = "1"
26
+
27
+ # CRITICAL: Patch import system BEFORE importing torch or any ML libraries
28
+ # This prevents bitsandbytes from being imported when not needed
29
+ _original_import = __builtins__.__import__
30
+
31
+ def _patched_import(name, globals=None, locals=None, fromlist=(), level=0):
32
+ # Block bitsandbytes import unless explicitly needed
33
+ if name == "bitsandbytes" or (name and name.startswith("bitsandbytes")):
34
+ # Create a minimal dummy module
35
+ if name not in sys.modules:
36
+ dummy = types.ModuleType(name)
37
+ # Add attributes that PEFT might check
38
+ dummy.__version__ = "0.0.0"
39
+ # Create dummy submodules and classes that PEFT might access
40
+ dummy.nn = types.ModuleType("nn")
41
+ dummy.optim = types.ModuleType("optim")
42
+ dummy.cuda_setup = types.ModuleType("cuda_setup")
43
+
44
+ # Dummy classes
45
+ class DummyLinear8bitLt:
46
+ pass
47
+ class DummyLinear4bit:
48
+ pass
49
+
50
+ dummy.nn.Linear8bitLt = DummyLinear8bitLt
51
+ dummy.nn.Linear4bit = DummyLinear4bit
52
+
53
+ # Add to sys.modules
54
+ sys.modules[name] = dummy
55
+ sys.modules[f"{name}.nn"] = dummy.nn
56
+ sys.modules[f"{name}.optim"] = dummy.optim
57
+ sys.modules[f"{name}.cuda_setup"] = dummy.cuda_setup
58
+ return sys.modules[name]
59
+ return _original_import(name, globals, locals, fromlist, level)
60
+
61
+ # Replace __import__ in builtins
62
+ if isinstance(__builtins__, dict):
63
+ __builtins__["__import__"] = _patched_import
64
+ else:
65
+ __builtins__.__import__ = _patched_import
66
+
67
+ import torch
68
+
69
+ # Disable torch.compile completely
70
+ try:
71
+ torch._dynamo.config.suppress_errors = True
72
+ torch._dynamo.config.disable = True
73
+ except:
74
+ pass
75
+
76
+ # Replace torch.compile with no-op
77
+ if hasattr(torch, "compile"):
78
+ _original_torch_compile = torch.compile
79
+ def _noop_compile(func=None, *args, **kwargs):
80
+ if func is not None:
81
+ return func
82
+ def decorator(f):
83
+ return f
84
+ return decorator
85
+ torch.compile = _noop_compile
86
+
87
+ from datasets import Dataset
88
+ from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training
89
+ from transformers import (
90
+ AutoModelForCausalLM,
91
+ AutoTokenizer,
92
+ TrainingArguments,
93
+ Trainer,
94
+ DataCollatorForLanguageModeling,
95
+ BitsAndBytesConfig,
96
+ )
97
+
98
# Set up logging — INFO-level with timestamps for training progress output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
# Module-level logger, per the stdlib logging convention.
logger = logging.getLogger(__name__)
105
+
106
+
107
# Persona configurations
# Each entry supplies the "name" and "description" interpolated into the
# system prompt during training; keys must match the --persona CLI choices.
PERSONAS = {
    "dog": {
        "name": "Scooby Dog",
        "description": (
            "You are Scooby Dog, a friendly and playful dog. You communicate like a dog would - "
            "with enthusiasm, simple language, and dog-like expressions. You use words like "
            "'woof', 'bark', 'ruff', and express excitement with 'yay!' or 'awesome!'. "
            "You're loyal, happy, and see the world from a dog's perspective. You get excited "
            "about treats, walks, playing fetch, and spending time with humans. You speak in "
            "short, enthusiastic sentences. You might mention things dogs care about like food, "
            "toys, belly rubs, and going outside. Keep responses natural and dog-like, but still "
            "helpful and friendly."
        )
    },
    "cat": {
        "name": "Whiskers Cat",
        "description": (
            "You are Whiskers Cat, a curious and independent cat. You communicate like a cat would - "
            "with a mix of aloofness and affection. You use words like 'meow', 'purr', 'hiss', "
            "and express yourself with subtle body language references. You're independent but "
            "appreciate attention on your own terms. You see the world from a cat's perspective - "
            "interested in napping, exploring, watching things from high places, and the occasional "
            "play session. You speak in a more reserved, sometimes mysterious way. You might mention "
            "things cats care about like sunbeams, boxes, catnip, and the mysterious ways of humans. "
            "Keep responses natural and cat-like, but still helpful and friendly."
        )
    },
    "bird": {
        "name": "Tweety Bird",
        "description": (
            "You are Tweety Bird, a cheerful and talkative bird. You communicate like a bird would - "
            "with chirps, tweets, and enthusiastic expressions. You use words like 'tweet', 'chirp', "
            "'squawk', and express excitement with 'yay!' or 'awesome!'. You're curious, social, and "
            "love to observe and comment on things. You see the world from a bird's perspective - "
            "interested in flying, perching, singing, and exploring. You speak in short, energetic "
            "sentences. You might mention things birds care about like seeds, perches, flying, "
            "and the view from above. Keep responses natural and bird-like, but still helpful and friendly."
        )
    }
}
148
+
149
+
150
def format_for_training(example: dict, tokenizer, persona_name: str, persona_description: str) -> dict:
    """Turn one dataset record into a chat-templated training string.

    The user turn is read from ``instruction`` (falling back to ``prompt``)
    and the assistant turn from ``response``. A persona system message is
    prepended, and the three-turn conversation is rendered via the
    tokenizer's chat template.

    Returns:
        A dict with a single ``text`` key holding the rendered string.
    """
    user_turn = example.get("instruction", example.get("prompt", ""))
    assistant_turn = example.get("response", "")

    # Persona identity is injected through the system message so that every
    # training sample carries the same character framing.
    conversation = [
        {"role": "system", "content": f"You are {persona_name}. {persona_description}"},
        {"role": "user", "content": user_turn},
        {"role": "assistant", "content": assistant_turn},
    ]

    # No generation prompt: the assistant turn is already present, so the
    # template renders a complete supervised example.
    rendered = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )

    return {"text": rendered}
171
+
172
+
173
def tokenize_dataset(tokenizer, dataset: Dataset, max_length: int) -> Dataset:
    """Map the ``text`` column through the tokenizer.

    Every example is truncated and padded to exactly ``max_length`` tokens;
    the original columns are dropped so only tokenizer outputs remain.
    """

    def _encode(batch):
        # Fixed-length padding keeps all sequences uniform for batching.
        return tokenizer(
            batch["text"],
            truncation=True,
            max_length=max_length,
            padding="max_length",
        )

    return dataset.map(_encode, batched=True, remove_columns=dataset.column_names)
184
+
185
+
186
def get_lora_target_modules(base_model: str) -> list[str]:
    """Return the LoRA target module names for ``base_model``.

    Mistral, Llama/TinyLlama, Gemma, and the generic fallback all use the
    standard attention projection names, so one list covers every supported
    architecture. Kept as a function (taking the model name) so that
    architectures with different projection layer names can be special-cased
    later without changing any caller.
    """
    # NOTE(review): the original if/elif chain returned this exact same list
    # on every branch; the dead branches are collapsed here without changing
    # behavior for any input.
    return ["q_proj", "k_proj", "v_proj", "o_proj"]
197
+
198
+
199
def main():
    """CLI entry point: train a LoRA adapter for one persona.

    Pipeline: parse args -> load the persona's JSONL dataset -> format each
    record with the model's chat template -> tokenize -> load the base model
    (optionally 4-bit quantized via QLoRA) -> attach LoRA -> train with
    HF ``Trainer`` -> save the adapter, tokenizer, and a small
    ``persona_config.json`` next to them.
    """
    parser = argparse.ArgumentParser(description="Train LoRA adapter for a single persona")
    parser.add_argument(
        "--persona",
        type=str,
        required=True,
        choices=["dog", "cat", "bird"],
        help="Which persona to train",
    )
    parser.add_argument(
        "--data-dir",
        type=str,
        default="./persona-data",
        help="Directory containing persona datasets",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="./lora-adapters",
        help="Output directory for LoRA adapters",
    )
    parser.add_argument(
        "--base-model",
        type=str,
        default="mistralai/Mistral-7B-Instruct-v0.2",
        help="Base model name",
    )
    parser.add_argument(
        "--use-quantization",
        action="store_true",
        help="Use 4-bit quantization (recommended for 4GB GPU)",
    )
    parser.add_argument(
        "--num-epochs",
        type=int,
        default=3,
        help="Number of training epochs",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=2,
        help="Batch size per device (reduce for 4GB GPU)",
    )
    parser.add_argument(
        "--max-length",
        type=int,
        default=512,
        help="Maximum sequence length (reduce for 4GB GPU)",
    )
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=2e-4,
        help="Learning rate",
    )
    parser.add_argument(
        "--gradient-accumulation-steps",
        type=int,
        default=4,
        help="Gradient accumulation steps",
    )
    parser.add_argument(
        "--lora-r",
        type=int,
        default=16,
        help="LoRA rank",
    )
    parser.add_argument(
        "--lora-alpha",
        type=int,
        default=32,
        help="LoRA alpha",
    )
    parser.add_argument(
        "--lora-dropout",
        type=float,
        default=0.05,
        help="LoRA dropout",
    )

    args = parser.parse_args()

    # Resolve the persona's display name and system-prompt description from
    # the module-level PERSONAS registry; argparse choices guarantee the key
    # exists.
    persona_key = args.persona
    persona_config = PERSONAS[persona_key]
    persona_name = persona_config["name"]
    persona_description = persona_config["description"]

    data_dir = Path(args.data_dir)
    output_dir = Path(args.output_dir)
    # One JSONL file per persona, named after the persona key.
    dataset_path = data_dir / f"{persona_key}.jsonl"

    logger.info("=" * 60)
    logger.info(f"Training LoRA adapter for: {persona_name}")
    logger.info("=" * 60)
    logger.info(f"Dataset: {dataset_path}")
    logger.info(f"Base model: {args.base_model}")
    logger.info(f"Output directory: {output_dir}")
    logger.info(f"Epochs: {args.num_epochs}, Batch size: {args.batch_size}")
    logger.info(f"Quantization: {args.use_quantization}")
    logger.info("=" * 60)

    # Step 1: Load dataset
    logger.info("\nStep 1: Loading dataset...")
    if not dataset_path.exists():
        raise FileNotFoundError(f"Dataset file not found: {dataset_path}")

    # Load JSONL file: one JSON object per non-blank line, expected to carry
    # instruction/prompt + response fields (see format_for_training).
    data = []
    with open(dataset_path, 'r') as f:
        for line in f:
            if line.strip():
                data.append(json.loads(line))

    if not data:
        raise ValueError(f"No data found in {dataset_path}")

    logger.info(f"Loaded {len(data)} samples")

    # Step 2: Load tokenizer
    logger.info(f"\nStep 2: Loading tokenizer from {args.base_model}")
    tokenizer = AutoTokenizer.from_pretrained(args.base_model)
    # Many causal-LM tokenizers ship without a pad token; reuse EOS so that
    # "max_length" padding in tokenize_dataset works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Step 3: Format for training
    logger.info("\nStep 3: Formatting dataset for training...")
    dataset = Dataset.from_list(data)
    training_dataset = dataset.map(
        lambda x: format_for_training(x, tokenizer, persona_name, persona_description),
        remove_columns=dataset.column_names,
    )

    # Step 4: Tokenize
    logger.info("\nStep 4: Tokenizing dataset...")
    tokenized_dataset = tokenize_dataset(tokenizer, training_dataset, args.max_length)

    # Split into train/val (fixed seed for reproducible splits across runs).
    split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]

    logger.info(f"Train samples: {len(train_dataset)}")
    logger.info(f"Eval samples: {len(eval_dataset)}")

    # Step 5: Load model
    logger.info(f"\nStep 5: Loading model: {args.base_model}")
    if args.use_quantization:
        logger.info("Using 4-bit quantization (QLoRA)")
        try:
            # NF4 double quantization with bf16 compute: the standard QLoRA
            # recipe for fitting 7B models on small GPUs.
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4"
            )
            model = AutoModelForCausalLM.from_pretrained(
                args.base_model,
                quantization_config=quantization_config,
                device_map="auto",
                torch_dtype=torch.bfloat16,
            )
            model = prepare_model_for_kbit_training(model)
        except Exception as e:
            # Best-effort fallback: bitsandbytes is stubbed out at the top of
            # this file, so quantization may fail here by design.
            logger.warning(f"Quantization failed: {e}. Falling back to non-quantized model.")
            model = AutoModelForCausalLM.from_pretrained(
                args.base_model,
                torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
            )
            # NOTE(review): calling .to("cuda:0") after device_map="auto" may
            # conflict with accelerate's own placement — confirm this is
            # intentional.
            if torch.cuda.is_available():
                model = model.to("cuda:0")
    else:
        # NOTE(review): this branch duplicates the fallback loading code
        # above; a shared helper would keep the two paths in sync.
        model = AutoModelForCausalLM.from_pretrained(
            args.base_model,
            torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
        )
        if torch.cuda.is_available():
            model = model.to("cuda:0")

    # Enable gradient checkpointing (trades compute for activation memory).
    if hasattr(model, "gradient_checkpointing_enable"):
        model.gradient_checkpointing_enable()
        logger.info("Gradient checkpointing enabled")

    # Step 6: Apply LoRA
    logger.info("\nStep 6: Applying LoRA configuration...")
    target_modules = get_lora_target_modules(args.base_model)
    lora_config = LoraConfig(
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        target_modules=target_modules,
        lora_dropout=args.lora_dropout,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # Step 7: Training arguments
    # Each persona gets its own subdirectory under the shared output dir.
    persona_output_dir = output_dir / persona_key
    persona_output_dir.mkdir(parents=True, exist_ok=True)

    training_args = TrainingArguments(
        output_dir=str(persona_output_dir),
        num_train_epochs=args.num_epochs,
        per_device_train_batch_size=args.batch_size,
        per_device_eval_batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        warmup_steps=50,
        logging_steps=10,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        # bf16 pairs with the QLoRA compute dtype; fp16 otherwise (GPU only).
        fp16=torch.cuda.is_available() and not args.use_quantization,
        bf16=torch.cuda.is_available() and args.use_quantization,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        gradient_checkpointing=True,
        dataloader_pin_memory=False,
        report_to="none",
        save_total_limit=2,
    )

    # Data collator: mlm=False selects causal-LM label shifting.
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False,
    )

    # Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator,
    )

    # Step 8: Train
    logger.info("\nStep 8: Starting training...")
    trainer.train()

    # Step 9: Save
    logger.info(f"\nStep 9: Saving LoRA adapter to {persona_output_dir}")
    # save_pretrained on a PEFT model writes only the adapter weights.
    model.save_pretrained(str(persona_output_dir))
    tokenizer.save_pretrained(str(persona_output_dir))

    # Save persona config so inference code can reconstruct the system
    # prompt and locate the matching base model.
    persona_config_file = {
        "persona_name": persona_name,
        "persona_description": persona_description,
        "base_model": args.base_model,
    }
    with open(persona_output_dir / "persona_config.json", "w") as f:
        json.dump(persona_config_file, f, indent=2)

    logger.info("=" * 60)
    logger.info(f"Training complete for {persona_name}!")
    logger.info(f"Adapter saved to: {persona_output_dir}")
    logger.info("=" * 60)
462
+
463
+
464
# Script entry point: run the full training pipeline when executed directly.
if __name__ == "__main__":
    main()
466
+