# File size: 14,994 Bytes
#
# 3df89a1

"""

Main KerdosAgent class that orchestrates the training and deployment process.

"""

from typing import Optional, Union, Dict, Any, List
from pathlib import Path
import torch
import logging
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import warnings

from .trainer import Trainer
from .deployer import Deployer
from .data_processor import DataProcessor

# Root logging setup: INFO level, records rendered as
# "<timestamp> - <logger name> - <level> - <message>".
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-scoped logger used by everything defined in this file.
logger = logging.getLogger(__name__)

class KerdosAgent:
    """

    Main agent class for training and deploying LLMs with custom data.

    """
    
    def __init__(
        self,
        base_model: str,
        training_data: Optional[Union[str, Path]],
        device: Optional[str] = None,
        **kwargs
    ):
        """
        Initialize the KerdosAgent.

        Loads the base model and tokenizer, then builds the data processor
        (only when training data was given), the trainer and the deployer.

        Args:
            base_model: Name or path of the base LLM model
            training_data: Path to the training data. A falsy value (``None``
                or ``""``) means no data processor is created.
            device: Device to run the model on (cuda/cpu). When omitted,
                "cuda" is used if available, otherwise "cpu".
            **kwargs: Additional configuration parameters, stored as
                ``self.config``. Keys read in this class: ``use_4bit``,
                ``use_8bit`` and ``trust_remote_code`` (default False).

        Raises:
            ValueError: If the configuration is invalid
            Exception: Any error raised while loading the model/tokenizer or
                building the components is logged and re-raised unchanged
        """
        self.base_model = base_model
        self.training_data = Path(training_data) if training_data else None
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.config = kwargs

        logger.info(f"Initializing KerdosAgent with base model: {base_model}")
        logger.info(f"Using device: {self.device}")

        # Validate configuration
        self._validate_config()

        # Any CUDA target ("cuda", "cuda:0", ...) gets half precision.
        on_cuda = str(self.device).startswith("cuda")

        # Plain "cuda" keeps automatic placement. Any other explicit device
        # pins the whole model there so that inputs moved with
        # ``.to(self.device)`` end up on the same device as the weights
        # (e.g. device="cpu" on a machine that also has a GPU).
        device_map = "auto" if self.device == "cuda" else {"": self.device}

        trust_remote_code = kwargs.get('trust_remote_code', False)

        # Initialize components
        try:
            quantization_config = self._get_quantization_config()
            self.model = AutoModelForCausalLM.from_pretrained(
                base_model,
                torch_dtype=torch.float16 if on_cuda else torch.float32,
                device_map=device_map,
                quantization_config=quantization_config,
                trust_remote_code=trust_remote_code
            )
            self.tokenizer = AutoTokenizer.from_pretrained(
                base_model,
                trust_remote_code=trust_remote_code
            )

            # Set pad token if not present. The model config takes the
            # tokenizer's pad id so both sides agree on the padding token.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                self.model.config.pad_token_id = self.tokenizer.pad_token_id

            # Initialize other components
            if self.training_data:
                self.data_processor = DataProcessor(self.training_data)
            else:
                self.data_processor = None

            self.trainer = Trainer(self.model, self.tokenizer, self.device)
            self.deployer = Deployer(self.model, self.tokenizer)

            logger.info("KerdosAgent initialized successfully")

        except Exception as e:
            logger.error(f"Error initializing KerdosAgent: {str(e)}")
            raise
    
    def train(

        self,

        epochs: int = 3,

        batch_size: int = 4,

        learning_rate: float = 2e-5,

        **kwargs

    ) -> Dict[str, Any]:
        """

        Train the model on the provided data.

        

        Args:

            epochs: Number of training epochs

            batch_size: Training batch size

            learning_rate: Learning rate for training

            **kwargs: Additional training parameters

            

        Returns:

            Dictionary containing training metrics

        """
        # Process training data
        train_dataset = self.data_processor.prepare_dataset()
        
        # Train the model
        training_args = {
            "epochs": epochs,
            "batch_size": batch_size,
            "learning_rate": learning_rate,
            **kwargs
        }
        
        metrics = self.trainer.train(train_dataset, **training_args)
        return metrics
    
    def deploy(

        self,

        deployment_type: str = "rest",

        host: str = "0.0.0.0",

        port: int = 8000,

        **kwargs

    ) -> None:
        """

        Deploy the trained model.

        

        Args:

            deployment_type: Type of deployment (rest/docker/kubernetes)

            host: Host address for REST API

            port: Port number for REST API

            **kwargs: Additional deployment parameters

        """
        deployment_args = {
            "deployment_type": deployment_type,
            "host": host,
            "port": port,
            **kwargs
        }
        
        self.deployer.deploy(**deployment_args)
    
    def save(self, output_dir: Union[str, Path]) -> None:
        """

        Save the trained model and tokenizer.

        

        Args:

            output_dir: Directory to save the model

        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        
        self.model.save_pretrained(output_dir)
        self.tokenizer.save_pretrained(output_dir)
    
    def generate(
        self,
        prompt: str,
        max_length: int = 100,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 50,
        num_return_sequences: int = 1,
        **kwargs
    ) -> Union[str, List[str]]:
        """
        Generate text from a prompt.

        Sampling is enabled by default whenever ``temperature`` is positive,
        so that ``temperature``, ``top_p`` and ``top_k`` actually take effect
        and ``num_return_sequences > 1`` can produce distinct outputs. Pass
        ``do_sample=False`` to force deterministic decoding.

        Args:
            prompt: Input text prompt
            max_length: Maximum length of generated text
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            top_k: Top-k sampling parameter
            num_return_sequences: Number of sequences to generate
            **kwargs: Additional generation parameters. These override the
                values derived from the named arguments (e.g. ``do_sample``,
                ``pad_token_id``).

        Returns:
            Generated text or list of generated texts

        Raises:
            Exception: Any error raised during tokenization or generation is
                logged and re-raised unchanged
        """
        try:
            logger.info(f"Generating text from prompt: {prompt[:50]}...")

            # Tokenize input
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                padding=True,
                truncation=True
            ).to(self.device)

            # Collect options in a dict first so caller-supplied kwargs
            # override the defaults instead of raising "multiple values for
            # keyword argument".
            generation_params = {
                "max_length": max_length,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
                "num_return_sequences": num_return_sequences,
                "pad_token_id": self.tokenizer.pad_token_id,
                "eos_token_id": self.tokenizer.eos_token_id,
                # Without do_sample the sampling knobs above are ignored and
                # decoding is greedy.
                "do_sample": temperature > 0,
            }
            generation_params.update(kwargs)

            # Sampling-only settings are meaningless for deterministic
            # decoding; leave them out rather than pass values that are
            # ignored.
            if not generation_params["do_sample"]:
                for sampling_key in ("temperature", "top_p", "top_k"):
                    generation_params.pop(sampling_key, None)

            generation_config = GenerationConfig(**generation_params)

            # Generate
            self.model.eval()
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    generation_config=generation_config
                )

            # Decode outputs
            generated_texts = [
                self.tokenizer.decode(output, skip_special_tokens=True)
                for output in outputs
            ]

            logger.info(f"Generated {len(generated_texts)} sequence(s)")

            return generated_texts[0] if num_return_sequences == 1 else generated_texts

        except Exception as e:
            logger.error(f"Error generating text: {str(e)}")
            raise
    
    def inference(
        self,
        texts: List[str],
        batch_size: int = 8,
        **kwargs
    ) -> List[str]:
        """
        Run batch inference on multiple texts.

        Args:
            texts: List of input texts
            batch_size: Batch size for inference (must be at least 1)
            **kwargs: Additional generation parameters, forwarded to
                ``model.generate``. A ``pad_token_id`` given here overrides
                the tokenizer's pad id.

        Returns:
            List of generated texts, in the order the outputs are produced

        Raises:
            ValueError: If ``batch_size`` is smaller than 1
            Exception: Any error raised during tokenization or generation is
                logged and re-raised unchanged
        """
        # range() rejects a zero step with an unrelated message and a
        # negative step would silently yield no batches at all.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        try:
            logger.info(f"Running inference on {len(texts)} texts")

            results: List[str] = []
            self.model.eval()

            # Merge so a caller-supplied pad_token_id does not collide with
            # the default one.
            generate_kwargs = {"pad_token_id": self.tokenizer.pad_token_id, **kwargs}

            # Causal LMs continue from the last position of each row, so
            # shorter prompts in a batch must be padded on the left. The
            # tokenizer's own setting is restored afterwards.
            previous_padding_side = self.tokenizer.padding_side
            self.tokenizer.padding_side = "left"
            try:
                for start in range(0, len(texts), batch_size):
                    batch = texts[start:start + batch_size]

                    # Tokenize batch
                    inputs = self.tokenizer(
                        batch,
                        return_tensors="pt",
                        padding=True,
                        truncation=True
                    ).to(self.device)

                    # Generate
                    with torch.no_grad():
                        outputs = self.model.generate(**inputs, **generate_kwargs)

                    # Decode
                    results.extend(
                        self.tokenizer.decode(output, skip_special_tokens=True)
                        for output in outputs
                    )
            finally:
                self.tokenizer.padding_side = previous_padding_side

            logger.info(f"Inference completed for {len(results)} texts")
            return results

        except Exception as e:
            logger.error(f"Error during inference: {str(e)}")
            raise
    
    def prepare_for_training(
        self,
        use_lora: bool = True,
        lora_r: int = 8,
        lora_alpha: int = 32,
        lora_dropout: float = 0.1,
        target_modules: Optional[List[str]] = None,
        use_4bit: bool = False,
        use_8bit: bool = False
    ) -> None:
        """
        Prepare the model for efficient training using LoRA and/or quantization.

        K-bit preparation runs when either quantization flag is passed here
        or when the agent itself was configured with ``use_4bit`` /
        ``use_8bit``, so the flag does not have to be repeated for a model
        that was already loaded quantized.

        Args:
            use_lora: Whether to use LoRA (Low-Rank Adaptation)
            lora_r: LoRA rank
            lora_alpha: LoRA alpha parameter
            lora_dropout: LoRA dropout rate
            target_modules: List of module names to apply LoRA to. Defaults
                to ["q_proj", "v_proj", "k_proj", "o_proj"].
            use_4bit: Whether to use 4-bit quantization
            use_8bit: Whether to use 8-bit quantization

        Raises:
            ValueError: If both ``use_4bit`` and ``use_8bit`` are set
            Exception: Any error raised while preparing the model is logged
                and re-raised unchanged
        """
        # Same rule as _validate_config: the two modes are mutually exclusive.
        if use_4bit and use_8bit:
            raise ValueError("Cannot use both 4-bit and 8-bit quantization")

        try:
            logger.info("Preparing model for training")

            # Honour quantization requested at construction time as well as
            # the flags passed to this call.
            quantized = bool(
                use_4bit
                or use_8bit
                or self.config.get('use_4bit')
                or self.config.get('use_8bit')
            )

            # Prepare model for k-bit training if quantization is used
            if quantized:
                logger.info("Preparing model for k-bit training")
                self.model = prepare_model_for_kbit_training(self.model)

            # Apply LoRA if requested
            if use_lora:
                logger.info(f"Applying LoRA with r={lora_r}, alpha={lora_alpha}")

                if target_modules is None:
                    # Default target modules for common architectures
                    target_modules = ["q_proj", "v_proj", "k_proj", "o_proj"]

                lora_config = LoraConfig(
                    r=lora_r,
                    lora_alpha=lora_alpha,
                    target_modules=target_modules,
                    lora_dropout=lora_dropout,
                    bias="none",
                    task_type="CAUSAL_LM"
                )

                # NOTE(review): self.trainer and self.deployer were built in
                # __init__ with the model object that existed at that time;
                # confirm they pick up the wrapped model assigned here.
                self.model = get_peft_model(self.model, lora_config)
                self.model.print_trainable_parameters()

            logger.info("Model prepared for training successfully")

        except Exception as e:
            logger.error(f"Error preparing model for training: {str(e)}")
            raise
    
    def _validate_config(self) -> None:
        """

        Validate the agent configuration.

        

        Raises:

            ValueError: If configuration is invalid

        """
        if not self.base_model:
            raise ValueError("base_model must be specified")
        
        if self.config.get('use_4bit') and self.config.get('use_8bit'):
            raise ValueError("Cannot use both 4-bit and 8-bit quantization")
        
        logger.debug("Configuration validated successfully")
    
    def _get_quantization_config(self) -> Optional[BitsAndBytesConfig]:
        """

        Get quantization configuration if requested.

        

        Returns:

            BitsAndBytesConfig or None

        """
        if self.config.get('use_4bit'):
            logger.info("Using 4-bit quantization")
            return BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_use_double_quant=True
            )
        elif self.config.get('use_8bit'):
            logger.info("Using 8-bit quantization")
            return BitsAndBytesConfig(
                load_in_8bit=True
            )
        return None
    
    def get_model_info(self) -> Dict[str, Any]:
        """

        Get information about the current model.

        

        Returns:

            Dictionary containing model information

        """
        total_params = sum(p.numel() for p in self.model.parameters())
        trainable_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        
        return {
            "base_model": self.base_model,
            "device": self.device,
            "total_parameters": total_params,
            "trainable_parameters": trainable_params,
            "trainable_percentage": (trainable_params / total_params) * 100 if total_params > 0 else 0,
            "model_dtype": str(next(self.model.parameters()).dtype),
            "config": self.config
        }
    
    @classmethod
    def load(cls, model_dir: Union[str, Path], **kwargs) -> "KerdosAgent":
        """
        Build an agent from a model directory on disk.

        The directory path is used as the agent's base model and no training
        data is attached.

        Args:
            model_dir: Directory containing the saved model
            **kwargs: Additional initialization parameters

        Returns:
            Loaded KerdosAgent instance

        Raises:
            FileNotFoundError: If ``model_dir`` does not exist
            Exception: Any error raised while constructing the agent is
                logged and re-raised unchanged
        """
        try:
            source_dir = Path(model_dir)

            if source_dir.exists():
                logger.info(f"Loading model from {source_dir}")

                # The saved directory doubles as the base model location;
                # training data is not needed for loading.
                loaded_agent = cls(
                    base_model=str(source_dir),
                    training_data=None,
                    **kwargs
                )

                logger.info("Model loaded successfully")
                return loaded_agent

            raise FileNotFoundError(f"Model directory {source_dir} does not exist")

        except Exception as e:
            logger.error(f"Error loading model: {str(e)}")
            raise