#!/usr/bin/env python3 # Copyright (C) 2024 Louis Chua Bean Chong # # This file is part of OpenLLM. # # OpenLLM is dual-licensed: # 1. For open source use: GNU General Public License v3.0 # 2. For commercial use: Commercial License (contact for details) # # See LICENSE and docs/LICENSES.md for full license information. """ OpenLLM Model Export Script This script implements Step 6 of the training pipeline: Model Export & Deployment. It exports trained OpenLLM models to various formats for production inference. Supported Formats: - PyTorch native format (for Python inference) - Hugging Face format (for ecosystem compatibility) - ONNX format (for optimized cross-platform inference) Usage: # PyTorch format python core/src/export_model.py \ --model_dir models/small-extended-4k \ --format pytorch \ --output_dir exports/pytorch/ # Hugging Face format python core/src/export_model.py \ --model_dir models/small-extended-4k \ --format huggingface \ --output_dir exports/huggingface/ # ONNX format python core/src/export_model.py \ --model_dir models/small-extended-4k \ --format onnx \ --output_dir exports/onnx/ \ --optimize_for_inference Author: Louis Chua Bean Chong License: GPLv3 """ import argparse import json import os import shutil import sys from pathlib import Path from typing import Dict import torch # Add current directory to path for imports sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from model import create_model class ModelExporter: """ Comprehensive model exporter for OpenLLM models. Handles export to multiple formats including PyTorch, Hugging Face, and ONNX for different deployment scenarios. """ def __init__(self, model_dir: str, output_dir: str): """ Initialize the model exporter. Args: model_dir: Directory containing trained model checkpoints output_dir: Base directory for exported models """ self.model_dir = Path(model_dir) self.output_dir = Path(output_dir) self.output_dir.mkdir(parents=True, exist_ok=True) # Load model and metadata self.model, self.config, self.training_info = self._load_model() self.tokenizer_path = self._find_tokenizer() print("šŸ”§ ModelExporter initialized") print(f" Model: {self.config.model_name}") print(f" Parameters: {self.model.get_num_params():,}") print(f" Output directory: {output_dir}") def _load_model(self): """Load model from checkpoint directory.""" # Find best model checkpoint best_model_path = self.model_dir / "best_model.pt" if not best_model_path.exists(): # Look for latest checkpoint checkpoints = list(self.model_dir.glob("checkpoint_step_*.pt")) if not checkpoints: raise FileNotFoundError(f"No model checkpoints found in {self.model_dir}") # Get latest checkpoint latest_checkpoint = max(checkpoints, key=lambda p: int(p.stem.split("_")[-1])) best_model_path = latest_checkpoint print(f"šŸ“‚ Loading model from {best_model_path}") # Load checkpoint checkpoint = torch.load(best_model_path, map_location="cpu") # Determine model size from config config_dict = checkpoint.get("config", {}) n_layer = config_dict.get("n_layer", 12) if n_layer <= 6: model_size = "small" elif n_layer <= 12: model_size = "medium" else: model_size = "large" # Create and load model model = create_model(model_size) model.load_state_dict(checkpoint["model_state_dict"]) model.eval() # Set to evaluation mode # Extract training info training_info = { "step": checkpoint.get("step", 0), "best_loss": checkpoint.get("best_loss", 0.0), "model_size": model_size, } return model, model.config, training_info def _find_tokenizer(self): """Find tokenizer path.""" # Try multiple possible locations possible_paths = [ self.model_dir.parent / "tokenizer" / "tokenizer.model", Path("data/tokenizer/tokenizer.model"), self.model_dir / "tokenizer.model", ] for path in possible_paths: if path.exists(): return str(path) raise FileNotFoundError("Tokenizer not found in expected locations") def export_pytorch(self) -> str: """ Export model in PyTorch native format. Returns: Path to exported model directory """ output_path = self.output_dir / "pytorch" output_path.mkdir(parents=True, exist_ok=True) print("šŸ”„ Exporting to PyTorch format...") # Save model state dict model_path = output_path / "model.pt" torch.save( { "model_state_dict": self.model.state_dict(), "config": self.config.__dict__, "training_info": self.training_info, }, model_path, ) # Save configuration config_path = output_path / "config.json" with open(config_path, "w") as f: json.dump( { "model_config": self.config.__dict__, "training_info": self.training_info, "export_format": "pytorch", }, f, indent=2, ) # Copy tokenizer tokenizer_out = output_path / "tokenizer.model" shutil.copy2(self.tokenizer_path, tokenizer_out) # Create loading script self._create_pytorch_loader(output_path) print(f"āœ… PyTorch export completed: {output_path}") return str(output_path) def export_huggingface(self) -> str: """ Export model in Hugging Face compatible format. Returns: Path to exported model directory """ output_path = self.output_dir / "huggingface" output_path.mkdir(parents=True, exist_ok=True) print("šŸ”„ Exporting to Hugging Face format...") # Save model weights in HF format model_path = output_path / "pytorch_model.bin" torch.save(self.model.state_dict(), model_path) # Create HF-compatible config hf_config = { "architectures": ["GPTModel"], "model_type": "gpt", "vocab_size": self.config.vocab_size, "n_layer": self.config.n_layer, "n_head": self.config.n_head, "n_embd": self.config.n_embd, "block_size": self.config.block_size, "dropout": self.config.dropout, "bias": self.config.bias, "torch_dtype": "float32", "transformers_version": "4.0.0", "openllm_version": "0.1.0", "training_steps": self.training_info["step"], "model_size": self.training_info["model_size"], } config_path = output_path / "config.json" with open(config_path, "w") as f: json.dump(hf_config, f, indent=2) # Copy tokenizer with HF naming shutil.copy2(self.tokenizer_path, output_path / "tokenizer.model") # Create tokenizer config tokenizer_config = { "tokenizer_class": "SentencePieceTokenizer", "model_max_length": self.config.block_size, "vocab_size": self.config.vocab_size, "unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", } with open(output_path / "tokenizer_config.json", "w") as f: json.dump(tokenizer_config, f, indent=2) # Create generation config generation_config = { "max_length": 512, "max_new_tokens": 256, "temperature": 0.7, "top_k": 40, "top_p": 0.9, "do_sample": True, "pad_token_id": 0, "eos_token_id": 1, "bos_token_id": 2, } with open(output_path / "generation_config.json", "w") as f: json.dump(generation_config, f, indent=2) # Create HF loading script self._create_hf_loader(output_path) print(f"āœ… Hugging Face export completed: {output_path}") return str(output_path) def export_onnx(self, optimize_for_inference: bool = False) -> str: """ Export model to ONNX format for optimized inference. Args: optimize_for_inference: Whether to apply ONNX optimizations Returns: Path to exported ONNX model """ try: import onnx import onnxruntime except ImportError: raise ImportError("ONNX export requires: pip install onnx onnxruntime") output_path = self.output_dir / "onnx" output_path.mkdir(parents=True, exist_ok=True) print("šŸ”„ Exporting to ONNX format...") # Prepare model for export self.model.eval() # Create dummy input for tracing batch_size = 1 seq_len = 64 # Use shorter sequence for compatibility dummy_input = torch.randint(0, self.config.vocab_size, (batch_size, seq_len)) # Export to ONNX onnx_path = output_path / "model.onnx" torch.onnx.export( self.model, dummy_input, onnx_path, export_params=True, opset_version=11, do_constant_folding=True, input_names=["input_ids"], output_names=["logits"], dynamic_axes={ "input_ids": {0: "batch_size", 1: "sequence_length"}, "logits": {0: "batch_size", 1: "sequence_length"}, }, ) # Verify ONNX model onnx_model = onnx.load(str(onnx_path)) onnx.checker.check_model(onnx_model) # Apply optimizations if requested if optimize_for_inference: self._optimize_onnx_model(onnx_path) # Save metadata metadata = { "model_config": self.config.__dict__, "training_info": self.training_info, "export_format": "onnx", "input_shape": [batch_size, seq_len], "input_names": ["input_ids"], "output_names": ["logits"], "optimized": optimize_for_inference, } with open(output_path / "metadata.json", "w") as f: json.dump(metadata, f, indent=2) # Copy tokenizer shutil.copy2(self.tokenizer_path, output_path / "tokenizer.model") # Create ONNX inference script self._create_onnx_inference(output_path) print(f"āœ… ONNX export completed: {onnx_path}") return str(onnx_path) def _optimize_onnx_model(self, onnx_path: Path): """Apply ONNX optimizations for inference.""" try: import onnxruntime from onnxruntime.tools import optimizer print("šŸ”§ Applying ONNX optimizations...") # Create optimized model optimized_path = onnx_path.parent / "model_optimized.onnx" # Apply graph optimizations optimizer.optimize_model( str(onnx_path), str(optimized_path), optimization_level=optimizer.OptimizationLevel.ORT_ENABLE_ALL, ) # Replace original with optimized shutil.move(str(optimized_path), str(onnx_path)) print("āœ… ONNX optimizations applied") except ImportError: print("āš ļø ONNX optimization requires onnxruntime-tools") except Exception as e: print(f"āš ļø ONNX optimization failed: {e}") def _create_pytorch_loader(self, output_path: Path): """Create PyTorch model loader script.""" loader_script = '''#!/usr/bin/env python3 """ PyTorch Model Loader for OpenLLM Usage: from load_model import load_model, generate_text model, tokenizer, config = load_model(".") text = generate_text(model, tokenizer, "Hello world", max_length=50) print(text) """ import torch import json import sentencepiece as spm from pathlib import Path def load_model(model_dir="."): """Load OpenLLM model from PyTorch export.""" model_dir = Path(model_dir) # Load config with open(model_dir / "config.json", 'r') as f: config_data = json.load(f) model_config = config_data['model_config'] # Recreate model architecture (you'll need to have the model.py file) # This is a simplified loader - in practice you'd import your GPTModel class print(f"Model config: {model_config}") print("Note: You need to import and create the actual model class") # Load model state checkpoint = torch.load(model_dir / "model.pt", map_location='cpu') # Load tokenizer tokenizer = smp.SentencePieceProcessor() tokenizer.load(str(model_dir / "tokenizer.model")) return None, tokenizer, model_config # Placeholder def generate_text(model, tokenizer, prompt, max_length=100): """Generate text using the loaded model.""" # Implement text generation return f"Generated text for: {prompt}" if __name__ == "__main__": model, tokenizer, config = load_model() print(f"Model loaded with {config.get('vocab_size', 'unknown')} vocabulary size") ''' with open(output_path / "load_model.py", "w") as f: f.write(loader_script) def _create_hf_loader(self, output_path: Path): """Create Hugging Face model loader script.""" loader_script = '''#!/usr/bin/env python3 """ Hugging Face Compatible Loader for OpenLLM Usage: # Using transformers library (if you implement custom model class) # from transformers import AutoModel, AutoTokenizer # model = AutoModel.from_pretrained(".") # tokenizer = AutoTokenizer.from_pretrained(".") # Manual loading from load_hf_model import load_model_manual model, tokenizer = load_model_manual(".") """ import torch import json import sentencepiece as smp from pathlib import Path def load_model_manual(model_dir="."): """Manually load model in HF format.""" model_dir = Path(model_dir) # Load config with open(model_dir / "config.json", 'r') as f: config = json.load(f) # Load model weights state_dict = torch.load(model_dir / "pytorch_model.bin", map_location='cpu') # Load tokenizer tokenizer = smp.SentencePieceProcessor() tokenizer.load(str(model_dir / "tokenizer.model")) print(f"Loaded model: {config['model_type']} with {config['n_layer']} layers") print(f"Vocabulary size: {config['vocab_size']}") return state_dict, tokenizer if __name__ == "__main__": state_dict, tokenizer = load_model_manual() print(f"Model weights loaded: {len(state_dict)} parameters") print(f"Tokenizer vocabulary: {tokenizer.vocab_size()}") ''' with open(output_path / "load_hf_model.py", "w") as f: f.write(loader_script) def _create_onnx_inference(self, output_path: Path): """Create ONNX inference script.""" inference_script = '''#!/usr/bin/env python3 """ ONNX Inference for OpenLLM Usage: from onnx_inference import ONNXInference inference = ONNXInference(".") output = inference.generate("Hello world", max_length=50) print(output) """ import numpy as np import json import sentencepiece as smp from pathlib import Path try: import onnxruntime as ort except ImportError: print("Install onnxruntime: pip install onnxruntime") ort = None class ONNXInference: def __init__(self, model_dir="."): if ort is None: raise ImportError("onnxruntime not available") model_dir = Path(model_dir) # Load ONNX model self.session = ort.InferenceSession(str(model_dir / "model.onnx")) # Load metadata with open(model_dir / "metadata.json", 'r') as f: self.metadata = json.load(f) # Load tokenizer self.tokenizer = smp.SentencePieceProcessor() self.tokenizer.load(str(model_dir / "tokenizer.model")) print(f"ONNX model loaded: {self.metadata['model_config']['model_name']}") def predict(self, input_ids): """Run inference on input token IDs.""" # Prepare input input_data = {"input_ids": input_ids.astype(np.int64)} # Run inference outputs = self.session.run(None, input_data) return outputs[0] # logits def generate(self, prompt, max_length=50, temperature=0.7): """Generate text from prompt.""" # Tokenize prompt tokens = self.tokenizer.encode(prompt) input_ids = np.array([tokens], dtype=np.int64) # Simple greedy generation (can be improved) generated = tokens.copy() for _ in range(max_length): if len(generated) >= 512: # Max sequence length break # Get current input (last 64 tokens to fit ONNX model) current_input = np.array([generated[-64:]], dtype=np.int64) # Predict next token logits = self.predict(current_input) next_token_logits = logits[0, -1, :] # Last position # Apply temperature and sample if temperature > 0: next_token_logits = next_token_logits / temperature probs = np.exp(next_token_logits) / np.sum(np.exp(next_token_logits)) next_token = np.random.choice(len(probs), p=probs) else: next_token = np.argmax(next_token_logits) generated.append(int(next_token)) # Decode generated text generated_text = self.tokenizer.decode(generated[len(tokens):]) return generated_text if __name__ == "__main__": inference = ONNXInference() result = inference.generate("The future of AI is", max_length=30) print(f"Generated: {result}") ''' with open(output_path / "onnx_inference.py", "w") as f: f.write(inference_script) def export_all_formats(self, optimize_onnx: bool = False) -> Dict[str, str]: """ Export model to all supported formats. Args: optimize_onnx: Whether to optimize ONNX model Returns: Dictionary mapping format names to export paths """ results = {} print("šŸš€ Exporting to all formats...") try: results["pytorch"] = self.export_pytorch() except Exception as e: print(f"āŒ PyTorch export failed: {e}") try: results["huggingface"] = self.export_huggingface() except Exception as e: print(f"āŒ Hugging Face export failed: {e}") try: results["onnx"] = self.export_onnx(optimize_onnx) except Exception as e: print(f"āŒ ONNX export failed: {e}") # Create summary summary = { "export_timestamp": torch.datetime.now().isoformat(), "model_info": { "name": self.config.model_name, "parameters": self.model.get_num_params(), "training_steps": self.training_info["step"], "best_loss": self.training_info["best_loss"], }, "exports": results, } with open(self.output_dir / "export_summary.json", "w") as f: json.dump(summary, f, indent=2) print(f"āœ… Export summary saved: {self.output_dir / 'export_summary.json'}") return results def main(): """Main export function.""" parser = argparse.ArgumentParser( description="Export OpenLLM models to various formats", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Export to PyTorch format python core/src/export_model.py \\ --model_dir models/small-extended-4k \\ --format pytorch \\ --output_dir exports/pytorch/ # Export to Hugging Face format python core/src/export_model.py \\ --model_dir models/small-extended-4k \\ --format huggingface \\ --output_dir exports/huggingface/ # Export to ONNX with optimizations python core/src/export_model.py \\ --model_dir models/small-extended-4k \\ --format onnx \\ --output_dir exports/onnx/ \\ --optimize_for_inference # Export to all formats python core/src/export_model.py \\ --model_dir models/small-extended-4k \\ --format all \\ --output_dir exports/ """, ) parser.add_argument( "--model_dir", required=True, help="Directory containing trained model checkpoints" ) parser.add_argument( "--format", choices=["pytorch", "huggingface", "onnx", "all"], required=True, help="Export format", ) parser.add_argument("--output_dir", required=True, help="Output directory for exported models") parser.add_argument( "--optimize_for_inference", action="store_true", help="Apply optimizations for inference (ONNX only)", ) args = parser.parse_args() print("šŸ“¦ OpenLLM Model Export") print("=" * 50) try: # Create exporter exporter = ModelExporter(args.model_dir, args.output_dir) # Export based on format if args.format == "pytorch": result = exporter.export_pytorch() print(f"\nāœ… PyTorch export completed: {result}") elif args.format == "huggingface": result = exporter.export_huggingface() print(f"\nāœ… Hugging Face export completed: {result}") elif args.format == "onnx": result = exporter.export_onnx(args.optimize_for_inference) print(f"\nāœ… ONNX export completed: {result}") elif args.format == "all": results = exporter.export_all_formats(args.optimize_for_inference) print("\nāœ… All formats exported:") for fmt, path in results.items(): print(f" {fmt}: {path}") print("\nšŸŽ‰ Export completed successfully!") except Exception as e: print(f"\nāŒ Export failed: {e}") import traceback traceback.print_exc() return False return True if __name__ == "__main__": main()