#!/usr/bin/env python3
# Copyright (C) 2024 Louis Chua Bean Chong
#
# This file is part of OpenLLM.
#
# OpenLLM is dual-licensed:
# 1. For open source use: GNU General Public License v3.0
# 2. For commercial use: Commercial License (contact for details)
#
# See LICENSE and docs/LICENSES.md for full license information.
"""
OpenLLM Model Export Script
This script implements Step 6 of the training pipeline: Model Export & Deployment.
It exports trained OpenLLM models to various formats for production inference.
Supported Formats:
- PyTorch native format (for Python inference)
- Hugging Face format (for ecosystem compatibility)
- ONNX format (for optimized cross-platform inference)
Usage:
    # PyTorch format
    python core/src/export_model.py \\
        --model_dir models/small-extended-4k \\
        --format pytorch \\
        --output_dir exports/pytorch/

    # Hugging Face format
    python core/src/export_model.py \\
        --model_dir models/small-extended-4k \\
        --format huggingface \\
        --output_dir exports/huggingface/

    # ONNX format
    python core/src/export_model.py \\
        --model_dir models/small-extended-4k \\
        --format onnx \\
        --output_dir exports/onnx/ \\
        --optimize_for_inference
Author: Louis Chua Bean Chong
License: GPLv3
"""
import argparse
import json
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict
import torch
# Add current directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from model import create_model
class ModelExporter:
"""
Comprehensive model exporter for OpenLLM models.
Handles export to multiple formats including PyTorch, Hugging Face,
and ONNX for different deployment scenarios.
"""
def __init__(self, model_dir: str, output_dir: str):
"""
Initialize the model exporter.
Args:
model_dir: Directory containing trained model checkpoints
output_dir: Base directory for exported models
"""
self.model_dir = Path(model_dir)
self.output_dir = Path(output_dir)
self.output_dir.mkdir(parents=True, exist_ok=True)
# Load model and metadata
self.model, self.config, self.training_info = self._load_model()
self.tokenizer_path = self._find_tokenizer()
print("πŸ”§ ModelExporter initialized")
print(f" Model: {self.config.model_name}")
print(f" Parameters: {self.model.get_num_params():,}")
print(f" Output directory: {output_dir}")
def _load_model(self):
"""Load model from checkpoint directory."""
# Find best model checkpoint
best_model_path = self.model_dir / "best_model.pt"
if not best_model_path.exists():
# Look for latest checkpoint
checkpoints = list(self.model_dir.glob("checkpoint_step_*.pt"))
if not checkpoints:
raise FileNotFoundError(f"No model checkpoints found in {self.model_dir}")
            # Pick the newest checkpoint by the step number embedded in the
            # filename (checkpoint_step_<N>.pt)
            latest_checkpoint = max(checkpoints, key=lambda p: int(p.stem.split("_")[-1]))
best_model_path = latest_checkpoint
print(f"πŸ“‚ Loading model from {best_model_path}")
# Load checkpoint
checkpoint = torch.load(best_model_path, map_location="cpu")
# Determine model size from config
config_dict = checkpoint.get("config", {})
n_layer = config_dict.get("n_layer", 12)
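        # Heuristic: map layer count to the size tiers understood by create_model()
        # (<= 6 layers -> small, <= 12 -> medium, otherwise large)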
if n_layer <= 6:
model_size = "small"
elif n_layer <= 12:
model_size = "medium"
else:
model_size = "large"
# Create and load model
model = create_model(model_size)
model.load_state_dict(checkpoint["model_state_dict"])
model.eval() # Set to evaluation mode
# Extract training info
training_info = {
"step": checkpoint.get("step", 0),
"best_loss": checkpoint.get("best_loss", 0.0),
"model_size": model_size,
}
return model, model.config, training_info
def _find_tokenizer(self):
"""Find tokenizer path."""
# Try multiple possible locations
possible_paths = [
self.model_dir.parent / "tokenizer" / "tokenizer.model",
Path("data/tokenizer/tokenizer.model"),
self.model_dir / "tokenizer.model",
]
for path in possible_paths:
if path.exists():
return str(path)
raise FileNotFoundError("Tokenizer not found in expected locations")
def export_pytorch(self) -> str:
"""
Export model in PyTorch native format.
Returns:
Path to exported model directory
"""
output_path = self.output_dir / "pytorch"
output_path.mkdir(parents=True, exist_ok=True)
print("πŸ”„ Exporting to PyTorch format...")
# Save model state dict
model_path = output_path / "model.pt"
torch.save(
{
"model_state_dict": self.model.state_dict(),
"config": self.config.__dict__,
"training_info": self.training_info,
},
model_path,
)
# Save configuration
config_path = output_path / "config.json"
with open(config_path, "w") as f:
json.dump(
{
"model_config": self.config.__dict__,
"training_info": self.training_info,
"export_format": "pytorch",
},
f,
indent=2,
)
# Copy tokenizer
tokenizer_out = output_path / "tokenizer.model"
shutil.copy2(self.tokenizer_path, tokenizer_out)
# Create loading script
self._create_pytorch_loader(output_path)
print(f"βœ… PyTorch export completed: {output_path}")
return str(output_path)
def export_huggingface(self) -> str:
"""
Export model in Hugging Face compatible format.
Returns:
Path to exported model directory
"""
output_path = self.output_dir / "huggingface"
output_path.mkdir(parents=True, exist_ok=True)
print("πŸ”„ Exporting to Hugging Face format...")
# Save model weights in HF format
model_path = output_path / "pytorch_model.bin"
torch.save(self.model.state_dict(), model_path)
# Create HF-compatible config
hf_config = {
"architectures": ["GPTModel"],
"model_type": "gpt",
"vocab_size": self.config.vocab_size,
"n_layer": self.config.n_layer,
"n_head": self.config.n_head,
"n_embd": self.config.n_embd,
"block_size": self.config.block_size,
"dropout": self.config.dropout,
"bias": self.config.bias,
"torch_dtype": "float32",
"transformers_version": "4.0.0",
"openllm_version": "0.1.0",
"training_steps": self.training_info["step"],
"model_size": self.training_info["model_size"],
}
config_path = output_path / "config.json"
with open(config_path, "w") as f:
json.dump(hf_config, f, indent=2)
# Copy tokenizer with HF naming
shutil.copy2(self.tokenizer_path, output_path / "tokenizer.model")
# Create tokenizer config
tokenizer_config = {
"tokenizer_class": "SentencePieceTokenizer",
"model_max_length": self.config.block_size,
"vocab_size": self.config.vocab_size,
"unk_token": "<unk>",
"bos_token": "<s>",
"eos_token": "</s>",
"pad_token": "<pad>",
}
with open(output_path / "tokenizer_config.json", "w") as f:
json.dump(tokenizer_config, f, indent=2)
# Create generation config
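        # Note: the special-token IDs below are assumed to match the trained
        # SentencePiece model; verify them against your tokenizer before use.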
generation_config = {
"max_length": 512,
"max_new_tokens": 256,
"temperature": 0.7,
"top_k": 40,
"top_p": 0.9,
"do_sample": True,
"pad_token_id": 0,
"eos_token_id": 1,
"bos_token_id": 2,
}
with open(output_path / "generation_config.json", "w") as f:
json.dump(generation_config, f, indent=2)
# Create HF loading script
self._create_hf_loader(output_path)
print(f"βœ… Hugging Face export completed: {output_path}")
return str(output_path)
def export_onnx(self, optimize_for_inference: bool = False) -> str:
"""
Export model to ONNX format for optimized inference.
Args:
optimize_for_inference: Whether to apply ONNX optimizations
Returns:
Path to exported ONNX model
"""
try:
import onnx
import onnxruntime
except ImportError:
raise ImportError("ONNX export requires: pip install onnx onnxruntime")
output_path = self.output_dir / "onnx"
output_path.mkdir(parents=True, exist_ok=True)
print("πŸ”„ Exporting to ONNX format...")
# Prepare model for export
self.model.eval()
# Create dummy input for tracing
batch_size = 1
seq_len = 64 # Use shorter sequence for compatibility
dummy_input = torch.randint(0, self.config.vocab_size, (batch_size, seq_len))
# Export to ONNX
onnx_path = output_path / "model.onnx"
torch.onnx.export(
self.model,
dummy_input,
onnx_path,
export_params=True,
opset_version=11,
do_constant_folding=True,
input_names=["input_ids"],
output_names=["logits"],
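            # Mark batch and sequence dims as dynamic so the exported graph
            # accepts variable-length inputs, not just the traced 64-token shape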
dynamic_axes={
"input_ids": {0: "batch_size", 1: "sequence_length"},
"logits": {0: "batch_size", 1: "sequence_length"},
},
)
# Verify ONNX model
onnx_model = onnx.load(str(onnx_path))
onnx.checker.check_model(onnx_model)
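        # Optional sanity check (an illustrative addition, not part of the
        # original pipeline): compare ONNX Runtime output against the PyTorch
        # model on the same dummy input to catch tracing discrepancies early.
        with torch.no_grad():
            torch_out = self.model(dummy_input)
        torch_logits = torch_out[0] if isinstance(torch_out, tuple) else torch_out
        ort_session = onnxruntime.InferenceSession(str(onnx_path))
        ort_logits = ort_session.run(None, {"input_ids": dummy_input.numpy()})[0]
        if not torch.allclose(torch_logits, torch.from_numpy(ort_logits), atol=1e-4):
            print("⚠️ PyTorch and ONNX Runtime outputs differ beyond 1e-4 tolerance")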
# Apply optimizations if requested
if optimize_for_inference:
self._optimize_onnx_model(onnx_path)
# Save metadata
metadata = {
"model_config": self.config.__dict__,
"training_info": self.training_info,
"export_format": "onnx",
"input_shape": [batch_size, seq_len],
"input_names": ["input_ids"],
"output_names": ["logits"],
"optimized": optimize_for_inference,
}
with open(output_path / "metadata.json", "w") as f:
json.dump(metadata, f, indent=2)
# Copy tokenizer
shutil.copy2(self.tokenizer_path, output_path / "tokenizer.model")
# Create ONNX inference script
self._create_onnx_inference(output_path)
print(f"βœ… ONNX export completed: {onnx_path}")
return str(onnx_path)
    def _optimize_onnx_model(self, onnx_path: Path):
        """Apply ONNX Runtime graph optimizations for inference."""
        try:
            import onnxruntime as ort

            print("πŸ”§ Applying ONNX optimizations...")
            optimized_path = onnx_path.parent / "model_optimized.onnx"
            # Run all graph optimizations offline and serialize the result;
            # creating a session with optimized_model_filepath set writes the
            # optimized graph to disk.
            sess_options = ort.SessionOptions()
            sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
            sess_options.optimized_model_filepath = str(optimized_path)
            ort.InferenceSession(str(onnx_path), sess_options)
            # Replace original with optimized
            shutil.move(str(optimized_path), str(onnx_path))
            print("βœ… ONNX optimizations applied")
        except ImportError:
            print("⚠️ ONNX optimization requires onnxruntime")
        except Exception as e:
            print(f"⚠️ ONNX optimization failed: {e}")
def _create_pytorch_loader(self, output_path: Path):
"""Create PyTorch model loader script."""
loader_script = '''#!/usr/bin/env python3
"""
PyTorch Model Loader for OpenLLM
Usage:
from load_model import load_model, generate_text
model, tokenizer, config = load_model(".")
text = generate_text(model, tokenizer, "Hello world", max_length=50)
print(text)
"""
import torch
import json
import sentencepiece as spm
from pathlib import Path
def load_model(model_dir="."):
"""Load OpenLLM model from PyTorch export."""
model_dir = Path(model_dir)
# Load config
with open(model_dir / "config.json", 'r') as f:
config_data = json.load(f)
model_config = config_data['model_config']
# Recreate model architecture (you'll need to have the model.py file)
# This is a simplified loader - in practice you'd import your GPTModel class
print(f"Model config: {model_config}")
print("Note: You need to import and create the actual model class")
# Load model state
checkpoint = torch.load(model_dir / "model.pt", map_location='cpu')
# Load tokenizer
    tokenizer = spm.SentencePieceProcessor()
tokenizer.load(str(model_dir / "tokenizer.model"))
return None, tokenizer, model_config # Placeholder
def generate_text(model, tokenizer, prompt, max_length=100):
"""Generate text using the loaded model."""
# Implement text generation
return f"Generated text for: {prompt}"
if __name__ == "__main__":
model, tokenizer, config = load_model()
print(f"Model loaded with {config.get('vocab_size', 'unknown')} vocabulary size")
'''
with open(output_path / "load_model.py", "w") as f:
f.write(loader_script)
def _create_hf_loader(self, output_path: Path):
"""Create Hugging Face model loader script."""
loader_script = '''#!/usr/bin/env python3
"""
Hugging Face Compatible Loader for OpenLLM
Usage:
# Using transformers library (if you implement custom model class)
# from transformers import AutoModel, AutoTokenizer
# model = AutoModel.from_pretrained(".")
# tokenizer = AutoTokenizer.from_pretrained(".")
# Manual loading
from load_hf_model import load_model_manual
model, tokenizer = load_model_manual(".")
"""
import torch
import json
import sentencepiece as spm
from pathlib import Path
def load_model_manual(model_dir="."):
"""Manually load model in HF format."""
model_dir = Path(model_dir)
# Load config
with open(model_dir / "config.json", 'r') as f:
config = json.load(f)
# Load model weights
state_dict = torch.load(model_dir / "pytorch_model.bin", map_location='cpu')
# Load tokenizer
    tokenizer = spm.SentencePieceProcessor()
tokenizer.load(str(model_dir / "tokenizer.model"))
print(f"Loaded model: {config['model_type']} with {config['n_layer']} layers")
print(f"Vocabulary size: {config['vocab_size']}")
return state_dict, tokenizer
if __name__ == "__main__":
state_dict, tokenizer = load_model_manual()
print(f"Model weights loaded: {len(state_dict)} parameters")
print(f"Tokenizer vocabulary: {tokenizer.vocab_size()}")
'''
with open(output_path / "load_hf_model.py", "w") as f:
f.write(loader_script)
def _create_onnx_inference(self, output_path: Path):
"""Create ONNX inference script."""
inference_script = '''#!/usr/bin/env python3
"""
ONNX Inference for OpenLLM
Usage:
from onnx_inference import ONNXInference
inference = ONNXInference(".")
output = inference.generate("Hello world", max_length=50)
print(output)
"""
import numpy as np
import json
import sentencepiece as spm
from pathlib import Path
try:
import onnxruntime as ort
except ImportError:
print("Install onnxruntime: pip install onnxruntime")
ort = None
class ONNXInference:
def __init__(self, model_dir="."):
if ort is None:
raise ImportError("onnxruntime not available")
model_dir = Path(model_dir)
# Load ONNX model
self.session = ort.InferenceSession(str(model_dir / "model.onnx"))
# Load metadata
with open(model_dir / "metadata.json", 'r') as f:
self.metadata = json.load(f)
# Load tokenizer
        self.tokenizer = spm.SentencePieceProcessor()
self.tokenizer.load(str(model_dir / "tokenizer.model"))
print(f"ONNX model loaded: {self.metadata['model_config']['model_name']}")
def predict(self, input_ids):
"""Run inference on input token IDs."""
# Prepare input
input_data = {"input_ids": input_ids.astype(np.int64)}
# Run inference
outputs = self.session.run(None, input_data)
return outputs[0] # logits
def generate(self, prompt, max_length=50, temperature=0.7):
"""Generate text from prompt."""
# Tokenize prompt
tokens = self.tokenizer.encode(prompt)
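        # Add a batch dimension; the exported graph expects [batch, seq] int64 IDs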
input_ids = np.array([tokens], dtype=np.int64)
        # Simple autoregressive generation loop (greedy when temperature == 0)
generated = tokens.copy()
for _ in range(max_length):
if len(generated) >= 512: # Max sequence length
break
# Get current input (last 64 tokens to fit ONNX model)
current_input = np.array([generated[-64:]], dtype=np.int64)
# Predict next token
logits = self.predict(current_input)
next_token_logits = logits[0, -1, :] # Last position
            # Apply temperature and sample
            if temperature > 0:
                next_token_logits = next_token_logits / temperature
                # Numerically stable softmax: subtract the max before exponentiating
                next_token_logits = next_token_logits - np.max(next_token_logits)
                probs = np.exp(next_token_logits) / np.sum(np.exp(next_token_logits))
                next_token = np.random.choice(len(probs), p=probs)
            else:
                next_token = np.argmax(next_token_logits)
generated.append(int(next_token))
# Decode generated text
generated_text = self.tokenizer.decode(generated[len(tokens):])
return generated_text
if __name__ == "__main__":
inference = ONNXInference()
result = inference.generate("The future of AI is", max_length=30)
print(f"Generated: {result}")
'''
with open(output_path / "onnx_inference.py", "w") as f:
f.write(inference_script)
def export_all_formats(self, optimize_onnx: bool = False) -> Dict[str, str]:
"""
Export model to all supported formats.
Args:
optimize_onnx: Whether to optimize ONNX model
Returns:
Dictionary mapping format names to export paths
"""
results = {}
print("πŸš€ Exporting to all formats...")
try:
results["pytorch"] = self.export_pytorch()
except Exception as e:
print(f"❌ PyTorch export failed: {e}")
try:
results["huggingface"] = self.export_huggingface()
except Exception as e:
print(f"❌ Hugging Face export failed: {e}")
try:
results["onnx"] = self.export_onnx(optimize_onnx)
except Exception as e:
print(f"❌ ONNX export failed: {e}")
# Create summary
summary = {
"export_timestamp": torch.datetime.now().isoformat(),
"model_info": {
"name": self.config.model_name,
"parameters": self.model.get_num_params(),
"training_steps": self.training_info["step"],
"best_loss": self.training_info["best_loss"],
},
"exports": results,
}
with open(self.output_dir / "export_summary.json", "w") as f:
json.dump(summary, f, indent=2)
print(f"βœ… Export summary saved: {self.output_dir / 'export_summary.json'}")
return results
def main():
"""Main export function."""
parser = argparse.ArgumentParser(
description="Export OpenLLM models to various formats",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Export to PyTorch format
python core/src/export_model.py \\
--model_dir models/small-extended-4k \\
--format pytorch \\
--output_dir exports/pytorch/
# Export to Hugging Face format
python core/src/export_model.py \\
--model_dir models/small-extended-4k \\
--format huggingface \\
--output_dir exports/huggingface/
# Export to ONNX with optimizations
python core/src/export_model.py \\
--model_dir models/small-extended-4k \\
--format onnx \\
--output_dir exports/onnx/ \\
--optimize_for_inference
# Export to all formats
python core/src/export_model.py \\
--model_dir models/small-extended-4k \\
--format all \\
--output_dir exports/
""",
)
parser.add_argument(
"--model_dir", required=True, help="Directory containing trained model checkpoints"
)
parser.add_argument(
"--format",
choices=["pytorch", "huggingface", "onnx", "all"],
required=True,
help="Export format",
)
parser.add_argument("--output_dir", required=True, help="Output directory for exported models")
parser.add_argument(
"--optimize_for_inference",
action="store_true",
help="Apply optimizations for inference (ONNX only)",
)
args = parser.parse_args()
print("πŸ“¦ OpenLLM Model Export")
print("=" * 50)
try:
# Create exporter
exporter = ModelExporter(args.model_dir, args.output_dir)
# Export based on format
if args.format == "pytorch":
result = exporter.export_pytorch()
print(f"\nβœ… PyTorch export completed: {result}")
elif args.format == "huggingface":
result = exporter.export_huggingface()
print(f"\nβœ… Hugging Face export completed: {result}")
elif args.format == "onnx":
result = exporter.export_onnx(args.optimize_for_inference)
print(f"\nβœ… ONNX export completed: {result}")
elif args.format == "all":
results = exporter.export_all_formats(args.optimize_for_inference)
print("\nβœ… All formats exported:")
for fmt, path in results.items():
print(f" {fmt}: {path}")
print("\nπŸŽ‰ Export completed successfully!")
except Exception as e:
print(f"\n❌ Export failed: {e}")
import traceback
traceback.print_exc()
return False
return True
if __name__ == "__main__":
    sys.exit(0 if main() else 1)