class SimpleTokenizer:
    """Character-level fallback tokenizer.

    Every character in ``chars`` is assigned its position in that sequence as
    its token id.

    Args:
        vocab_file: Optional path to a JSON file. When the path exists, its
            ``"chars"`` entry (if present) replaces the default lowercase
            ASCII alphabet.
    """

    _DEFAULT_CHARS = 'abcdefghijklmnopqrstuvwxyz'

    def __init__(self, vocab_file=None):
        chars = list(self._DEFAULT_CHARS)
        if vocab_file and os.path.exists(vocab_file):
            # Explicit UTF-8 so a non-ASCII vocabulary decodes the same way on
            # every platform instead of depending on the locale default.
            with open(vocab_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            chars = data.get('chars', chars)

        self.chars = chars
        self.stoi = {c: i for i, c in enumerate(self.chars)}
        self.itos = {i: c for i, c in enumerate(self.chars)}
        self.vocab_size = len(self.chars)

    def encode(self, text):
        """Return one token id per character; unknown characters map to id 0."""
        return [self.stoi.get(c, 0) for c in text]

    def decode(self, tokens):
        """Join the characters for ``tokens``; unknown ids are dropped."""
        return ''.join(self.itos.get(t, '') for t in tokens)


def _load_tokenizer(tokenizer_path):
    """Choose the tokenizer described by ``tokenizer_path``, or a default one."""
    has_tokenizer_file = bool(tokenizer_path) and os.path.exists(tokenizer_path)

    tokenizer_info = {}
    if has_tokenizer_file:
        with open(tokenizer_path, 'r', encoding='utf-8') as f:
            tokenizer_info = json.load(f)

    # Without a tokenizer file, tiktoken is tried first; with one, tiktoken is
    # used only when the file asks for it.
    wants_tiktoken = (
        not has_tokenizer_file or tokenizer_info.get('type') == 'tiktoken'
    )
    if wants_tiktoken and TIKTOKEN_AVAILABLE:
        return tiktoken.get_encoding('gpt2')

    if not has_tokenizer_file:
        return SimpleTokenizer()

    if wants_tiktoken:
        # Make the downgrade visible instead of silently switching tokenizer.
        print("Warning: tokenizer.json requests tiktoken but it is not "
              "installed; falling back to SimpleTokenizer")

    # Hand the file to SimpleTokenizer so a saved "chars" vocabulary is
    # actually used; it keeps its default alphabet when the key is absent.
    return SimpleTokenizer(tokenizer_path)


def load_model_and_tokenizer(checkpoint_path, device='cuda'):
    """Load a trained model and its tokenizer.

    Args:
        checkpoint_path: Either a checkpoint file, or a directory containing
            ``pytorch_model.bin`` and optionally ``tokenizer.json``.
        device: Device passed to ``torch.load(map_location=...)`` and to
            ``model.to(...)``.

    Returns:
        ``(model, tokenizer)``. The model is in eval mode on ``device``. The
        tokenizer is a tiktoken ``gpt2`` encoding or a ``SimpleTokenizer``.

    Raises:
        KeyError: If the checkpoint has no ``'model_state_dict'`` entry.
    """
    print(f"Loading model from {checkpoint_path}")

    # Resolve where the weights (and, for directories, the tokenizer) live
    if os.path.isdir(checkpoint_path):
        model_path = os.path.join(checkpoint_path, "pytorch_model.bin")
        tokenizer_path = os.path.join(checkpoint_path, "tokenizer.json")
    else:
        model_path = checkpoint_path
        tokenizer_path = None

    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    checkpoint = torch.load(model_path, map_location=device)

    # Get model config
    config_dict = checkpoint.get('config', {})
    model_config_dict = config_dict.get('model_config_dict', {})

    if not model_config_dict:
        # Fallback config used when the checkpoint does not carry one
        model_config_dict = {
            'vocab_size': 50304,
            'n_positions': 2048,
            'n_embd': 768,
            'n_layer': 12,
            'n_head': 12,
            'n_kv_head': 4,
            'rotary_dim': 64,
            'intermediate_size': 3072,
            'activation': 'swiglu',
            'norm_type': 'rmsnorm',
            'norm_eps': 1e-5,
            'dropout': 0.0,
            'attention_dropout': 0.0,
            'residual_dropout': 0.1,
            'embed_dropout': 0.1,
            'tie_word_embeddings': True,
            'use_cache': True,
            'attention_bias': False,
            'mlp_bias': False,
            'flash_attention': True,
            'gradient_checkpointing': False,
            'max_position_embeddings': 2048,
        }

    # Create model and restore weights
    model_config = ModelConfig(**model_config_dict)
    model = AdvancedGPTModel(model_config)
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    tokenizer = _load_tokenizer(tokenizer_path)

    print(f"Model loaded: {sum(p.numel() for p in model.parameters()):,} parameters")

    # Support tiktoken Encoding (n_vocab) and SimpleTokenizer (vocab_size/chars)
    tok_vs = next(
        (getattr(tokenizer, attr)
         for attr in ('vocab_size', 'n_vocab')
         if hasattr(tokenizer, attr)),
        None,
    )
    if tok_vs is None:
        tok_vs = len(getattr(tokenizer, 'chars', [])) or 'unknown'
    print(f"Tokenizer: {tok_vs} vocab size")

    return model, tokenizer