# model_prtr.py
import os
import sys
import math
import torch
import logging
import importlib
import torch.nn as nn
from config import load_config  # app_config is rebuilt below via load_config()
# Fix: Move transformers imports to module scope
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Optional, List, Dict, Any, Union
from sentence_transformers import SentenceTransformer
# Import service registry
from service_registry import registry, MODEL, TOKENIZER, PRETRAINED_MODEL
# First import base interfaces
from base_interfaces.common_types import *
from base_interfaces.model_interface import AbstractModel
from model_manager import safe_get_config_value
app_config = load_config()
logger = logging.getLogger(__name__)
# ----------------------------
# Positional Encoding Module (for decoder)
# ----------------------------
class PositionalEncoding(nn.Module):
def __init__(self, d_model: int, max_len: Optional[int] = None):
super().__init__()
# Get MAX_SEQ_LENGTH safely from config
if max_len is None:
if hasattr(app_config, "TRANSFORMER_CONFIG") and isinstance(app_config.TRANSFORMER_CONFIG, dict):
max_len = app_config.TRANSFORMER_CONFIG.get("MAX_SEQ_LENGTH", 1024)
else:
max_len = 1024 # Safe default
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float) * (-math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(1) # shape: (max_len, 1, d_model)
self.register_buffer('pe', pe)
def forward(self, x: torch.Tensor) -> torch.Tensor:
# x shape: (seq_len, batch_size, d_model)
seq_len = x.size(0)
x = x + self.pe[:seq_len]
return x
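# Illustrative usage sketch (not part of the module's API): the positional
# buffer has shape (max_len, 1, d_model) and is broadcast over the batch
# dimension, so adding it leaves the input shape unchanged.
#
#   >>> pe = PositionalEncoding(d_model=768)
#   >>> x = torch.zeros(10, 4, 768)   # (seq_len, batch, d_model)
#   >>> pe(x).shape
#   torch.Size([10, 4, 768])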
# ----------------------------
# Wildnerve-tlm01: wrapper around a pretrained GPT-2 model
# ----------------------------
class Wildnerve_tlm01(nn.Module, AbstractModel):
"""A Transformer-based language model that uses:
- A pretrained GPT-2 model for powerful text generation
- A custom decoder stack
The model uses the GPT-2 tokenizer for consistent tokenization."""
def __init__(
self,
vocab_size: int = 50257, # Standardized GPT-2 vocab size
specialization: str = "general",
dataset_path: str = None,
model_name: str = "gpt2", # Standardized to GPT-2
embedding_dim: int = 768,
num_heads: int = 12,
hidden_dim: int = 768,
num_layers: int = 6,
output_size: int = 50257, # Standardized GPT-2 vocab size
dropout: float = 0.1,
max_seq_length: int = 1024, # GPT-2 supports longer contexts
pooling_mode: str = "last", # GPT-2 typically uses last token
tokenizer=None,
max_length: Optional[int] = None
) -> None:
super().__init__()
self.specialization = specialization
self.dataset_path = dataset_path
self.model_name = model_name
self.pooling_mode = pooling_mode
self.vocab_size = vocab_size
self.max_seq_length = max_seq_length
self.embedding_dim = embedding_dim
self.num_heads = num_heads
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.output_size = output_size
self.dropout = dropout
# fetch MAX_SEQ_LENGTH safely
cfg = safe_get_config_value(app_config, "TRANSFORMER_CONFIG", {})
self.max_length = max_length or cfg.get("MAX_SEQ_LENGTH", 1024) # Increased for GPT-2
# Use GPT-2 directly for text generation (not a simplified version)
try:
            # GPT2LMHeadModel and GPT2Tokenizer are already imported at module scope
# Initialize the model and tokenizer
self.model_name = model_name
self.gpt2_model = None # Will be loaded on first use
# Ensure proper tokenizer setup for GPT-2
if tokenizer is not None:
self.tokenizer = tokenizer
elif registry.has(TOKENIZER):
self.tokenizer = registry.get(TOKENIZER)
else:
self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
# Ensure GPT-2 tokenizer has pad_token set (critical fix)
if self.tokenizer.pad_token_id is None:
self.tokenizer.pad_token = self.tokenizer.eos_token
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
logger.info(f"Successfully initialized GPT-2 model: {model_name}")
except Exception as e:
logger.error(f"Error initializing GPT-2 model: {e}", exc_info=True)
raise
# Register this model instance in the registry by specialization
model_registry_key = f"model_{specialization}"
registry.register(model_registry_key, self)
# Also register as pretrained model
registry.register(PRETRAINED_MODEL, self, overwrite=True)
logger.info("Registered GPT-2 model as pretrained model")
def _ensure_model_loaded(self):
if self.gpt2_model is None:
self.gpt2_model = GPT2LMHeadModel.from_pretrained(self.model_name)
# Replace the old forward method with GPT-2 specific implementation
def forward(self, src: torch.Tensor, tgt: Optional[torch.Tensor] = None,
src_key_padding_mask: Optional[torch.Tensor] = None,
tgt_key_padding_mask: Optional[torch.Tensor] = None,
return_sequence: bool = False,
**kwargs) -> torch.Tensor:
self._ensure_model_loaded() # Load model only when needed
# Use GPT-2 directly for generation
outputs = self.gpt2_model(src, **kwargs)
return outputs.logits
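    # Illustrative sketch (assumes the GPT-2 weights can be fetched or are
    # cached locally): forward() returns raw next-token logits with the
    # vocabulary as the last dimension.
    #
    #   >>> m = Wildnerve_tlm01()
    #   >>> ids = m.tokenizer("Hello", return_tensors="pt").input_ids
    #   >>> m(ids).shape[-1]   # vocab dimension
    #   50257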
# Update generate to handle both direct prompt and tokenized input
def generate(self, prompt=None, input_ids=None, max_length=None, **kwargs):
"""Generate text using the GPT-2 model"""
self._ensure_model_loaded() # Load model only when needed
try:
# Try to use adapter_layer.generate if available (consolidate generation paths)
adapter_layer = registry.get("adapter_layer")
if adapter_layer and hasattr(adapter_layer, "generate"):
if prompt:
return adapter_layer.generate(prompt, max_length=max_length, **kwargs)
elif input_ids is not None and self.tokenizer:
# Convert input_ids back to text
prompt = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
return adapter_layer.generate(prompt, max_length=max_length, **kwargs)
# Continue with direct generation if adapter_layer not available
# Enhanced generation parameters
generation_config = {
"max_length": max_length or 150,
"temperature": kwargs.get('temperature', 0.7),
"top_p": kwargs.get('top_p', 0.95),
"top_k": kwargs.get('top_k', 50),
"repetition_penalty": kwargs.get('repetition_penalty', 1.3),
"no_repeat_ngram_size": kwargs.get('no_repeat_ngram_size', 3),
"do_sample": True,
"pad_token_id": self.tokenizer.pad_token_id,
"eos_token_id": self.tokenizer.eos_token_id,
"early_stopping": True,
"penalty_alpha": 0.6 # Add penalty alpha for better response quality
}
            # Handle either string prompt or direct input_ids
            attention_mask = None
            if isinstance(prompt, str) and input_ids is None:
                inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
                input_ids = inputs.input_ids
                attention_mask = inputs.attention_mask
elif input_ids is None:
raise ValueError("Either prompt or input_ids must be provided")
# Add user-provided kwargs that we didn't explicitly set
for k, v in kwargs.items():
if k not in generation_config and k not in ('prompt', 'context'):
generation_config[k] = v
            # Switch from max_length to max_new_tokens when the input nearly
            # fills the max_length budget, so generation still has headroom
            if input_ids.shape[1] > (generation_config["max_length"] - 50):
                logger.info(f"Input length {input_ids.shape[1]} is close to max_length, using max_new_tokens instead")
                del generation_config["max_length"]
                # Assumed default continuation budget of 100 tokens
                generation_config.setdefault("max_new_tokens", 100)
            # Generate output using the full GPT-2 model
            if attention_mask is not None:
                generation_config["attention_mask"] = attention_mask
            output_ids = self.gpt2_model.generate(input_ids, **generation_config)
# Decode the output and ensure it's a string, not a tensor
if torch.is_tensor(output_ids):
generated_text = self.tokenizer.decode(output_ids[0].cpu(), skip_special_tokens=True)
else:
generated_text = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
return generated_text
except Exception as e:
logger.error(f"Error in GPT-2 generation: {e}", exc_info=True)
return f"Error generating response: {str(e)}"
def generate_streaming(self, prompt=None, input_ids=None, **kwargs):
"""Generate tokens one by one in streaming fashion"""
self._ensure_model_loaded() # Load model only when needed
try:
# Handle either text or tokenized input
if prompt is not None and input_ids is None:
inputs = self.tokenizer(
prompt,
return_tensors="pt",
padding=True,
truncation=True,
max_length=self.max_length
)
input_ids = inputs.input_ids
# Set generation parameters
max_length = kwargs.get('max_length', min(self.max_length, 200))
temperature = kwargs.get('temperature', 0.7)
top_p = kwargs.get('top_p', 0.9)
# Generate with token streaming
from transformers import TextIteratorStreamer
from threading import Thread
streamer = TextIteratorStreamer(
self.tokenizer,
timeout=10.0,
skip_prompt=True,
skip_special_tokens=True
)
            generation_kwargs = dict(
                input_ids=input_ids,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                streamer=streamer,
                do_sample=True,
                pad_token_id=self.tokenizer.pad_token_id,  # avoid pad-token warnings
            )
# Create a thread to run the generation
thread = Thread(target=self.gpt2_model.generate, kwargs=generation_kwargs)
thread.start()
# Stream the output tokens
for token in streamer:
yield token
except Exception as e:
logger.error(f"Error in streaming generation: {e}", exc_info=True)
yield f"Error: {str(e)}"
#-------Pretrained Transformer Model-------------
class PretrainedTransformer(nn.Module, AbstractModel):
"""A simple wrapper around a pretrained Hugging Face transformer model."""
def __init__(
self,
vocab_size=50257, # Updated for GPT-2 (was 30522)
specialization="general",
dataset_path=None,
model_name="gpt2", # Updated from bert-base-uncased
embedding_dim=768,
num_heads=12,
hidden_dim=768,
num_layers=6,
output_size=768,
dropout=0.1,
max_seq_length=1024, # Increased for GPT-2
pooling_mode="last", # Changed from "mean" for GPT-2
tokenizer=None,
**kwargs
) -> None:
super().__init__()
# Optionally track model usage
self.model_last_used = {}
        # Unified tokenizer initialization:
        #   1. Use an explicitly provided tokenizer, or one from the registry
        #   2. Otherwise load the GPT-2 tokenizer, falling back to None on failure
if tokenizer is not None:
self.tokenizer = tokenizer
else:
# Use imports from module scope
if registry.has(TOKENIZER):
self.tokenizer = registry.get(TOKENIZER)
else:
try:
self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
logger.info("Loaded primary tokenizer: gpt2")
# Add pad token if not present (GPT-2 doesn't have one by default)
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token
except Exception as e:
logger.warning(f"Primary tokenizer load failed: {e}")
self.tokenizer = None
                if self.tokenizer is not None:
                    registry.register(TOKENIZER, self.tokenizer)
# Set model names for fallback chain explicitly
self.model_name = model_name # Should be "gpt2"
self.fallback_model = "gpt2" # Fallback tokenization/model if needed
# Use AutoModelForCausalLM instead of AutoModel for GPT-2
self.model = AutoModelForCausalLM.from_pretrained(model_name)
        try:
            # Only load a tokenizer here if the unified initialization above
            # failed, so a caller-provided or registry tokenizer is not overwritten
            if self.tokenizer is None:
                self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                # Add pad token if not present (GPT-2 doesn't have one by default)
                if self.tokenizer.pad_token is None:
                    self.tokenizer.pad_token = self.tokenizer.eos_token
        except Exception as e:
            logger.error(f"Failed to load tokenizer for {model_name}: {e}")
            self.tokenizer = None
    def forward(self, input_ids, attention_mask=None):
        # Causal-LM outputs expose logits, not last_hidden_state; request
        # hidden states explicitly and return the final layer
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
        )
        return outputs.hidden_states[-1]
def encode(self, text: str):
if not self.tokenizer:
raise ValueError("Tokenizer not available")
inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
with torch.no_grad():
outputs = self.forward(inputs.input_ids, inputs.get("attention_mask"))
# Pool by averaging the token embeddings
return outputs.mean(dim=1)
def generate(self, input_ids, max_length=100, **kwargs):
# Use generate method from model if available, else fallback.
if hasattr(self.model, "generate"):
return self.model.generate(input_ids=input_ids, max_length=max_length, **kwargs)
else:
# Simple fallback: return input_ids as is
return input_ids
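    # Illustrative usage sketch: encode() mean-pools the final hidden states
    # into one vector per input (hidden size 768 for GPT-2 small, assumed here).
    #
    #   >>> pt = PretrainedTransformer()
    #   >>> vec = pt.encode("hello world")
    #   >>> vec.shape
    #   torch.Size([1, 768])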
# Register model classes in registry
registry.register("model_class_pretrained", Wildnerve_tlm01)
registry.register("pretrained_transformer_class", PretrainedTransformer)
# Initialize a pretrained tokenizer with retry and fallback.
def initialize_pretrained_model():
"""Attempt to initialize a pretrained tokenizer with a fallback mechanism.
Tries to load 'bert-base-uncased' first; if that fails, attempts to load 'gpt2'.
If the fallback is used, then reattempts loading 'bert-base-uncased' on subsequent tries.
Repeats up to 5 attempts in total.
Returns:
The initialized tokenizer instance if successful, otherwise None."""
max_attempts = 5
for attempt in range(1, max_attempts + 1):
try:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
logger.info(f"Attempt {attempt}: Successfully loaded bert-base-uncased.")
return tokenizer
except Exception as e:
logger.warning(f"Attempt {attempt}: Loading bert-base-uncased failed: {e}")
try:
tokenizer = AutoTokenizer.from_pretrained("gpt2")
logger.info(f"Attempt {attempt}: Successfully loaded gpt2 as fallback.")
return tokenizer
except Exception as e2:
logger.warning(f"Attempt {attempt}: Loading gpt2 failed as fallback: {e2}")
logger.info("Retrying tokenizer initialization...")
logger.error("Failed to initialize pretrained model tokenizer after 5 attempts.")
return None
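# Illustrative usage sketch: the helper is self-contained and can seed the
# registry at startup (assumes network or cache access to the checkpoints).
#
#   >>> tok = initialize_pretrained_model()
#   >>> if tok is not None:
#   ...     registry.register(TOKENIZER, tok, overwrite=True)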
"""
Pretrained model wrapper for Wildnerve-tlm01
"""
import logging
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from service_registry import registry, PRETRAINED_MODEL, TOKENIZER
logger = logging.getLogger(__name__)
class Wildnerve_tlm01:
"""
A wrapper for transformer models from HuggingFace.
Provides the same interface as our custom models for consistency.
"""
def __init__(
self,
model_name="gpt2",
tokenizer=None,
device=None,
**kwargs
):
self.model_name = model_name
# Use provided tokenizer or get one from registry
if tokenizer is not None:
self.tokenizer = tokenizer
elif registry.has(TOKENIZER):
self.tokenizer = registry.get(TOKENIZER)
else:
try:
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
logger.info(f"Initialized tokenizer from {model_name}")
except Exception as e:
logger.error(f"Failed to initialize tokenizer: {e}")
self.tokenizer = None
try:
self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Loading pretrained model from {model_name} on {self.device}")
# Don't actually load the full model in this case to save memory
# This is just a placeholder that can generate simple responses
self.model = None
logger.info(f"Created simplified pretrained model wrapper")
except Exception as e:
logger.error(f"Failed to initialize pretrained model: {e}")
self.model = None
def generate(self, prompt, **kwargs):
"""Generate a response to the given prompt"""
return f"I processed your request about '{prompt[:20]}...' using my pretrained capabilities."
def __call__(self, input_ids, attention_mask=None):
"""Forward pass for HuggingFace compatibility"""
# Simplified placeholder functionality
        batch_size = input_ids.shape[0]