Upload 8 files
Browse files- adapter_layer.py +199 -278
- handler.py +100 -51
- model_PrTr.py +11 -3
- model_stub.py +58 -0
- service_registry.py +8 -133
- smartHybridAttention.py +9 -3
- tokenizer.py +6 -78
adapter_layer.py
CHANGED
|
@@ -1,16 +1,15 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import json
|
| 4 |
-
import nltk
|
| 5 |
-
import torch
|
| 6 |
-
import inspect
|
| 7 |
import logging
|
| 8 |
import pydantic # required
|
| 9 |
-
import codecarbon
|
| 10 |
import importlib.util # required
|
| 11 |
from typing import Dict, Any, Optional, List, Tuple
|
| 12 |
from service_registry import registry, MODEL, PRETRAINED_MODEL, TOKENIZER
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
# Log versions and fail fast if missing
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
logger.info(f"Using pydantic v{pydantic.__version__}")
|
|
@@ -18,6 +17,18 @@ logger.info(f"Using codecarbon v{codecarbon.__version__}")
|
|
| 18 |
|
| 19 |
print(f"Successfully using installed dependencies - pydantic: {pydantic.__version__}, codecarbon: {codecarbon.__version__}")
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
# Import dependency helpers
|
| 22 |
def is_module_available(module_name):
|
| 23 |
try:
|
|
@@ -49,299 +60,209 @@ except ImportError as e:
|
|
| 49 |
return "model_Custm", 0.8
|
| 50 |
return "model_PrTr", 0.6
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
class WildnerveModelAdapter:
|
| 53 |
-
"""
|
| 54 |
-
RETRY_COUNT = 5
|
| 55 |
|
| 56 |
def __init__(self, model_path: str):
|
| 57 |
self.model_path = model_path
|
| 58 |
self.tokenizer = None
|
| 59 |
self.model = None
|
| 60 |
-
self.
|
|
|
|
| 61 |
|
| 62 |
-
#
|
| 63 |
-
|
| 64 |
-
paths = []
|
| 65 |
-
if os.path.isdir(model_path):
|
| 66 |
-
paths.append(model_path)
|
| 67 |
-
else:
|
| 68 |
-
logger.warning(f"Model path not found or not a directory: {model_path}")
|
| 69 |
-
paths.append(root)
|
| 70 |
-
for p in paths:
|
| 71 |
-
if p not in sys.path:
|
| 72 |
-
sys.path.insert(0, p)
|
| 73 |
-
|
| 74 |
-
logger.info(f"Model adapter initialized with path: {model_path}")
|
| 75 |
-
|
| 76 |
-
# Initialize components with retry logic
|
| 77 |
-
for attempt in range(1, self.RETRY_COUNT + 1):
|
| 78 |
-
try:
|
| 79 |
-
self._initialize_tokenizer()
|
| 80 |
-
logger.info("Tokenizer initialized")
|
| 81 |
-
break
|
| 82 |
-
except Exception as e:
|
| 83 |
-
logger.warning(f"Tokenizer init attempt {attempt}/{self.RETRY_COUNT} failed: {e}")
|
| 84 |
-
logger.debug("Tokenizer init stack trace:", exc_info=True)
|
| 85 |
-
if attempt == self.RETRY_COUNT:
|
| 86 |
-
raise
|
| 87 |
-
|
| 88 |
-
for attempt in range(1, self.RETRY_COUNT + 1):
|
| 89 |
-
try:
|
| 90 |
-
self._initialize_model()
|
| 91 |
-
logger.info("Model initialized")
|
| 92 |
-
break
|
| 93 |
-
except Exception as e:
|
| 94 |
-
logger.warning(f"Model init attempt {attempt}/{self.RETRY_COUNT} failed: {e}")
|
| 95 |
-
logger.debug("Model init stack trace:", exc_info=True)
|
| 96 |
-
if attempt == self.RETRY_COUNT:
|
| 97 |
-
raise
|
| 98 |
-
def _initialize_tokenizer(self):
|
| 99 |
-
"""Initialize tokenizer via our local wrapper first, then fallback."""
|
| 100 |
-
try:
|
| 101 |
-
# primary: use our tokenizer.py
|
| 102 |
-
from tokenizer import TokenizerWrapper
|
| 103 |
-
self.tokenizer = TokenizerWrapper()
|
| 104 |
-
logger.info("Using TokenizerWrapper from tokenizer.py")
|
| 105 |
-
return
|
| 106 |
-
except Exception as e:
|
| 107 |
-
logger.warning(f"TokenizerWrapper init failed: {e}")
|
| 108 |
-
|
| 109 |
-
# Try to import from service_registry if available
|
| 110 |
-
try:
|
| 111 |
-
if is_module_available('service_registry'):
|
| 112 |
-
from service_registry import registry, TOKENIZER
|
| 113 |
-
|
| 114 |
-
if registry.has(TOKENIZER):
|
| 115 |
-
self.tokenizer = registry.get(TOKENIZER)
|
| 116 |
-
logger.info("Retrieved tokenizer from registry")
|
| 117 |
-
return
|
| 118 |
-
|
| 119 |
-
# Try loading from the original tokenizer.py
|
| 120 |
-
if is_module_available('tokenizer'):
|
| 121 |
-
from tokenizer import TokenizerWrapper, get_tokenizer
|
| 122 |
-
self.tokenizer = get_tokenizer()
|
| 123 |
-
logger.info("Created TokenizerWrapper instance")
|
| 124 |
-
return
|
| 125 |
-
|
| 126 |
-
except Exception as e:
|
| 127 |
-
logger.warning(f"Error initializing original tokenizer: {e}")
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
logger.info("Using get_tokenizer() fallback")
|
| 134 |
-
return
|
| 135 |
-
except Exception as e:
|
| 136 |
-
logger.error(f"No tokenizer could be initialized: {e}")
|
| 137 |
-
raise ImportError("Tokenizer initialization failed")
|
| 138 |
-
|
| 139 |
-
def _initialize_model(self):
|
| 140 |
-
"""Initialize the model from service registry or create it directly."""
|
| 141 |
-
max_attempts = 5
|
| 142 |
-
attempt = 0
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
-
#
|
| 190 |
-
|
| 191 |
-
time.sleep(1)
|
| 192 |
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
# When storing models/objects, make sure we don't create circular references
|
| 197 |
-
if registry.has(MODEL):
|
| 198 |
-
self.model = registry.get(MODEL)
|
| 199 |
-
# Don't add back-references to registry or other objects that might
|
| 200 |
-
# include this adapter, to avoid circular references
|
| 201 |
-
|
| 202 |
-
def _build_init_kwargs(self):
|
| 203 |
-
return {
|
| 204 |
-
"vocab_size": 30522,
|
| 205 |
-
"specialization": "general",
|
| 206 |
-
"dataset_path": None,
|
| 207 |
-
"model_name": "bert-base-uncased",
|
| 208 |
-
"embedding_dim": 768,
|
| 209 |
-
"num_heads": 12,
|
| 210 |
-
"hidden_dim": 768,
|
| 211 |
-
"num_layers": 6,
|
| 212 |
-
"output_size": 768,
|
| 213 |
-
"dropout": 0.1,
|
| 214 |
-
"max_seq_length": 512,
|
| 215 |
-
"pooling_mode": "mean",
|
| 216 |
-
"tokenizer": self.tokenizer
|
| 217 |
-
}
|
| 218 |
-
|
| 219 |
-
def _split_prompt(self, prompt: str) -> Tuple[str, str]:
|
| 220 |
-
"""Return (technical_sentences, general_sentences)."""
|
| 221 |
-
# download punkt if needed
|
| 222 |
-
try:
|
| 223 |
-
nltk.data.find("tokenizers/punkt")
|
| 224 |
-
except LookupError:
|
| 225 |
-
nltk.download("punkt")
|
| 226 |
-
|
| 227 |
-
sents = nltk.sent_tokenize(prompt)
|
| 228 |
-
analyzer = PromptAnalyzer()
|
| 229 |
-
tech_keys = set(analyzer.predefined_topics.get("programming", []))
|
| 230 |
-
tech_list, gen_list = [], []
|
| 231 |
-
for s in sents:
|
| 232 |
-
# simple keyword check
|
| 233 |
-
if any(k in s.lower() for k in tech_keys):
|
| 234 |
-
tech_list.append(s)
|
| 235 |
-
else:
|
| 236 |
-
gen_list.append(s)
|
| 237 |
-
return " ".join(tech_list).strip(), " ".join(gen_list).strip()
|
| 238 |
|
| 239 |
-
def
|
| 240 |
-
"""
|
| 241 |
try:
|
| 242 |
-
|
| 243 |
-
try:
|
| 244 |
-
from model_List import PromptAnalyzer
|
| 245 |
-
analyzer = PromptAnalyzer()
|
| 246 |
-
model_type, confidence = analyzer.analyze_prompt(text_input)
|
| 247 |
-
logger.info(f"PromptAnalyzer selected {model_type} with confidence {confidence:.2f}")
|
| 248 |
-
except Exception as e:
|
| 249 |
-
logger.error(f"Error using PromptAnalyzer: {e}")
|
| 250 |
-
model_type = "model_Custm" # Default to custom model on error
|
| 251 |
-
|
| 252 |
-
# Enhanced generation parameters with strong repetition prevention
|
| 253 |
-
generation_kwargs = {
|
| 254 |
-
'max_length': max_length or 150,
|
| 255 |
-
'temperature': kwargs.get('temperature', 0.7),
|
| 256 |
-
'top_p': kwargs.get('top_p', 0.95),
|
| 257 |
-
'top_k': kwargs.get('top_k', 50),
|
| 258 |
-
'repetition_penalty': kwargs.get('repetition_penalty', 1.3), # Increased from 1.2
|
| 259 |
-
'no_repeat_ngram_size': kwargs.get('no_repeat_ngram_size', 3), # Increased from 2
|
| 260 |
-
'do_sample': kwargs.get('do_sample', True),
|
| 261 |
-
'num_return_sequences': kwargs.get('num_return_sequences', 1),
|
| 262 |
-
'early_stopping': kwargs.get('early_stopping', True),
|
| 263 |
-
'bad_words_ids': kwargs.get('bad_words_ids', None), # Block repetitive phrases
|
| 264 |
-
'min_length': kwargs.get('min_length', 10), # Ensure reasonable response length
|
| 265 |
-
}
|
| 266 |
|
| 267 |
-
#
|
| 268 |
-
|
| 269 |
-
generation_kwargs['penalty_alpha'] = 0.6 # Helps prevent looping in GPT-2
|
| 270 |
-
|
| 271 |
-
# Override with any explicitly provided kwargs
|
| 272 |
-
generation_kwargs.update({k:v for k,v in kwargs.items() if k not in ('prompt', 'context')})
|
| 273 |
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
if "prompt" in sig.parameters:
|
| 306 |
-
return pre.generate(prompt=text_input, **generation_kwargs)
|
| 307 |
-
else:
|
| 308 |
-
# If no prompt parameter, try tokenizing first
|
| 309 |
-
inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
|
| 310 |
-
return pre.generate(input_ids=inputs.input_ids, **generation_kwargs) # Explicitly pass as input_ids
|
| 311 |
-
else:
|
| 312 |
-
logger.warning("Pretrained model doesn't have generate method")
|
| 313 |
-
except Exception as e:
|
| 314 |
-
logger.error(f"Error using pretrained model: {e}")
|
| 315 |
|
| 316 |
-
#
|
| 317 |
-
if self.
|
| 318 |
try:
|
| 319 |
-
logger.info("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 320 |
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
|
| 332 |
-
return self.model.generate(input_ids=inputs.input_ids, **generation_kwargs) # Explicitly pass as input_ids
|
| 333 |
-
else:
|
| 334 |
-
logger.error("Model has no generate method")
|
| 335 |
-
# Simple fallback for models without generate
|
| 336 |
-
return f"I'm processing your request about '{text_input[:30]}...'"
|
| 337 |
except Exception as e:
|
| 338 |
-
logger.error(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
| 345 |
except Exception as e:
|
| 346 |
-
logger.error(f"Error in
|
| 347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
import json
|
|
|
|
|
|
|
|
|
|
| 4 |
import logging
|
| 5 |
import pydantic # required
|
|
|
|
| 6 |
import importlib.util # required
|
| 7 |
from typing import Dict, Any, Optional, List, Tuple
|
| 8 |
from service_registry import registry, MODEL, PRETRAINED_MODEL, TOKENIZER
|
| 9 |
|
| 10 |
+
# Force low memory usage mode
|
| 11 |
+
os.environ["LOW_MEMORY_MODE"] = "1"
|
| 12 |
+
|
| 13 |
# Log versions and fail fast if missing
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
logger.info(f"Using pydantic v{pydantic.__version__}")
|
|
|
|
| 17 |
|
| 18 |
print(f"Successfully using installed dependencies - pydantic: {pydantic.__version__}, codecarbon: {codecarbon.__version__}")
|
| 19 |
|
| 20 |
+
# MEMORY OPTIMIZATION: Show current memory usage
|
| 21 |
+
def log_memory_usage():
    """Log and return the resident memory of the current process, in MB.

    Returns:
        The process's resident set size divided down to megabytes, or ``0``
        when it cannot be measured (for example when ``psutil`` is not
        installed).
    """
    try:
        # psutil is optional, so it is imported lazily inside the try block.
        import psutil
        process = psutil.Process(os.getpid())
        memory_mb = process.memory_info().rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        return memory_mb
    except Exception:
        # Best-effort diagnostic: a measurement failure must never propagate.
        # ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit deliverable.
        return 0
|
| 31 |
+
|
| 32 |
# Import dependency helpers
|
| 33 |
def is_module_available(module_name):
|
| 34 |
try:
|
|
|
|
| 60 |
return "model_Custm", 0.8
|
| 61 |
return "model_PrTr", 0.6
|
| 62 |
|
| 63 |
+
# MEMORY OPTIMIZATION: Create basic PromptAnalyzer without loading models
|
| 64 |
+
class BasicPromptAnalyzer:
    """Keyword-based prompt router that does not load any model.

    A prompt containing one of the "programming" keywords is routed to
    ``model_Custm``; every other prompt is routed to ``model_PrTr``.
    """

    def __init__(self, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.logger = logging.getLogger(__name__)
        # Topic name -> lowercase keywords that are searched for as substrings.
        self.predefined_topics = {
            "programming": ["python", "java", "code"],
            "general": ["weather", "hello", "chat"]
        }

    def analyze_prompt(self, prompt: str):
        """Choose a model for *prompt*.

        Args:
            prompt: The user prompt. ``None`` or any non-string value is
                treated as a general prompt instead of raising.

        Returns:
            A ``(model_name, confidence)`` tuple: ``("model_Custm", 0.8)``
            when a programming keyword occurs in the lowercased prompt,
            otherwise ``("model_PrTr", 0.6)``.
        """
        if not isinstance(prompt, str):
            return "model_PrTr", 0.6

        # Simple keyword-based routing (case-insensitive substring match).
        prompt_lower = prompt.lower()
        tech_words = self.predefined_topics.get("programming", [])
        if any(tech_word in prompt_lower for tech_word in tech_words):
            return "model_Custm", 0.8
        return "model_PrTr", 0.6
|
| 79 |
+
|
| 80 |
class WildnerveModelAdapter:
    """Ultra-lightweight adapter layer for HF inference endpoints.

    Construction only records which model source files sit next to this
    module; the tokenizer and the model are created on the first call to
    ``generate``. Loading is attempted once: ``model_loaded`` is set after the
    first attempt whether or not a model could be created.
    """

    # Name of the class each candidate model module is expected to define.
    _MODEL_CLASS_NAME = "Wildnerve_tlm01"

    # Source written to model_stub.py when no real model file is found.
    # ``generate`` tolerates ``prompt=None`` (its own default) instead of
    # failing on ``None[:30]``.
    _STUB_SOURCE = (
        "\n"
        "# Minimal stub model\n"
        "import torch.nn as nn\n"
        "class Wildnerve_tlm01(nn.Module):\n"
        "    def __init__(self, **kwargs):\n"
        "        super().__init__()\n"
        "        self.is_stub = True\n"
        "        for key, value in kwargs.items():\n"
        "            setattr(self, key, value)\n"
        "    def generate(self, prompt=None, **kwargs):\n"
        "        return f\"Stub model response for: {(prompt or '')[:30]}...\"\n"
    )

    def __init__(self, model_path: str):
        """Record *model_path* and check which model files are present."""
        self.model_path = model_path
        self.tokenizer = None
        self.model = None
        # True once a load has been attempted (successful or not).
        self.model_loaded = False
        logger.info(f"Creating adapter with path: {model_path}")

        # Safe verification of model file existence
        self._verify_model_files()

    def _verify_model_files(self):
        """Verify model files exist without loading them.

        Fills ``self.available_models`` with ``module name -> file path`` for
        each known model file found beside this module. When none is found, a
        minimal ``model_stub.py`` is written there unless one already exists.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))

        self.available_models = {}
        for filename in ("model_Custm.py", "model_PrTr.py"):
            filepath = os.path.join(script_dir, filename)
            if os.path.exists(filepath):
                module_name = filename.replace('.py', '')
                self.available_models[module_name] = filepath
                logger.info(f"Found model file: {filename}")

        if self.available_models:
            return

        logger.warning("No model files found - will use stub implementation")
        # Create stub file if needed
        stub_path = os.path.join(script_dir, "model_stub.py")
        if os.path.exists(stub_path):
            return
        try:
            with open(stub_path, "w", encoding="utf-8") as f:
                f.write(self._STUB_SOURCE)
            logger.info("Created stub model file")
        except Exception as e:
            # Best effort: an unwritable directory must not break construction.
            logger.error(f"Failed to create stub model: {e}")

    def generate(self, text_input, max_length=None, **kwargs):
        """Generate text - with lazy model loading.

        Args:
            text_input: Prompt passed to the model as ``prompt``.
            max_length: Forwarded unchanged to the model's ``generate``.
            **kwargs: Forwarded unchanged to the model's ``generate``.

        Returns:
            Whatever the model's ``generate`` returns, or a short placeholder
            string when no model is available or generation failed. Errors are
            reported in the returned string rather than raised.
        """
        try:
            # Try to load model on first use
            if not self.model_loaded:
                self._lazy_load_model()

            # If we have a model now, use it
            if self.model is not None:
                try:
                    logger.info(f"Generating with model: {type(self.model).__name__}")
                    return self.model.generate(
                        prompt=text_input,
                        max_length=max_length,
                        **kwargs
                    )
                except Exception as e:
                    # Fall through to the generic placeholder below.
                    logger.error(f"Model generation error: {e}")

            # If we have a tokenizer but no model, use simple responses.
            # The tokenizer is not invoked: its output was never used, and
            # the call could fail for reasons unrelated to this reply.
            if self.tokenizer is not None and self.model is None:
                return f"Processing: {text_input[:50]}..."

            # If no model or tokenizer, return simple response
            return f"I've received your input about '{text_input[:30]}...'"

        except Exception as e:
            logger.error(f"Error in generate method: {e}")
            return f"An error occurred processing your request: {str(e)}"

    def _lazy_load_model(self):
        """Try to load a model on demand, with multiple fallback options.

        Order: ``model_Custm``, then ``model_PrTr``, then ``model_stub``. The
        first candidate that can be instantiated becomes ``self.model``.
        """
        try:
            logger.info("Attempting to load model on first request")

            # First initialize tokenizer if not already done
            self._initialize_minimal_tokenizer()

            if self._load_custom_model():
                return
            if self._load_pretrained_model():
                return
            if self._load_stub_model():
                return

            logger.error("All model loading attempts failed")

        except Exception as e:
            logger.error(f"Error in _lazy_load_model: {e}")
        finally:
            # Always mark as loaded to avoid repeated attempts
            self.model_loaded = True

    def _import_model_class(self, module_name, path):
        """Execute the module at *path* and return its model class, or None."""
        spec = importlib.util.spec_from_file_location(module_name, path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot create import spec for {path}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return getattr(module, self._MODEL_CLASS_NAME, None)

    def _load_custom_model(self) -> bool:
        """Instantiate the model from model_Custm; return True on success."""
        path = self.available_models.get("model_Custm")
        if path is None:
            return False
        try:
            logger.info("Trying to load model_Custm")
            model_class = self._import_model_class("model_Custm", path)
            if model_class is None:
                return False

            logger.info("Creating Wildnerve_tlm01 from model_Custm")
            self.model = model_class(
                tokenizer=self.tokenizer,
                vocab_size=50257,  # GPT-2 vocab size
                specialization="general",
                embedding_dim=768,
                num_heads=12,
                hidden_dim=768,
                num_layers=2,  # Reduced for memory efficiency
                output_size=50257,  # Match GPT-2 vocab
                dropout=0.1,
                max_seq_length=128  # Reduced for memory
            )
            logger.info("Successfully created custom model")
            return True
        except Exception as e:
            logger.error(f"Failed to load model_Custm: {e}")
            return False

    def _load_pretrained_model(self) -> bool:
        """Instantiate the model from model_PrTr; return True on success."""
        path = self.available_models.get("model_PrTr")
        if path is None:
            return False
        try:
            logger.info("Trying to load model_PrTr")
            model_class = self._import_model_class("model_PrTr", path)
            if model_class is None:
                return False

            logger.info("Creating Wildnerve_tlm01 from model_PrTr")
            self.model = model_class(
                tokenizer=self.tokenizer,
                model_name="gpt2"
            )
            logger.info("Successfully created pretrained model")
            return True
        except Exception as e:
            logger.error(f"Failed to load model_PrTr: {e}")
            return False

    def _load_stub_model(self) -> bool:
        """Instantiate the model from model_stub.py; return True on success."""
        try:
            logger.info("Trying to load model_stub")
            script_dir = os.path.dirname(os.path.abspath(__file__))
            stub_path = os.path.join(script_dir, "model_stub.py")
            if not os.path.exists(stub_path):
                return False

            model_class = self._import_model_class("model_stub", stub_path)
            if model_class is None:
                return False

            logger.info("Creating stub model")
            self.model = model_class(
                tokenizer=self.tokenizer,
                specialization="stub"
            )
            logger.warning("Using STUB model - limited functionality")
            return True
        except Exception as e:
            logger.error(f"Failed to load stub model: {e}")
            return False

    def _initialize_minimal_tokenizer(self):
        """Initialize just the tokenizer, not the model.

        Does nothing when a tokenizer is already set. On failure the error is
        logged and ``self.tokenizer`` is left as it was.
        """
        if self.tokenizer is not None:
            return
        try:
            from transformers import AutoTokenizer
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
            logger.info("Initialized minimal tokenizer")
        except Exception as e:
            logger.error(f"Failed to initialize tokenizer: {e}")
|
| 266 |
+
|
| 267 |
+
# Add import for inspect at the top
|
| 268 |
+
import inspect
|
handler.py
CHANGED
|
@@ -16,7 +16,43 @@ logging.basicConfig(
|
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
| 18 |
# --- DEBUG: confirm correct handler.py is loaded ---
|
| 19 |
-
print("DEBUG: using Wildnerve-tlm_HF/handler.py —
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# Safe config import that won't fail during initialization
|
| 22 |
try:
|
|
@@ -33,12 +69,28 @@ except Exception as e:
|
|
| 33 |
}
|
| 34 |
}
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# Safely check for required packages without crashing
|
| 44 |
try:
|
|
@@ -74,6 +126,7 @@ try:
|
|
| 74 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 75 |
sys.path.insert(0, script_dir)
|
| 76 |
|
|
|
|
| 77 |
from adapter_layer import WildnerveModelAdapter
|
| 78 |
logger.info("Successfully imported adapter_layer module")
|
| 79 |
|
|
@@ -106,61 +159,57 @@ except Exception as e:
|
|
| 106 |
|
| 107 |
class EndpointHandler:
|
| 108 |
def __init__(self, model_dir: str = None):
|
| 109 |
-
#
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 117 |
-
sys.path.insert(0, script_dir)
|
| 118 |
-
|
| 119 |
-
from adapter_layer import WildnerveModelAdapter
|
| 120 |
-
logger.info("Successfully imported adapter_layer module")
|
| 121 |
-
except ImportError as e:
|
| 122 |
-
logger.error(f"Could not import adapter_layer: {e}")
|
| 123 |
-
# Create a minimal placeholder adapter class
|
| 124 |
-
class WildnerveModelAdapter:
|
| 125 |
-
def __init__(self, model_path: str =""):
|
| 126 |
-
self.model_path = model_path
|
| 127 |
-
logger.info(f"Using fallback WildnerveModelAdapter with path: {model_path}")
|
| 128 |
-
|
| 129 |
-
def generate(self, text_input, **kwargs):
|
| 130 |
-
return f"Model adapter unavailable. Received input: {text_input[:30]}..."
|
| 131 |
-
|
| 132 |
-
# supply model_dir as the adapter's model_path
|
| 133 |
-
self.adapter = WildnerveModelAdapter(model_dir or "")
|
| 134 |
-
except Exception as e:
|
| 135 |
-
logger.error(f"Adapter init failed: {e}", exc_info=True)
|
| 136 |
-
self.init_error = str(e)
|
| 137 |
-
self.adapter = None
|
| 138 |
-
|
| 139 |
def __call__(self, data, parameters=None):
|
| 140 |
-
|
| 141 |
-
|
|
|
|
| 142 |
|
| 143 |
# Extract prompt text
|
| 144 |
text = data.get("inputs") if isinstance(data, dict) else str(data)
|
| 145 |
|
| 146 |
try:
|
| 147 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
out = self.adapter.generate(text, **(parameters or {}))
|
| 149 |
|
| 150 |
-
# Ensure output is
|
| 151 |
-
if torch.is_tensor(out):
|
| 152 |
-
# Convert tensor to string
|
| 153 |
-
logger.warning("Model returned a tensor instead of text, attempting to convert")
|
| 154 |
-
if out.dim() > 0 and hasattr(self.adapter, "tokenizer"):
|
| 155 |
-
out = self.adapter.tokenizer.decode(out.cpu().squeeze(), skip_special_tokens=True)
|
| 156 |
-
else:
|
| 157 |
-
out = str(out)
|
| 158 |
-
|
| 159 |
-
# Final check to ensure string output
|
| 160 |
if not isinstance(out, str):
|
| 161 |
out = str(out)
|
| 162 |
|
| 163 |
return [{"generated_text": out}]
|
| 164 |
except Exception as e:
|
| 165 |
logger.error(f"Generation error: {e}", exc_info=True)
|
| 166 |
-
return [{"generated_text": f"Error: {e}"}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
| 18 |
# --- DEBUG: confirm correct handler.py is loaded ---
|
| 19 |
+
print("DEBUG: using Wildnerve-tlm_HF/handler.py — v7 with file verification")
|
| 20 |
+
|
| 21 |
+
# Set aggressive memory optimization
|
| 22 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
|
| 23 |
+
os.environ["TRANSFORMERS_OFFLINE"] = "1" # Prevent downloading models
|
| 24 |
+
os.environ["LOW_MEMORY_MODE"] = "1" # Custom flag for our code to detect
|
| 25 |
+
|
| 26 |
+
# VERIFY CRITICAL FILES: Check required model files exist before proceeding
|
| 27 |
+
def verify_required_files():
    """Check that the critical source files sit next to this module.

    Nothing is imported; each file is only tested for existence. A found file
    is logged with its size in KB and a missing one is logged as a warning.

    Returns:
        ``True`` when every required file exists, ``False`` otherwise.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))

    absent = []
    for name in ("model_Custm.py", "model_PrTr.py", "adapter_layer.py", "tokenizer.py"):
        candidate = os.path.join(base_dir, name)
        if os.path.exists(candidate):
            size_kb = os.path.getsize(candidate) / 1024  # KB
            logger.info(f"Found required file: {name} ({size_kb:.1f} KB)")
            continue
        absent.append(name)
        logger.warning(f"Required file not found: {name}")

    if not absent:
        return True

    logger.error(f"Missing required files: {', '.join(absent)}")
    return False
|
| 51 |
+
|
| 52 |
+
# Verify required files exist but don't load them yet
|
| 53 |
+
critical_files_verified = verify_required_files()
|
| 54 |
+
if not critical_files_verified:
|
| 55 |
+
logger.warning("Some critical model files are missing - expect errors during request handling")
|
| 56 |
|
| 57 |
# Safe config import that won't fail during initialization
|
| 58 |
try:
|
|
|
|
| 69 |
}
|
| 70 |
}
|
| 71 |
|
| 72 |
+
# MEMORY OPTIMIZATION: Avoid loading pretrained models during init
|
| 73 |
+
os.environ["TRANSFORMERS_OFFLINE"] = "1" # Prevent downloading models
|
| 74 |
+
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128" # Limit CUDA allocations
|
| 75 |
+
|
| 76 |
+
# Add safeguard for memory usage
|
| 77 |
+
def check_memory_usage(warn_threshold_mb=1800):
    """Check memory usage and log warning if too high.

    Args:
        warn_threshold_mb: Resident memory, in MB, above which a warning is
            logged. The default of 1800 keeps the previous fixed threshold,
            which the original comment described as 1.8 GB (90% of limit).

    Returns:
        The current process's resident memory in MB, or ``0`` when it could
        not be measured (the failure is logged as a warning).
    """
    try:
        # psutil is optional, so it is imported lazily inside the try block.
        import psutil
        process = psutil.Process(os.getpid())
        memory_mb = process.memory_info().rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        if memory_mb > warn_threshold_mb:
            logger.warning(f"Memory usage critical: {memory_mb:.2f} MB. Consider reducing model size.")
        return memory_mb
    except Exception as e:
        logger.warning(f"Error checking memory usage: {e}")
        return 0
|
| 91 |
+
|
| 92 |
+
# Check memory at startup
|
| 93 |
+
check_memory_usage()
|
| 94 |
|
| 95 |
# Safely check for required packages without crashing
|
| 96 |
try:
|
|
|
|
| 126 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 127 |
sys.path.insert(0, script_dir)
|
| 128 |
|
| 129 |
+
# MEMORY OPTIMIZATION: Import but don't initialize yet
|
| 130 |
from adapter_layer import WildnerveModelAdapter
|
| 131 |
logger.info("Successfully imported adapter_layer module")
|
| 132 |
|
|
|
|
| 159 |
|
| 160 |
class EndpointHandler:
    """Inference endpoint entry point with lazy adapter initialization.

    Construction does no model work. The adapter is created on the first call
    and that attempt is made only once; if it fails, later calls return a
    simple placeholder response instead of retrying.
    """

    def __init__(self, model_dir: str = None):
        """Store *model_dir*; defer everything else to the first request."""
        # Do absolute minimal initialization here
        self.model_dir = model_dir
        self.adapter = None
        # True once adapter creation has been attempted (successful or not).
        self.initialized = False
        self.critical_files_verified = critical_files_verified
        logger.info(f"Handler init with minimal footprint: {model_dir}")

    def __call__(self, data, parameters=None):
        """Handle one request.

        Args:
            data: A dict whose ``"inputs"`` entry is the prompt, or any other
                object, which is converted with ``str``.
            parameters: Optional dict of keyword arguments forwarded to the
                adapter's ``generate``.

        Returns:
            A one-element list ``[{"generated_text": <str>}]``. Errors are
            reported in the text rather than raised.
        """
        # Lazy initialization on first request
        if not self.initialized:
            self._initialize_on_demand()

        # Extract prompt text
        text = data.get("inputs") if isinstance(data, dict) else str(data)
        if text is None:
            # A dict without "inputs" is handled as an empty prompt rather
            # than failing later when the text is sliced.
            text = ""

        try:
            # Warning response if critical files are missing
            if not self.critical_files_verified:
                logger.warning("Attempting to process request with missing critical files")
                return [{
                    "generated_text": "System initialization issue: Some model files appear to be missing. " +
                                      f"Processing your request about '{text[:30]}...' with limited functionality."
                }]

            # Simple response when no adapter could be created
            if self.adapter is None:
                logger.info("Using simple text response (no adapter)")
                return [{"generated_text": f"Processing your request about '{text[:30]}...'"}]

            # Generate response with adapter if available
            out = self.adapter.generate(text, **(parameters or {}))

            # Ensure output is valid string
            if not isinstance(out, str):
                out = str(out)

            return [{"generated_text": out}]
        except Exception as e:
            logger.error(f"Generation error: {e}", exc_info=True)
            return [{"generated_text": f"Error processing your request: {str(e)}"}]

    def _initialize_on_demand(self):
        """Initialize adapter when first needed.

        On failure the error is logged and ``self.adapter`` stays ``None``.
        """
        try:
            logger.info("Performing lazy initialization on first request")

            # Import with minimal dependencies
            from adapter_layer import WildnerveModelAdapter
            self.adapter = WildnerveModelAdapter(self.model_dir or "")
            logger.info("Adapter initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing adapter: {e}", exc_info=True)
            # Continue without adapter, we'll return simple responses
        finally:
            # Mark the attempt as done even on failure, so the import and its
            # traceback logging are not repeated on every request.
            self.initialized = True
|
model_PrTr.py
CHANGED
|
@@ -100,7 +100,8 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 100 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 101 |
|
| 102 |
# Initialize the model and tokenizer
|
| 103 |
-
self.
|
|
|
|
| 104 |
|
| 105 |
# Ensure proper tokenizer setup for GPT-2
|
| 106 |
if tokenizer is not None:
|
|
@@ -115,10 +116,10 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 115 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 116 |
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
|
| 117 |
|
| 118 |
-
logger.info(f"Successfully
|
| 119 |
|
| 120 |
except Exception as e:
|
| 121 |
-
logger.error(f"Error
|
| 122 |
raise
|
| 123 |
|
| 124 |
# Register this model instance in the registry by specialization
|
|
@@ -129,6 +130,10 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 129 |
registry.register(PRETRAINED_MODEL, self, overwrite=True)
|
| 130 |
logger.info("Registered GPT-2 model as pretrained model")
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
# Replace the old forward method with GPT-2 specific implementation
|
| 133 |
def forward(self, src: torch.Tensor, tgt: Optional[torch.Tensor] = None,
|
| 134 |
src_key_padding_mask: Optional[torch.Tensor] = None,
|
|
@@ -136,6 +141,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 136 |
return_sequence: bool = False,
|
| 137 |
**kwargs) -> torch.Tensor:
|
| 138 |
|
|
|
|
| 139 |
# Use GPT-2 directly for generation
|
| 140 |
outputs = self.gpt2_model(src, **kwargs)
|
| 141 |
return outputs.logits
|
|
@@ -143,6 +149,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 143 |
# Update generate to handle both direct prompt and tokenized input
|
| 144 |
def generate(self, prompt=None, input_ids=None, max_length=None, **kwargs):
|
| 145 |
"""Generate text using the GPT-2 model"""
|
|
|
|
| 146 |
try:
|
| 147 |
# Try to use adapter_layer.generate if available (consolidate generation paths)
|
| 148 |
adapter_layer = registry.get("adapter_layer")
|
|
@@ -204,6 +211,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 204 |
|
| 205 |
def generate_streaming(self, prompt=None, input_ids=None, **kwargs):
|
| 206 |
"""Generate tokens one by one in streaming fashion"""
|
|
|
|
| 207 |
try:
|
| 208 |
# Handle either text or tokenized input
|
| 209 |
if prompt is not None and input_ids is None:
|
|
|
|
| 100 |
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 101 |
|
| 102 |
# Initialize the model and tokenizer
|
| 103 |
+
self.model_name = model_name
|
| 104 |
+
self.gpt2_model = None # Will be loaded on first use
|
| 105 |
|
| 106 |
# Ensure proper tokenizer setup for GPT-2
|
| 107 |
if tokenizer is not None:
|
|
|
|
| 116 |
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 117 |
self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
|
| 118 |
|
| 119 |
+
logger.info(f"Successfully initialized GPT-2 model: {model_name}")
|
| 120 |
|
| 121 |
except Exception as e:
|
| 122 |
+
logger.error(f"Error initializing GPT-2 model: {e}", exc_info=True)
|
| 123 |
raise
|
| 124 |
|
| 125 |
# Register this model instance in the registry by specialization
|
|
|
|
| 130 |
registry.register(PRETRAINED_MODEL, self, overwrite=True)
|
| 131 |
logger.info("Registered GPT-2 model as pretrained model")
|
| 132 |
|
| 133 |
+
def _ensure_model_loaded(self):
    """Load the GPT-2 weights on first use and cache them on the instance.

    Does nothing when ``self.gpt2_model`` is already set. Otherwise the
    checkpoint named by ``self.model_name`` is loaded with
    ``GPT2LMHeadModel.from_pretrained`` and stored in ``self.gpt2_model``.
    """
    if self.gpt2_model is not None:
        return

    # NOTE(review): the import is done here because the only visible import
    # of GPT2LMHeadModel lives inside another method; relying on it would
    # leave the name undefined in this scope (NameError on the first load).
    from transformers import GPT2LMHeadModel

    self.gpt2_model = GPT2LMHeadModel.from_pretrained(self.model_name)
|
| 136 |
+
|
| 137 |
# Replace the old forward method with GPT-2 specific implementation
|
| 138 |
def forward(self, src: torch.Tensor, tgt: Optional[torch.Tensor] = None,
|
| 139 |
src_key_padding_mask: Optional[torch.Tensor] = None,
|
|
|
|
| 141 |
return_sequence: bool = False,
|
| 142 |
**kwargs) -> torch.Tensor:
|
| 143 |
|
| 144 |
+
self._ensure_model_loaded() # Load model only when needed
|
| 145 |
# Use GPT-2 directly for generation
|
| 146 |
outputs = self.gpt2_model(src, **kwargs)
|
| 147 |
return outputs.logits
|
|
|
|
| 149 |
# Update generate to handle both direct prompt and tokenized input
|
| 150 |
def generate(self, prompt=None, input_ids=None, max_length=None, **kwargs):
|
| 151 |
"""Generate text using the GPT-2 model"""
|
| 152 |
+
self._ensure_model_loaded() # Load model only when needed
|
| 153 |
try:
|
| 154 |
# Try to use adapter_layer.generate if available (consolidate generation paths)
|
| 155 |
adapter_layer = registry.get("adapter_layer")
|
|
|
|
| 211 |
|
| 212 |
def generate_streaming(self, prompt=None, input_ids=None, **kwargs):
|
| 213 |
"""Generate tokens one by one in streaming fashion"""
|
| 214 |
+
self._ensure_model_loaded() # Load model only when needed
|
| 215 |
try:
|
| 216 |
# Handle either text or tokenized input
|
| 217 |
if prompt is not None and input_ids is None:
|
model_stub.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Minimal model stub that can be used if the real model files aren't found.
|
| 3 |
+
Uses near-zero memory while still providing the expected interface.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import logging
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
logger.warning("Using minimal model stub - EMERGENCY FALLBACK MODE")
|
| 11 |
+
|
| 12 |
+
class Wildnerve_tlm01(nn.Module):
    """Weight-free stand-in that exposes the ``Wildnerve_tlm01`` interface.

    The stub accepts any constructor keyword arguments, returns all-zero
    logits from ``forward`` and canned text from ``generate`` /
    ``generate_streaming``. ``is_stub`` is always ``True`` so callers can
    tell it apart from a real model.
    """

    def __init__(self, **kwargs):
        """Store every keyword argument as an attribute of the same name.

        ``tokenizer`` (default ``None``) and ``specialization`` (default
        ``'general'``) are always present afterwards, even if not passed.
        """
        super().__init__()
        self.is_stub = True

        # Mirror the constructor arguments without interpreting them.
        for name, value in kwargs.items():
            setattr(self, name, value)

        # Guarantee these two attributes exist regardless of kwargs.
        self.tokenizer = kwargs.get('tokenizer')
        self.specialization = kwargs.get('specialization', 'general')

        logger.warning(f"Created stub model with specialization: {self.specialization}")

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Return an all-zero tensor of shape ``(batch, seq, vocab_size)``.

        ``batch`` and ``seq`` come from the first two dimensions of
        ``input_ids``; without ``input_ids`` they default to 1 and 10.
        ``vocab_size`` is the attribute of that name if one was passed to the
        constructor, else 50257. ``attention_mask`` and extra keyword
        arguments are accepted and ignored.
        """
        import torch

        vocab_size = getattr(self, 'vocab_size', 50257)

        if input_ids is None:
            batch_size, seq_length = 1, 10
            device = None
        else:
            batch_size = input_ids.shape[0]
            seq_length = input_ids.shape[1]
            # Keep the output on the same device as a tensor input so callers
            # can combine the two without a device mismatch.
            device = input_ids.device if isinstance(input_ids, torch.Tensor) else None

        return torch.zeros((batch_size, seq_length, vocab_size), device=device)

    def generate(self, prompt=None, **kwargs):
        """Return a fixed template, quoting the first 50 items of ``prompt`` if truthy."""
        if prompt:
            return f"[STUB MODEL] I've received your request about '{prompt[:50]}...'"
        return "[STUB MODEL] I've received your request"

    def generate_streaming(self, prompt=None, **kwargs):
        """Yield the ``generate`` response word by word, each followed by a space.

        Sleeps 0.05 s after each yielded word to imitate a streaming delay.
        """
        import time

        response = self.generate(prompt)

        for word in response.split():
            yield word + " "
            time.sleep(0.05)  # Simulate streaming delay
|
service_registry.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
import traceback
|
|
@@ -7,17 +7,17 @@ from typing import Any, Dict, Optional
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
-
# Constants used as keys
|
| 11 |
-
MODEL = "model"
|
| 12 |
-
PRETRAINED_MODEL = "pretrained_model"
|
| 13 |
TOKENIZER = "tokenizer"
|
| 14 |
MODEL_MANAGER = "model_manager"
|
| 15 |
COMMUNICATOR = "communicator"
|
| 16 |
PIPELINE = "pipeline"
|
| 17 |
-
TRANSFORMER = "transformer"
|
| 18 |
|
| 19 |
class ServiceRegistry:
|
| 20 |
-
"""A
|
| 21 |
|
| 22 |
def __init__(self):
|
| 23 |
self._services = {}
|
|
@@ -34,7 +34,7 @@ class ServiceRegistry:
|
|
| 34 |
def get(self, key: str) -> Optional[Any]:
|
| 35 |
"""Get a service by its key"""
|
| 36 |
if key not in self._services:
|
| 37 |
-
|
| 38 |
return None
|
| 39 |
|
| 40 |
return self._services[key]
|
|
@@ -51,130 +51,5 @@ class ServiceRegistry:
|
|
| 51 |
registry = ServiceRegistry()
|
| 52 |
|
| 53 |
def ensure_models_registered():
|
| 54 |
-
"""
|
| 55 |
-
# First make sure we have a CUSTOM model (Wildnerve-tlm01_Hybrid_Model)
|
| 56 |
-
if not registry.has(MODEL):
|
| 57 |
-
logger.info("No custom model in registry, registering Wildnerve-tlm01_Hybrid_Model")
|
| 58 |
-
try:
|
| 59 |
-
import os, importlib.util
|
| 60 |
-
|
| 61 |
-
# Find model_Custm.py in the same directory as this file
|
| 62 |
-
this_dir = os.path.dirname(os.path.abspath(__file__))
|
| 63 |
-
model_path = os.path.join(this_dir, "model_Custm.py")
|
| 64 |
-
|
| 65 |
-
# Add more debug logging
|
| 66 |
-
logger.info(f"Model path exists: {os.path.exists(model_path)}")
|
| 67 |
-
logger.info(f"Model directory content: {os.listdir(os.path.dirname(model_path))}")
|
| 68 |
-
|
| 69 |
-
if os.path.exists(model_path):
|
| 70 |
-
# Dynamic import of model_Custm.py for Wildnerve-tlm01_Hybrid_Model
|
| 71 |
-
spec = importlib.util.spec_from_file_location("model_custm", model_path)
|
| 72 |
-
model_module = importlib.util.module_from_spec(spec)
|
| 73 |
-
spec.loader.exec_module(model_module)
|
| 74 |
-
|
| 75 |
-
# Get the model class for Wildnerve-tlm01_Hybrid_Model
|
| 76 |
-
if hasattr(model_module, "Wildnerve_tlm01"):
|
| 77 |
-
from tokenizer import TokenizerWrapper
|
| 78 |
-
|
| 79 |
-
# Create tokenizer and model
|
| 80 |
-
tok = TokenizerWrapper()
|
| 81 |
-
model_class = getattr(model_module, "Wildnerve_tlm01")
|
| 82 |
-
model = model_class(
|
| 83 |
-
vocab_size=50257, # Updated to GPT-2 vocab size
|
| 84 |
-
specialization="general",
|
| 85 |
-
dataset_path=None,
|
| 86 |
-
model_name="gpt2", # Changed from bert-base-uncased
|
| 87 |
-
embedding_dim=768,
|
| 88 |
-
num_heads=12,
|
| 89 |
-
hidden_dim=768,
|
| 90 |
-
num_layers=2,
|
| 91 |
-
output_size=50257, # Match GPT-2 vocab size
|
| 92 |
-
dropout=0.1,
|
| 93 |
-
max_seq_length=128,
|
| 94 |
-
pooling_mode="last", # GPT-2 typically uses last token
|
| 95 |
-
tokenizer=tok
|
| 96 |
-
)
|
| 97 |
-
|
| 98 |
-
# Register both tokenizer and the Wildnerve-tlm01_Hybrid_Model
|
| 99 |
-
registry.register(TOKENIZER, tok, overwrite=True)
|
| 100 |
-
registry.register(MODEL, model, overwrite=True)
|
| 101 |
-
logger.info("Successfully registered Wildnerve-tlm01_Hybrid_Model as MODEL")
|
| 102 |
-
return True
|
| 103 |
-
|
| 104 |
-
logger.error(f"model_Custm.py not found at {model_path}")
|
| 105 |
-
return False
|
| 106 |
-
|
| 107 |
-
except Exception as e:
|
| 108 |
-
# More detailed error logging
|
| 109 |
-
logger.error(f"Failed to register Wildnerve-tlm01_Hybrid_Model: {e}")
|
| 110 |
-
logger.error(f"Exception details: {type(e).__name__}")
|
| 111 |
-
logger.error(f"Exception traceback: {traceback.format_exc()}")
|
| 112 |
-
return False
|
| 113 |
-
|
| 114 |
-
# Then check if we have a GPT-2 PRETRAINED model
|
| 115 |
-
if not registry.has(PRETRAINED_MODEL):
|
| 116 |
-
logger.info("No GPT-2 model in registry, registering GPT-2")
|
| 117 |
-
try:
|
| 118 |
-
import os, importlib.util
|
| 119 |
-
# Import required modules at this scope
|
| 120 |
-
try:
|
| 121 |
-
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 122 |
-
except ImportError:
|
| 123 |
-
logger.error("Failed to import required GPT-2 modules")
|
| 124 |
-
return False
|
| 125 |
-
|
| 126 |
-
# Find model_PrTr.py in the same directory as this file
|
| 127 |
-
this_dir = os.path.dirname(os.path.abspath(__file__))
|
| 128 |
-
model_path = os.path.join(this_dir, "model_PrTr.py")
|
| 129 |
-
|
| 130 |
-
if os.path.exists(model_path):
|
| 131 |
-
# Dynamic import of model_PrTr.py
|
| 132 |
-
spec = importlib.util.spec_from_file_location("model_prtr", model_path)
|
| 133 |
-
model_module = importlib.util.module_from_spec(spec)
|
| 134 |
-
spec.loader.exec_module(model_module)
|
| 135 |
-
|
| 136 |
-
# Get GPT-2 wrapper class
|
| 137 |
-
model_class = None
|
| 138 |
-
if hasattr(model_module, "PretrainedTransformer"):
|
| 139 |
-
model_class = getattr(model_module, "PretrainedTransformer")
|
| 140 |
-
elif hasattr(model_module, "Wildnerve_tlm01"):
|
| 141 |
-
model_class = getattr(model_module, "Wildnerve_tlm01")
|
| 142 |
-
|
| 143 |
-
if model_class:
|
| 144 |
-
# Get tokenizer first
|
| 145 |
-
tok = registry.get(TOKENIZER)
|
| 146 |
-
if not tok:
|
| 147 |
-
try:
|
| 148 |
-
# Create GPT-2 tokenizer
|
| 149 |
-
tok = GPT2Tokenizer.from_pretrained("gpt2")
|
| 150 |
-
if tok.pad_token_id is None:
|
| 151 |
-
tok.pad_token = tok.eos_token
|
| 152 |
-
tok.pad_token_id = tok.eos_token_id
|
| 153 |
-
registry.register(TOKENIZER, tok, overwrite=True)
|
| 154 |
-
logger.info("Created GPT-2 tokenizer directly")
|
| 155 |
-
except Exception as e:
|
| 156 |
-
logger.error(f"Failed to create GPT-2 tokenizer: {e}")
|
| 157 |
-
return False
|
| 158 |
-
|
| 159 |
-
# Create GPT-2 model instance
|
| 160 |
-
model = model_class(
|
| 161 |
-
model_name="gpt2", # Explicitly use gpt2
|
| 162 |
-
tokenizer=tok
|
| 163 |
-
)
|
| 164 |
-
|
| 165 |
-
# Register as GPT-2 pretrained model
|
| 166 |
-
registry.register(PRETRAINED_MODEL, model, overwrite=True)
|
| 167 |
-
logger.info("Successfully registered GPT-2 as PRETRAINED_MODEL")
|
| 168 |
-
return True
|
| 169 |
-
|
| 170 |
-
logger.error(f"model_PrTr.py not found at {model_path}")
|
| 171 |
-
|
| 172 |
-
except Exception as e:
|
| 173 |
-
logger.error(f"Failed to register GPT-2 model: {e}")
|
| 174 |
-
logger.error(f"Exception details: {type(e).__name__}")
|
| 175 |
-
logger.error(f"Exception traceback: {traceback.format_exc()}")
|
| 176 |
-
|
| 177 |
return True
|
| 178 |
-
|
| 179 |
-
# Execute this during module import to ensure models are registered
|
| 180 |
-
ensure_models_registered()
|
|
|
|
| 1 |
"""
|
| 2 |
+
Minimal service registry for dependency injection
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
import traceback
|
|
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
+
# Constants used as keys
|
| 11 |
+
MODEL = "model"
|
| 12 |
+
PRETRAINED_MODEL = "pretrained_model"
|
| 13 |
TOKENIZER = "tokenizer"
|
| 14 |
MODEL_MANAGER = "model_manager"
|
| 15 |
COMMUNICATOR = "communicator"
|
| 16 |
PIPELINE = "pipeline"
|
| 17 |
+
TRANSFORMER = "transformer"
|
| 18 |
|
| 19 |
class ServiceRegistry:
|
| 20 |
+
"""A minimal service registry that avoids loading heavy models"""
|
| 21 |
|
| 22 |
def __init__(self):
|
| 23 |
self._services = {}
|
|
|
|
| 34 |
def get(self, key: str) -> Optional[Any]:
    """Return the service registered under ``key``, or ``None`` if there is none."""
    # A missing key is deliberately not logged, to avoid excessive log output.
    return self._services.get(key)
|
|
|
|
| 51 |
registry = ServiceRegistry()
|
| 52 |
|
| 53 |
def ensure_models_registered():
    """Placeholder that registers nothing at startup.

    Returns:
        Always ``True``.
    """
    return True
|
|
|
|
|
|
|
|
|
smartHybridAttention.py
CHANGED
|
@@ -177,9 +177,8 @@ class SmartHybridAttention(nn.Module):
|
|
| 177 |
# Ensure int type for memory tokens
|
| 178 |
self.memory_tokens = int(memory_tokens) if isinstance(memory_tokens, (int, float)) else 32
|
| 179 |
|
| 180 |
-
#
|
| 181 |
-
self.
|
| 182 |
-
nn.init.normal_(self.persistent_memory, mean=0.0, std=0.02)
|
| 183 |
|
| 184 |
# Projections
|
| 185 |
self.q_proj = nn.Linear(self.dim, self.dim)
|
|
@@ -232,6 +231,12 @@ class SmartHybridAttention(nn.Module):
|
|
| 232 |
except:
|
| 233 |
return {}
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
def _create_sliding_window_mask(
|
| 236 |
self,
|
| 237 |
seq_len: int,
|
|
@@ -304,6 +309,7 @@ class SmartHybridAttention(nn.Module):
|
|
| 304 |
) -> torch.Tensor:
|
| 305 |
"""Apply attention with persistent memory tokens for long-range context.
|
| 306 |
Returns: Output tensor after attention [seq_len, batch, dim]"""
|
|
|
|
| 307 |
seq_len, batch_size, _ = query.size()
|
| 308 |
|
| 309 |
# Expand memory tokens to batch size
|
|
|
|
| 177 |
# Ensure int type for memory tokens
|
| 178 |
self.memory_tokens = int(memory_tokens) if isinstance(memory_tokens, (int, float)) else 32
|
| 179 |
|
| 180 |
+
# Delayed initialization for memory parameter
|
| 181 |
+
self._persistent_memory_initialized = False
|
|
|
|
| 182 |
|
| 183 |
# Projections
|
| 184 |
self.q_proj = nn.Linear(self.dim, self.dim)
|
|
|
|
| 231 |
except:
|
| 232 |
return {}
|
| 233 |
|
| 234 |
+
def _init_memory(self):
    """Create the persistent-memory parameter the first time it is needed.

    The parameter has shape ``(memory_tokens, 1, dim)`` and is filled from a
    normal distribution (mean 0.0, std 0.02). Once
    ``_persistent_memory_initialized`` is set, later calls do nothing.
    """
    if self._persistent_memory_initialized:
        return

    # Allocate alongside the module's existing parameters: if the module was
    # moved or cast (``.to(device)``, ``.double()``) before its first forward
    # pass, a default CPU/float32 tensor would not match them.
    reference = next(self.parameters(), None)
    device = reference.device if reference is not None else None
    dtype = reference.dtype if reference is not None else None

    memory = torch.empty(self.memory_tokens, 1, self.dim, device=device, dtype=dtype)
    nn.init.normal_(memory, mean=0.0, std=0.02)

    self.persistent_memory = nn.Parameter(memory)
    self._persistent_memory_initialized = True
|
| 239 |
+
|
| 240 |
def _create_sliding_window_mask(
|
| 241 |
self,
|
| 242 |
seq_len: int,
|
|
|
|
| 309 |
) -> torch.Tensor:
|
| 310 |
"""Apply attention with persistent memory tokens for long-range context.
|
| 311 |
Returns: Output tensor after attention [seq_len, batch, dim]"""
|
| 312 |
+
self._init_memory() # Initialize memory only when needed
|
| 313 |
seq_len, batch_size, _ = query.size()
|
| 314 |
|
| 315 |
# Expand memory tokens to batch size
|
tokenizer.py
CHANGED
|
@@ -1,86 +1,14 @@
|
|
| 1 |
-
# Tokenizer Wrapper Module
|
| 2 |
import os
|
| 3 |
-
import
|
| 4 |
import logging
|
| 5 |
-
import
|
| 6 |
-
from typing import List, Union, Dict, Optional, Tuple, Any
|
| 7 |
-
from transformers import AutoTokenizer, PreTrainedTokenizerBase, BertTokenizer
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
from functools import lru_cache
|
| 10 |
-
import importlib.util
|
| 11 |
-
|
| 12 |
-
# Check if sentencepiece is available but don't crash if not
|
| 13 |
-
SP_AVAILABLE = importlib.util.find_spec("sentencepiece") is not None
|
| 14 |
-
if SP_AVAILABLE:
|
| 15 |
-
import sentencepiece as spm
|
| 16 |
-
else:
|
| 17 |
-
logging.warning("sentencepiece not available; some tokenizer features will be limited")
|
| 18 |
-
|
| 19 |
-
from config import app_config
|
| 20 |
-
from service_registry import registry, TOKENIZER
|
| 21 |
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
class TokenizerWrapper:
|
| 25 |
-
"""
|
| 26 |
|
| 27 |
-
def __init__(self, model_name="gpt2"):
|
| 28 |
self.model_name = model_name
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# Add pad token if it doesn't exist (important for GPT-2)
|
| 32 |
-
if self.tokenizer.pad_token is None:
|
| 33 |
-
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 34 |
-
logger.info(f"Initialized tokenizer from {model_name}")
|
| 35 |
-
except Exception as e:
|
| 36 |
-
logger.error(f"Error loading tokenizer: {e}")
|
| 37 |
-
self.tokenizer = None
|
| 38 |
-
|
| 39 |
-
def __call__(self, text, **kwargs):
|
| 40 |
-
"""Make the wrapper callable like a standard HF tokenizer"""
|
| 41 |
-
if self.tokenizer is None:
|
| 42 |
-
raise ValueError("Tokenizer not initialized")
|
| 43 |
-
return self.tokenizer(text, **kwargs)
|
| 44 |
-
|
| 45 |
-
def encode(self, text, **kwargs):
|
| 46 |
-
"""Encode text to token IDs"""
|
| 47 |
-
if self.tokenizer is None:
|
| 48 |
-
raise ValueError("Tokenizer not initialized")
|
| 49 |
-
return self.tokenizer.encode(text, **kwargs)
|
| 50 |
-
|
| 51 |
-
def decode(self, token_ids, **kwargs):
|
| 52 |
-
"""Decode token IDs to text"""
|
| 53 |
-
if self.tokenizer is None:
|
| 54 |
-
raise ValueError("Tokenizer not initialized")
|
| 55 |
-
return self.tokenizer.decode(token_ids, **kwargs)
|
| 56 |
-
|
| 57 |
-
def tokenize(self, text, **kwargs):
|
| 58 |
-
"""Tokenize text to tokens"""
|
| 59 |
-
if self.tokenizer is None:
|
| 60 |
-
raise ValueError("Tokenizer not initialized")
|
| 61 |
-
return self.tokenizer.tokenize(text, **kwargs)
|
| 62 |
-
|
| 63 |
-
def get_tokenizer(model_name="gpt2"):
|
| 64 |
-
"""Get a tokenizer instance with proper fallback handling"""
|
| 65 |
-
try:
|
| 66 |
-
return TokenizerWrapper(model_name)
|
| 67 |
-
except Exception as e:
|
| 68 |
-
logger.error(f"Error creating TokenizerWrapper: {e}")
|
| 69 |
-
try:
|
| 70 |
-
return AutoTokenizer.from_pretrained(model_name)
|
| 71 |
-
except Exception as e2:
|
| 72 |
-
logger.error(f"Error loading AutoTokenizer: {e2}")
|
| 73 |
-
return None
|
| 74 |
-
|
| 75 |
-
if __name__ == "__main__":
|
| 76 |
-
# Example usage showcasing advanced features
|
| 77 |
-
wrapper = TokenizerWrapper(sp_model_path="c:\\Users\\User\\OneDrive\\Documents\\tlm\\Wildnerve-tlm_HF\\sentencepiece.model")
|
| 78 |
-
sample_text = "This is an ADVANCED Test sentence! With multiple spaces and Punctuation."
|
| 79 |
-
tokens_sp = wrapper.tokenize(sample_text, use_sentencepiece=True)
|
| 80 |
-
tokens_tr = wrapper.tokenize(sample_text, use_sentencepiece=False)
|
| 81 |
-
encoded = wrapper.encode(sample_text)
|
| 82 |
-
decoded = wrapper.decode(encoded) if encoded else ""
|
| 83 |
-
print("SentencePiece Tokens:", tokens_sp)
|
| 84 |
-
print("Transformer Tokens:", tokens_tr)
|
| 85 |
-
print("Encoded:", encoded)
|
| 86 |
-
print("Decoded:", decoded)
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
+
import json
|
| 3 |
import logging
|
| 4 |
+
from typing import List, Dict, Optional, Union, Any
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
logger = logging.getLogger(__name__)
|
| 7 |
|
| 8 |
class TokenizerWrapper:
|
| 9 |
+
"""Lightweight wrapper around GPT-2 tokenizer with memory optimization"""
|
| 10 |
|
| 11 |
+
def __init__(self, model_name: str = "gpt2", load_vocab: bool = True):
|
| 12 |
self.model_name = model_name
|
| 13 |
+
self.pad_token = "<pad>"
|
| 14 |
+
self.eos_token = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|