WildnerveAI committed on
Commit 6ffc9f3 · verified · 1 Parent(s): c9d7656

Upload 8 files

Files changed (7)
  1. adapter_layer.py +199 -278
  2. handler.py +100 -51
  3. model_PrTr.py +11 -3
  4. model_stub.py +58 -0
  5. service_registry.py +8 -133
  6. smartHybridAttention.py +9 -3
  7. tokenizer.py +6 -78
adapter_layer.py CHANGED
@@ -1,16 +1,15 @@
 import os
 import sys
 import json
-import nltk
-import torch
-import inspect
 import logging
 import pydantic  # required
-import codecarbon
 import importlib.util  # required
 from typing import Dict, Any, Optional, List, Tuple
 from service_registry import registry, MODEL, PRETRAINED_MODEL, TOKENIZER
 
+# Force low memory usage mode
+os.environ["LOW_MEMORY_MODE"] = "1"
+
 # Log versions and fail fast if missing
 logger = logging.getLogger(__name__)
 logger.info(f"Using pydantic v{pydantic.__version__}")
@@ -18,6 +17,18 @@ logger.info(f"Using codecarbon v{codecarbon.__version__}")
 
 print(f"Successfully using installed dependencies - pydantic: {pydantic.__version__}, codecarbon: {codecarbon.__version__}")
 
+# MEMORY OPTIMIZATION: Show current memory usage
+def log_memory_usage():
+    try:
+        import psutil
+        process = psutil.Process(os.getpid())
+        memory_info = process.memory_info()
+        memory_mb = memory_info.rss / 1024 / 1024
+        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
+        return memory_mb
+    except:
+        return 0
+
 # Import dependency helpers
 def is_module_available(module_name):
     try:
@@ -49,299 +60,209 @@ except ImportError as e:
             return "model_Custm", 0.8
         return "model_PrTr", 0.6
 
+# MEMORY OPTIMIZATION: Create basic PromptAnalyzer without loading models
+class BasicPromptAnalyzer:
+    def __init__(self, **kwargs):
+        self.logger = logging.getLogger(__name__)
+        self.predefined_topics = {
+            "programming": ["python", "java", "code"],
+            "general": ["weather", "hello", "chat"]
+        }
+
+    def analyze_prompt(self, prompt: str):
+        # Simple keyword-based routing
+        prompt_lower = prompt.lower()
+        for tech_word in self.predefined_topics.get("programming", []):
+            if tech_word in prompt_lower:
+                return "model_Custm", 0.8
+        return "model_PrTr", 0.6
+
 class WildnerveModelAdapter:
-    """Adapter layer that interfaces between HF inference endpoints and the model."""
-    RETRY_COUNT = 5
+    """Ultra-lightweight adapter layer for HF inference endpoints."""
 
     def __init__(self, model_path: str):
         self.model_path = model_path
         self.tokenizer = None
         self.model = None
-        self.initialized = False
+        self.model_loaded = False
+        logger.info(f"Creating adapter with path: {model_path}")
 
-        # ensure model directory and repo root are first on import path
-        root = os.getcwd()
-        paths = []
-        if os.path.isdir(model_path):
-            paths.append(model_path)
-        else:
-            logger.warning(f"Model path not found or not a directory: {model_path}")
-        paths.append(root)
-        for p in paths:
-            if p not in sys.path:
-                sys.path.insert(0, p)
-
-        logger.info(f"Model adapter initialized with path: {model_path}")
-
-        # Initialize components with retry logic
-        for attempt in range(1, self.RETRY_COUNT + 1):
-            try:
-                self._initialize_tokenizer()
-                logger.info("Tokenizer initialized")
-                break
-            except Exception as e:
-                logger.warning(f"Tokenizer init attempt {attempt}/{self.RETRY_COUNT} failed: {e}")
-                logger.debug("Tokenizer init stack trace:", exc_info=True)
-                if attempt == self.RETRY_COUNT:
-                    raise
-
-        for attempt in range(1, self.RETRY_COUNT + 1):
-            try:
-                self._initialize_model()
-                logger.info("Model initialized")
-                break
-            except Exception as e:
-                logger.warning(f"Model init attempt {attempt}/{self.RETRY_COUNT} failed: {e}")
-                logger.debug("Model init stack trace:", exc_info=True)
-                if attempt == self.RETRY_COUNT:
-                    raise
-
-    def _initialize_tokenizer(self):
-        """Initialize tokenizer via our local wrapper first, then fallback."""
-        try:
-            # primary: use our tokenizer.py
-            from tokenizer import TokenizerWrapper
-            self.tokenizer = TokenizerWrapper()
-            logger.info("Using TokenizerWrapper from tokenizer.py")
-            return
-        except Exception as e:
-            logger.warning(f"TokenizerWrapper init failed: {e}")
-
-        # Try to import from service_registry if available
-        try:
-            if is_module_available('service_registry'):
-                from service_registry import registry, TOKENIZER
-
-                if registry.has(TOKENIZER):
-                    self.tokenizer = registry.get(TOKENIZER)
-                    logger.info("Retrieved tokenizer from registry")
-                    return
-
-            # Try loading from the original tokenizer.py
-            if is_module_available('tokenizer'):
-                from tokenizer import TokenizerWrapper, get_tokenizer
-                self.tokenizer = get_tokenizer()
-                logger.info("Created TokenizerWrapper instance")
-                return
-
-        except Exception as e:
-            logger.warning(f"Error initializing original tokenizer: {e}")
-
-        # Final fallback: use your get_tokenizer wrapper
-        try:
-            from tokenizer import get_tokenizer
-            self.tokenizer = get_tokenizer()
-            logger.info("Using get_tokenizer() fallback")
-            return
-        except Exception as e:
-            logger.error(f"No tokenizer could be initialized: {e}")
-            raise ImportError("Tokenizer initialization failed")
-
-    def _initialize_model(self):
-        """Initialize the model from service registry or create it directly."""
-        max_attempts = 5
-        attempt = 0
-
-        while attempt < max_attempts:
-            attempt += 1
-            try:
-                # Ensure there's a model in the registry
-                from service_registry import registry, MODEL, ensure_models_registered
-                ensure_models_registered()  # This will load model_Custm if needed
-
-                if registry.has(MODEL):
-                    self.model = registry.get(MODEL)
-                    logger.info(f"Successfully loaded model from service registry")
-
-                    # Ensure the model has the specializations loaded
-                    if hasattr(self.model, "load_specializations"):
-                        try:
-                            self.model.load_specializations()
-                            logger.info(f"Loaded specializations for model")
-                        except Exception as e:
-                            logger.warning(f"Error loading specializations: {e}")
-
-                    return
-
-                # Fall back to creating the model directly
-                logger.warning("No model in registry, creating model_Custm directly")
-                from model_Custm import Wildnerve_tlm01
-                self.model = Wildnerve_tlm01(
-                    vocab_size=30522,
-                    specialization="general",
-                    dataset_path=None,
-                    model_name="bert-base-uncased",
-                    embedding_dim=768,
-                    num_heads=12,
-                    hidden_dim=768,
-                    num_layers=2,
-                    output_size=768,
-                    dropout=0.1,
-                    max_seq_length=128,
-                    pooling_mode="mean",
-                    tokenizer=self.tokenizer
-                )
-                logger.info("Successfully created model_Custm directly")
-                return
-
-            except Exception as e:
-                logger.warning(f"Error on model init attempt {attempt}/{max_attempts}: {e}")
-
-                # Wait briefly before next attempt
-                import time
-                time.sleep(1)
-
-        # All attempts failed
-        raise ImportError("No model registered in service registry")
-
-        # When storing models/objects, make sure we don't create circular references
-        if registry.has(MODEL):
-            self.model = registry.get(MODEL)
-            # Don't add back-references to registry or other objects that might
-            # include this adapter, to avoid circular references
-
-    def _build_init_kwargs(self):
-        return {
-            "vocab_size": 30522,
-            "specialization": "general",
-            "dataset_path": None,
-            "model_name": "bert-base-uncased",
-            "embedding_dim": 768,
-            "num_heads": 12,
-            "hidden_dim": 768,
-            "num_layers": 6,
-            "output_size": 768,
-            "dropout": 0.1,
-            "max_seq_length": 512,
-            "pooling_mode": "mean",
-            "tokenizer": self.tokenizer
-        }
-
-    def _split_prompt(self, prompt: str) -> Tuple[str, str]:
-        """Return (technical_sentences, general_sentences)."""
-        # download punkt if needed
-        try:
-            nltk.data.find("tokenizers/punkt")
-        except LookupError:
-            nltk.download("punkt")
-
-        sents = nltk.sent_tokenize(prompt)
-        analyzer = PromptAnalyzer()
-        tech_keys = set(analyzer.predefined_topics.get("programming", []))
-        tech_list, gen_list = [], []
-        for s in sents:
-            # simple keyword check
-            if any(k in s.lower() for k in tech_keys):
-                tech_list.append(s)
-            else:
-                gen_list.append(s)
-        return " ".join(tech_list).strip(), " ".join(gen_list).strip()
-
-    def generate(self, text_input, max_length=None, **kwargs):
-        """Generate text using the model - centralized generation point"""
+        # Safe verification of model file existence
+        self._verify_model_files()
+
+    def _verify_model_files(self):
+        """Verify model files exist without loading them"""
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        model_files = ["model_Custm.py", "model_PrTr.py"]
+
+        self.available_models = {}
+        for filename in model_files:
+            filepath = os.path.join(script_dir, filename)
+            if os.path.exists(filepath):
+                module_name = filename.replace('.py', '')
+                self.available_models[module_name] = filepath
+                logger.info(f"Found model file: {filename}")
+
+        if not self.available_models:
+            logger.warning("No model files found - will use stub implementation")
+            # Create stub file if needed
+            stub_path = os.path.join(script_dir, "model_stub.py")
+            if not os.path.exists(stub_path):
+                try:
+                    with open(stub_path, "w") as f:
+                        f.write("""
+# Minimal stub model
+import torch.nn as nn
+class Wildnerve_tlm01(nn.Module):
+    def __init__(self, **kwargs):
+        super().__init__()
+        self.is_stub = True
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+    def generate(self, prompt=None, **kwargs):
+        return f"Stub model response for: {prompt[:30]}..."
+""")
+                    logger.info("Created stub model file")
+                except Exception as e:
+                    logger.error(f"Failed to create stub model: {e}")
+
+    def generate(self, text_input, max_length=None, **kwargs):
+        """Generate text - with lazy model loading"""
+        try:
+            # Try to load model on first use
+            if not self.model_loaded:
+                self._lazy_load_model()
+
+            # If we have a model now, use it
+            if self.model:
+                try:
+                    logger.info(f"Generating with model: {type(self.model).__name__}")
+                    return self.model.generate(
+                        prompt=text_input,
+                        max_length=max_length,
+                        **kwargs
+                    )
+                except Exception as e:
+                    logger.error(f"Model generation error: {e}")
+                    # Try tokenizer-only response as fallback
+
+            # If we have a tokenizer but no model, use simple responses
+            if self.tokenizer and not self.model:
+                # Try to get a slightly better response with tokenizer
+                tokenized = self.tokenizer(text_input, return_tensors="pt", truncation=True)
+                return f"Processing: {text_input[:50]}..."
+
+            # If no model or tokenizer, return simple response
+            return f"I've received your input about '{text_input[:30]}...'"
+
+        except Exception as e:
+            logger.error(f"Error in generate method: {e}")
+            return f"An error occurred processing your request: {str(e)}"
+
+    def _lazy_load_model(self):
+        """Try to load a model on demand, with multiple fallback options"""
        try:
-            # Use PromptAnalyzer to determine which model to use
-            try:
-                from model_List import PromptAnalyzer
-                analyzer = PromptAnalyzer()
-                model_type, confidence = analyzer.analyze_prompt(text_input)
-                logger.info(f"PromptAnalyzer selected {model_type} with confidence {confidence:.2f}")
-            except Exception as e:
-                logger.error(f"Error using PromptAnalyzer: {e}")
-                model_type = "model_Custm"  # Default to custom model on error
-
-            # Enhanced generation parameters with strong repetition prevention
-            generation_kwargs = {
-                'max_length': max_length or 150,
-                'temperature': kwargs.get('temperature', 0.7),
-                'top_p': kwargs.get('top_p', 0.95),
-                'top_k': kwargs.get('top_k', 50),
-                'repetition_penalty': kwargs.get('repetition_penalty', 1.3),  # Increased from 1.2
-                'no_repeat_ngram_size': kwargs.get('no_repeat_ngram_size', 3),  # Increased from 2
-                'do_sample': kwargs.get('do_sample', True),
-                'num_return_sequences': kwargs.get('num_return_sequences', 1),
-                'early_stopping': kwargs.get('early_stopping', True),
-                'bad_words_ids': kwargs.get('bad_words_ids', None),  # Block repetitive phrases
-                'min_length': kwargs.get('min_length', 10),  # Ensure reasonable response length
-            }
-
-            # Create penalty_alpha for GPT-2 encoder-decoder attention
-            if 'penalty_alpha' not in kwargs:
-                generation_kwargs['penalty_alpha'] = 0.6  # Helps prevent looping in GPT-2
-
-            # Override with any explicitly provided kwargs
-            generation_kwargs.update({k: v for k, v in kwargs.items() if k not in ('prompt', 'context')})
-
-            if model_type == "model_Custm":
-                # Use the Custom Wildnerve model for technical topics
-                custom_model = registry.get(MODEL)
-                if custom_model:
-                    try:
-                        logger.info("Using custom Wildnerve-tlm01_Hybrid_Model for technical prompt")
-                        # Check signature of the generate method
-                        import inspect
-                        if hasattr(custom_model, "generate"):
-                            sig = inspect.signature(custom_model.generate)
-                            if "prompt" in sig.parameters:
-                                return custom_model.generate(prompt=text_input, **generation_kwargs)
-                            else:
-                                # If no prompt parameter, try tokenizing first
-                                inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
-                                return custom_model.generate(input_ids=inputs.input_ids, **generation_kwargs)
-                        else:
-                            logger.warning("Custom model doesn't have generate method, falling back to pretrained")
-                    except Exception as e:
-                        logger.error(f"Error using custom model: {e}")
-            else:
-                # Use the Pretrained model (GPT-2) for general topics
-                pre = registry.get(PRETRAINED_MODEL)
-                if pre:
-                    try:
-                        logger.info("Using GPT-2 pretrained model for general prompt")
-                        # Try to use the pretrained model's generate method
-                        if hasattr(pre, "generate"):
-                            # Check the signature of the generate method to determine correct parameters
-                            import inspect
-                            sig = inspect.signature(pre.generate)
-                            if "prompt" in sig.parameters:
-                                return pre.generate(prompt=text_input, **generation_kwargs)
-                            else:
-                                # If no prompt parameter, try tokenizing first
-                                inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
-                                return pre.generate(input_ids=inputs.input_ids, **generation_kwargs)  # Explicitly pass as input_ids
-                        else:
-                            logger.warning("Pretrained model doesn't have generate method")
-                    except Exception as e:
-                        logger.error(f"Error using pretrained model: {e}")
-
-            # Fall back to using the custom model if needed
-            if self.model:
+            logger.info("Attempting to load model on first request")
+
+            # First initialize tokenizer if not already done
+            self._initialize_minimal_tokenizer()
+
+            # Try to load model_Custm first
+            if "model_Custm" in self.available_models:
+                try:
+                    logger.info("Trying to load model_Custm")
+                    model_custm_spec = importlib.util.spec_from_file_location(
+                        "model_Custm",
+                        self.available_models["model_Custm"]
+                    )
+                    model_custm = importlib.util.module_from_spec(model_custm_spec)
+                    model_custm_spec.loader.exec_module(model_custm)
+
+                    if hasattr(model_custm, "Wildnerve_tlm01"):
+                        logger.info("Creating Wildnerve_tlm01 from model_Custm")
+                        model_class = getattr(model_custm, "Wildnerve_tlm01")
+                        self.model = model_class(
+                            tokenizer=self.tokenizer,
+                            vocab_size=50257,  # GPT-2 vocab size
+                            specialization="general",
+                            embedding_dim=768,
+                            num_heads=12,
+                            hidden_dim=768,
+                            num_layers=2,  # Reduced for memory efficiency
+                            output_size=50257,  # Match GPT-2 vocab
+                            dropout=0.1,
+                            max_seq_length=128  # Reduced for memory
+                        )
+                        logger.info("Successfully created custom model")
+                        self.model_loaded = True
+                        return
+                except Exception as e:
+                    logger.error(f"Failed to load model_Custm: {e}")
+
+            # Try model_PrTr next
+            if "model_PrTr" in self.available_models:
                 try:
-                    logger.info("Using custom model for generation")
-
-                    # Check if the model is expecting a prompt parameter or input_ids
-                    import inspect
-                    if hasattr(self.model, "generate"):
-                        sig = inspect.signature(self.model.generate)
-                        if "prompt" in sig.parameters:
-                            # Model accepts prompt parameter directly
-                            return self.model.generate(prompt=text_input, **generation_kwargs)  # Explicitly pass as prompt
-                        else:
-                            # Model expects tokenized input_ids instead
-                            logger.info("Model expects tokenized input - converting prompt to input_ids")
-                            inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
-                            return self.model.generate(input_ids=inputs.input_ids, **generation_kwargs)  # Explicitly pass as input_ids
-                    else:
-                        logger.error("Model has no generate method")
-                        # Simple fallback for models without generate
-                        return f"I'm processing your request about '{text_input[:30]}...'"
+                    logger.info("Trying to load model_PrTr")
+                    model_prtr_spec = importlib.util.spec_from_file_location(
+                        "model_PrTr",
+                        self.available_models["model_PrTr"]
+                    )
+                    model_prtr = importlib.util.module_from_spec(model_prtr_spec)
+                    model_prtr_spec.loader.exec_module(model_prtr)
+
+                    if hasattr(model_prtr, "Wildnerve_tlm01"):
+                        logger.info("Creating Wildnerve_tlm01 from model_PrTr")
+                        model_class = getattr(model_prtr, "Wildnerve_tlm01")
+                        self.model = model_class(
+                            tokenizer=self.tokenizer,
+                            model_name="gpt2"
+                        )
+                        logger.info("Successfully created pretrained model")
+                        self.model_loaded = True
+                        return
                 except Exception as e:
-                    logger.error(f"Error using custom model: {e}")
-
-            # Add last-chance fallback with generic response
-            return f"I apologize, but I'm experiencing some technical difficulties processing your request about '{text_input[:30]}...'. (Error: {str(e)})"
-
-            # Final fallback
-            return f"I apologize, but I'm unable to process your request about '{text_input[:30]}...' at this time."
+                    logger.error(f"Failed to load model_PrTr: {e}")
+
+            # Try stub model as last resort
+            try:
+                logger.info("Trying to load model_stub")
+                script_dir = os.path.dirname(os.path.abspath(__file__))
+                stub_path = os.path.join(script_dir, "model_stub.py")
+
+                if os.path.exists(stub_path):
+                    stub_spec = importlib.util.spec_from_file_location("model_stub", stub_path)
+                    model_stub = importlib.util.module_from_spec(stub_spec)
+                    stub_spec.loader.exec_module(model_stub)
+
+                    if hasattr(model_stub, "Wildnerve_tlm01"):
+                        logger.info("Creating stub model")
+                        model_class = getattr(model_stub, "Wildnerve_tlm01")
+                        self.model = model_class(
+                            tokenizer=self.tokenizer,
+                            specialization="stub"
+                        )
+                        logger.warning("Using STUB model - limited functionality")
+                        self.model_loaded = True
+                        return
+            except Exception as e:
+                logger.error(f"Failed to load stub model: {e}")
+
+            logger.error("All model loading attempts failed")
+
        except Exception as e:
-            logger.error(f"Error in generate method: {e}")
-            return f"An error occurred while generating text: {str(e)}"
+            logger.error(f"Error in _lazy_load_model: {e}")
+        finally:
+            # Always mark as loaded to avoid repeated attempts
+            self.model_loaded = True
+
+    def _initialize_minimal_tokenizer(self):
+        """Initialize just the tokenizer, not the model"""
+        try:
+            from transformers import AutoTokenizer
+            self.tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
+            logger.info("Initialized minimal tokenizer")
+        except Exception as e:
+            logger.error(f"Failed to initialize tokenizer: {e}")
+
+# Add import for inspect at the top
+import inspect
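Note on the rewrite above: all heavy work now happens on the first generate() call rather than in __init__, and a failed load is never retried because the finally block sets model_loaded. A minimal sketch of that load-on-first-use pattern, with illustrative names (LazyAdapter is not part of the repository):

# Minimal sketch of the lazy-load pattern used by WildnerveModelAdapter (illustrative names).
import importlib.util
import logging

logger = logging.getLogger(__name__)

class LazyAdapter:
    def __init__(self, module_path: str):
        self.module_path = module_path  # checked up front, imported only on demand
        self.model = None
        self.load_attempted = False     # like model_loaded: a failed load is not retried

    def generate(self, text: str) -> str:
        if not self.load_attempted:
            self._load()
        if self.model is not None:
            return self.model.generate(prompt=text)
        return f"Processing: {text[:30]}..."  # degraded fallback, as in the adapter

    def _load(self):
        try:
            spec = importlib.util.spec_from_file_location("lazy_model", self.module_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            self.model = module.Wildnerve_tlm01()
        except Exception as e:
            logger.error(f"Lazy load failed: {e}")
        finally:
            self.load_attempted = True  # matches the diff's finally block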
handler.py CHANGED
@@ -16,7 +16,43 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)
 
 # --- DEBUG: confirm correct handler.py is loaded ---
-print("DEBUG: using Wildnerve-tlm_HF/handler.py — v5 with robust config handling")
+print("DEBUG: using Wildnerve-tlm_HF/handler.py — v7 with file verification")
+
+# Set aggressive memory optimization
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
+os.environ["TRANSFORMERS_OFFLINE"] = "1"  # Prevent downloading models
+os.environ["LOW_MEMORY_MODE"] = "1"  # Custom flag for our code to detect
+
+# VERIFY CRITICAL FILES: Check required model files exist before proceeding
+def verify_required_files():
+    """Verify that critical model files exist without importing them"""
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    required_files = [
+        "model_Custm.py",
+        "model_PrTr.py",
+        "adapter_layer.py",
+        "tokenizer.py"
+    ]
+
+    missing_files = []
+    for filename in required_files:
+        filepath = os.path.join(script_dir, filename)
+        if not os.path.exists(filepath):
+            missing_files.append(filename)
+            logger.warning(f"Required file not found: {filename}")
+        else:
+            file_size = os.path.getsize(filepath) / 1024  # KB
+            logger.info(f"Found required file: {filename} ({file_size:.1f} KB)")
+
+    if missing_files:
+        logger.error(f"Missing required files: {', '.join(missing_files)}")
+        return False
+    return True
+
+# Verify required files exist but don't load them yet
+critical_files_verified = verify_required_files()
+if not critical_files_verified:
+    logger.warning("Some critical model files are missing - expect errors during request handling")
 
 # Safe config import that won't fail during initialization
 try:
@@ -33,12 +69,28 @@ except Exception as e:
         }
     }
 
-# Add this near the top (after imports)
-try:
-    from service_registry import ensure_models_registered
-    ensure_models_registered()
-except Exception as e:
-    logger.error(f"Error ensuring models are registered: {e}")
+# MEMORY OPTIMIZATION: Avoid loading pretrained models during init
+os.environ["TRANSFORMERS_OFFLINE"] = "1"  # Prevent downloading models
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # Limit CUDA allocations
+
+# Add safeguard for memory usage
+def check_memory_usage():
+    """Check memory usage and log warning if too high"""
+    try:
+        import psutil
+        process = psutil.Process(os.getpid())
+        memory_info = process.memory_info()
+        memory_mb = memory_info.rss / 1024 / 1024
+        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
+        if memory_mb > 1800:  # 1.8 GB (90% of limit)
+            logger.warning(f"Memory usage critical: {memory_mb:.2f} MB. Consider reducing model size.")
+        return memory_mb
+    except Exception as e:
+        logger.warning(f"Error checking memory usage: {e}")
+        return 0
+
+# Check memory at startup
+check_memory_usage()
 
 # Safely check for required packages without crashing
 try:
@@ -74,6 +126,7 @@ try:
     script_dir = os.path.dirname(os.path.abspath(__file__))
     sys.path.insert(0, script_dir)
 
+    # MEMORY OPTIMIZATION: Import but don't initialize yet
     from adapter_layer import WildnerveModelAdapter
     logger.info("Successfully imported adapter_layer module")
 
@@ -106,61 +159,57 @@ except Exception as e:
 
 class EndpointHandler:
     def __init__(self, model_dir: str = None):
-        # HF toolkit passes model directory here; log or ignore
-        if model_dir:
-            logger.info(f"Handler init with path: {model_dir}")
-        try:
-            # Try to import adapter layer
-            try:
-                # For more reliable importing
-                script_dir = os.path.dirname(os.path.abspath(__file__))
-                sys.path.insert(0, script_dir)
-
-                from adapter_layer import WildnerveModelAdapter
-                logger.info("Successfully imported adapter_layer module")
-            except ImportError as e:
-                logger.error(f"Could not import adapter_layer: {e}")
-                # Create a minimal placeholder adapter class
-                class WildnerveModelAdapter:
-                    def __init__(self, model_path: str = ""):
-                        self.model_path = model_path
-                        logger.info(f"Using fallback WildnerveModelAdapter with path: {model_path}")
-
-                    def generate(self, text_input, **kwargs):
-                        return f"Model adapter unavailable. Received input: {text_input[:30]}..."
-
-            # supply model_dir as the adapter's model_path
-            self.adapter = WildnerveModelAdapter(model_dir or "")
-        except Exception as e:
-            logger.error(f"Adapter init failed: {e}", exc_info=True)
-            self.init_error = str(e)
-            self.adapter = None
-
+        # Do absolute minimal initialization here
+        self.model_dir = model_dir
+        self.adapter = None
+        self.initialized = False
+        self.critical_files_verified = critical_files_verified
+        logger.info(f"Handler init with minimal footprint: {model_dir}")
+
     def __call__(self, data, parameters=None):
-        if self.adapter is None:
-            return [{"generated_text": f"Initialization error: {self.init_error}"}]
+        # Lazy initialization on first request
+        if not self.initialized:
+            self._initialize_on_demand()
 
         # Extract prompt text
         text = data.get("inputs") if isinstance(data, dict) else str(data)
 
         try:
-            # Generate response
+            # Warning response if critical files are missing
+            if not self.critical_files_verified:
+                logger.warning("Attempting to process request with missing critical files")
+                return [{
+                    "generated_text": "System initialization issue: Some model files appear to be missing. " +
+                                      f"Processing your request about '{text[:30]}...' with limited functionality."
+                }]
+
+            # Simple response for first call
+            if not self.adapter:
+                logger.info("Using simple text response (no adapter)")
+                return [{"generated_text": f"Processing your request about '{text[:30]}...'"}]
+
+            # Generate response with adapter if available
             out = self.adapter.generate(text, **(parameters or {}))
 
-            # Ensure output is JSON serializable
-            if torch.is_tensor(out):
-                # Convert tensor to string
-                logger.warning("Model returned a tensor instead of text, attempting to convert")
-                if out.dim() > 0 and hasattr(self.adapter, "tokenizer"):
-                    out = self.adapter.tokenizer.decode(out.cpu().squeeze(), skip_special_tokens=True)
-                else:
-                    out = str(out)
-
-            # Final check to ensure string output
+            # Ensure output is valid string
             if not isinstance(out, str):
                 out = str(out)
 
             return [{"generated_text": out}]
         except Exception as e:
             logger.error(f"Generation error: {e}", exc_info=True)
-            return [{"generated_text": f"Error: {e}"}]
+            return [{"generated_text": f"Error processing your request: {str(e)}"}]
+
+    def _initialize_on_demand(self):
+        """Initialize adapter when first needed"""
+        try:
+            logger.info("Performing lazy initialization on first request")
+
+            # Import with minimal dependencies
+            from adapter_layer import WildnerveModelAdapter
+            self.adapter = WildnerveModelAdapter(self.model_dir or "")
+            self.initialized = True
+            logger.info("Adapter initialized successfully")
+        except Exception as e:
+            logger.error(f"Error initializing adapter: {e}", exc_info=True)
+            # Continue without adapter, we'll return simple responses
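Because __init__ now does almost nothing, the handler can be exercised locally with no weights loaded at construction time. A hypothetical smoke test (the HF Inference Toolkit normally performs this call; the inputs/parameters shapes follow the code above):

# Hypothetical local smoke test; the endpoint runtime normally drives this.
from handler import EndpointHandler

handler = EndpointHandler(model_dir=".")  # stays lightweight, no model load here
# The first call triggers _initialize_on_demand(), which in turn triggers the
# adapter's lazy model load.
result = handler({"inputs": "Explain Python decorators"}, parameters={"max_length": 80})
print(result[0]["generated_text"])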
model_PrTr.py CHANGED
@@ -100,7 +100,8 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
             from transformers import GPT2LMHeadModel, GPT2Tokenizer
 
             # Initialize the model and tokenizer
-            self.gpt2_model = GPT2LMHeadModel.from_pretrained(model_name)
+            self.model_name = model_name
+            self.gpt2_model = None  # Will be loaded on first use
 
             # Ensure proper tokenizer setup for GPT-2
             if tokenizer is not None:
@@ -115,10 +116,10 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
                 self.tokenizer.pad_token = self.tokenizer.eos_token
                 self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
 
-            logger.info(f"Successfully loaded GPT-2 model: {model_name}")
+            logger.info(f"Successfully initialized GPT-2 model: {model_name}")
 
         except Exception as e:
-            logger.error(f"Error loading GPT-2 model: {e}", exc_info=True)
+            logger.error(f"Error initializing GPT-2 model: {e}", exc_info=True)
             raise
 
         # Register this model instance in the registry by specialization
@@ -129,6 +130,10 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
         registry.register(PRETRAINED_MODEL, self, overwrite=True)
         logger.info("Registered GPT-2 model as pretrained model")
 
+    def _ensure_model_loaded(self):
+        if self.gpt2_model is None:
+            self.gpt2_model = GPT2LMHeadModel.from_pretrained(self.model_name)
+
     # Replace the old forward method with GPT-2 specific implementation
     def forward(self, src: torch.Tensor, tgt: Optional[torch.Tensor] = None,
                 src_key_padding_mask: Optional[torch.Tensor] = None,
@@ -136,6 +141,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
                 return_sequence: bool = False,
                 **kwargs) -> torch.Tensor:
 
+        self._ensure_model_loaded()  # Load model only when needed
         # Use GPT-2 directly for generation
         outputs = self.gpt2_model(src, **kwargs)
         return outputs.logits
@@ -143,6 +149,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
     # Update generate to handle both direct prompt and tokenized input
     def generate(self, prompt=None, input_ids=None, max_length=None, **kwargs):
         """Generate text using the GPT-2 model"""
+        self._ensure_model_loaded()  # Load model only when needed
         try:
             # Try to use adapter_layer.generate if available (consolidate generation paths)
             adapter_layer = registry.get("adapter_layer")
@@ -204,6 +211,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
 
     def generate_streaming(self, prompt=None, input_ids=None, **kwargs):
         """Generate tokens one by one in streaming fashion"""
+        self._ensure_model_loaded()  # Load model only when needed
         try:
             # Handle either text or tokenized input
             if prompt is not None and input_ids is None:
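One interaction worth flagging: handler.py sets TRANSFORMERS_OFFLINE=1 before this module is ever imported, so the deferred GPT2LMHeadModel.from_pretrained(self.model_name) call can only succeed if the gpt2 weights are already in the local cache or shipped with the repository; otherwise the first generate() raises inside _ensure_model_loaded(). Also, _ensure_model_loaded() as written is not thread-safe; a sketch of a locked variant (assumed hardening for endpoints serving concurrent requests, not part of the diff):

# Sketch: double-checked locking around the lazy load (assumed, not in the diff).
import threading
from transformers import GPT2LMHeadModel

class LazyGPT2:
    def __init__(self, model_name: str = "gpt2"):
        self.model_name = model_name
        self.gpt2_model = None
        self._load_lock = threading.Lock()

    def _ensure_model_loaded(self):
        if self.gpt2_model is None:          # fast path, no lock
            with self._load_lock:
                if self.gpt2_model is None:  # re-check under the lock
                    self.gpt2_model = GPT2LMHeadModel.from_pretrained(self.model_name)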
model_stub.py ADDED
@@ -0,0 +1,58 @@
+"""
+Minimal model stub that can be used if the real model files aren't found.
+Uses near-zero memory while still providing the expected interface.
+"""
+import os
+import logging
+import torch.nn as nn
+
+logger = logging.getLogger(__name__)
+logger.warning("Using minimal model stub - EMERGENCY FALLBACK MODE")
+
+class Wildnerve_tlm01(nn.Module):
+    """Ultra-minimal model implementation that uses almost no memory"""
+
+    def __init__(self, **kwargs):
+        """Initialize with minimal footprint - store kwargs for compatibility"""
+        super().__init__()
+        self.is_stub = True
+
+        # Store passed parameters without using them
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+        # Important: store tokenizer if provided
+        self.tokenizer = kwargs.get('tokenizer')
+        self.specialization = kwargs.get('specialization', 'general')
+
+        logger.warning(f"Created stub model with specialization: {self.specialization}")
+
+    def forward(self, input_ids=None, attention_mask=None, **kwargs):
+        """Minimal forward implementation that returns empty tensor"""
+        batch_size = 1
+        seq_length = 10
+        vocab_size = getattr(self, 'vocab_size', 50257)
+
+        if input_ids is not None:
+            batch_size = input_ids.shape[0]
+            seq_length = input_ids.shape[1]
+
+        import torch
+        # Return zeros - uses minimal memory
+        return torch.zeros((batch_size, seq_length, vocab_size))
+
+    def generate(self, prompt=None, **kwargs):
+        """Return a templated response"""
+        if prompt:
+            return f"[STUB MODEL] I've received your request about '{prompt[:50]}...'"
+        return "[STUB MODEL] I've received your request"
+
+    def generate_streaming(self, prompt=None, **kwargs):
+        """Simulate streaming response"""
+        import time
+        response = self.generate(prompt)
+        words = response.split()
+
+        for word in words:
+            yield word + " "
+            time.sleep(0.05)  # Simulate streaming delay
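A quick interface check for the stub (illustrative usage; the printed output shown is approximate):

# Illustrative check that the stub honours the model interface.
from model_stub import Wildnerve_tlm01

stub = Wildnerve_tlm01(tokenizer=None, specialization="general", vocab_size=50257)
print(stub.generate(prompt="What is a closure in Python?"))
# [STUB MODEL] I've received your request about 'What is a closure in Python?...'

for chunk in stub.generate_streaming(prompt="hello"):
    print(chunk, end="", flush=True)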
service_registry.py CHANGED
@@ -1,5 +1,5 @@
 """
-Simple service registry for dependency injection
+Minimal service registry for dependency injection
 """
 import logging
 import traceback
@@ -7,17 +7,17 @@ from typing import Any, Dict, Optional
 
 logger = logging.getLogger(__name__)
 
-# Constants used as keys - let's clarify with better names
-MODEL = "model"  # The custom Wildnerve-tlm01_Hybrid_Model
-PRETRAINED_MODEL = "pretrained_model"  # GPT-2 model
+# Constants used as keys
+MODEL = "model"
+PRETRAINED_MODEL = "pretrained_model"
 TOKENIZER = "tokenizer"
 MODEL_MANAGER = "model_manager"
 COMMUNICATOR = "communicator"
 PIPELINE = "pipeline"
-TRANSFORMER = "transformer"  # Generic transformer key
+TRANSFORMER = "transformer"
 
 class ServiceRegistry:
-    """A simple service registry for dependency management"""
+    """A minimal service registry that avoids loading heavy models"""
 
     def __init__(self):
         self._services = {}
@@ -34,7 +34,7 @@ class ServiceRegistry:
     def get(self, key: str) -> Optional[Any]:
         """Get a service by its key"""
         if key not in self._services:
-            logger.warning(f"No service registered with key: {key}")
+            # Don't log warning to avoid excessive logs
            return None
 
         return self._services[key]
@@ -51,130 +51,5 @@ class ServiceRegistry:
 registry = ServiceRegistry()
 
 def ensure_models_registered():
-    """Ensure at least one model is registered in the registry."""
-    # First make sure we have a CUSTOM model (Wildnerve-tlm01_Hybrid_Model)
-    if not registry.has(MODEL):
-        logger.info("No custom model in registry, registering Wildnerve-tlm01_Hybrid_Model")
-        try:
-            import os, importlib.util
-
-            # Find model_Custm.py in the same directory as this file
-            this_dir = os.path.dirname(os.path.abspath(__file__))
-            model_path = os.path.join(this_dir, "model_Custm.py")
-
-            # Add more debug logging
-            logger.info(f"Model path exists: {os.path.exists(model_path)}")
-            logger.info(f"Model directory content: {os.listdir(os.path.dirname(model_path))}")
-
-            if os.path.exists(model_path):
-                # Dynamic import of model_Custm.py for Wildnerve-tlm01_Hybrid_Model
-                spec = importlib.util.spec_from_file_location("model_custm", model_path)
-                model_module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(model_module)
-
-                # Get the model class for Wildnerve-tlm01_Hybrid_Model
-                if hasattr(model_module, "Wildnerve_tlm01"):
-                    from tokenizer import TokenizerWrapper
-
-                    # Create tokenizer and model
-                    tok = TokenizerWrapper()
-                    model_class = getattr(model_module, "Wildnerve_tlm01")
-                    model = model_class(
-                        vocab_size=50257,  # Updated to GPT-2 vocab size
-                        specialization="general",
-                        dataset_path=None,
-                        model_name="gpt2",  # Changed from bert-base-uncased
-                        embedding_dim=768,
-                        num_heads=12,
-                        hidden_dim=768,
-                        num_layers=2,
-                        output_size=50257,  # Match GPT-2 vocab size
-                        dropout=0.1,
-                        max_seq_length=128,
-                        pooling_mode="last",  # GPT-2 typically uses last token
-                        tokenizer=tok
-                    )
-
-                    # Register both tokenizer and the Wildnerve-tlm01_Hybrid_Model
-                    registry.register(TOKENIZER, tok, overwrite=True)
-                    registry.register(MODEL, model, overwrite=True)
-                    logger.info("Successfully registered Wildnerve-tlm01_Hybrid_Model as MODEL")
-                    return True
-
-            logger.error(f"model_Custm.py not found at {model_path}")
-            return False
-
-        except Exception as e:
-            # More detailed error logging
-            logger.error(f"Failed to register Wildnerve-tlm01_Hybrid_Model: {e}")
-            logger.error(f"Exception details: {type(e).__name__}")
-            logger.error(f"Exception traceback: {traceback.format_exc()}")
-            return False
-
-    # Then check if we have a GPT-2 PRETRAINED model
-    if not registry.has(PRETRAINED_MODEL):
-        logger.info("No GPT-2 model in registry, registering GPT-2")
-        try:
-            import os, importlib.util
-            # Import required modules at this scope
-            try:
-                from transformers import GPT2LMHeadModel, GPT2Tokenizer
-            except ImportError:
-                logger.error("Failed to import required GPT-2 modules")
-                return False
-
-            # Find model_PrTr.py in the same directory as this file
-            this_dir = os.path.dirname(os.path.abspath(__file__))
-            model_path = os.path.join(this_dir, "model_PrTr.py")
-
-            if os.path.exists(model_path):
-                # Dynamic import of model_PrTr.py
-                spec = importlib.util.spec_from_file_location("model_prtr", model_path)
-                model_module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(model_module)
-
-                # Get GPT-2 wrapper class
-                model_class = None
-                if hasattr(model_module, "PretrainedTransformer"):
-                    model_class = getattr(model_module, "PretrainedTransformer")
-                elif hasattr(model_module, "Wildnerve_tlm01"):
-                    model_class = getattr(model_module, "Wildnerve_tlm01")
-
-                if model_class:
-                    # Get tokenizer first
-                    tok = registry.get(TOKENIZER)
-                    if not tok:
-                        try:
-                            # Create GPT-2 tokenizer
-                            tok = GPT2Tokenizer.from_pretrained("gpt2")
-                            if tok.pad_token_id is None:
-                                tok.pad_token = tok.eos_token
-                                tok.pad_token_id = tok.eos_token_id
-                            registry.register(TOKENIZER, tok, overwrite=True)
-                            logger.info("Created GPT-2 tokenizer directly")
-                        except Exception as e:
-                            logger.error(f"Failed to create GPT-2 tokenizer: {e}")
-                            return False
-
-                    # Create GPT-2 model instance
-                    model = model_class(
-                        model_name="gpt2",  # Explicitly use gpt2
-                        tokenizer=tok
-                    )
-
-                    # Register as GPT-2 pretrained model
-                    registry.register(PRETRAINED_MODEL, model, overwrite=True)
-                    logger.info("Successfully registered GPT-2 as PRETRAINED_MODEL")
-                    return True
-
-            logger.error(f"model_PrTr.py not found at {model_path}")
-
-        except Exception as e:
-            logger.error(f"Failed to register GPT-2 model: {e}")
-            logger.error(f"Exception details: {type(e).__name__}")
-            logger.error(f"Exception traceback: {traceback.format_exc()}")
-
+    """Placeholder function - don't actually register models at startup"""
     return True
-
-# Execute this during module import to ensure models are registered
-ensure_models_registered()
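With registration removed from import time, callers now register services themselves when they first create them. Hypothetical usage consistent with the API above:

# Hypothetical usage: services are registered explicitly, never at import time.
from service_registry import registry, TOKENIZER

if registry.get(TOKENIZER) is None:  # get() now fails silently with None
    from transformers import AutoTokenizer
    tok = AutoTokenizer.from_pretrained("gpt2")
    registry.register(TOKENIZER, tok, overwrite=True)

tokenizer = registry.get(TOKENIZER)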
 
 
 
smartHybridAttention.py CHANGED
@@ -177,9 +177,8 @@ class SmartHybridAttention(nn.Module):
         # Ensure int type for memory tokens
         self.memory_tokens = int(memory_tokens) if isinstance(memory_tokens, (int, float)) else 32
 
-        # Initialize memory parameter
-        self.persistent_memory = nn.Parameter(torch.zeros(self.memory_tokens, 1, self.dim))
-        nn.init.normal_(self.persistent_memory, mean=0.0, std=0.02)
+        # Delayed initialization for memory parameter
+        self._persistent_memory_initialized = False
 
         # Projections
         self.q_proj = nn.Linear(self.dim, self.dim)
@@ -232,6 +231,12 @@ class SmartHybridAttention(nn.Module):
         except:
             return {}
 
+    def _init_memory(self):
+        if not self._persistent_memory_initialized:
+            self.persistent_memory = nn.Parameter(torch.zeros(self.memory_tokens, 1, self.dim))
+            nn.init.normal_(self.persistent_memory, mean=0.0, std=0.02)
+            self._persistent_memory_initialized = True
+
     def _create_sliding_window_mask(
         self,
         seq_len: int,
@@ -304,6 +309,7 @@ class SmartHybridAttention(nn.Module):
     ) -> torch.Tensor:
         """Apply attention with persistent memory tokens for long-range context.
        Returns: Output tensor after attention [seq_len, batch, dim]"""
+        self._init_memory()  # Initialize memory only when needed
         seq_len, batch_size, _ = query.size()
 
         # Expand memory tokens to batch size
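A caveat with deferring the nn.Parameter: parameters created during the first forward pass are invisible to anything that enumerated the module's parameters earlier, for example an optimizer constructed right after __init__, or a state_dict saved before the first call. A toy reproduction of the effect (assumed names, not from the repository):

# Toy demonstration of the lazy-parameter caveat.
import torch
import torch.nn as nn

class LazyMemory(nn.Module):
    def __init__(self, tokens: int = 4, dim: int = 8):
        super().__init__()
        self.tokens, self.dim = tokens, dim
        self._initialized = False

    def _init_memory(self):
        if not self._initialized:
            self.persistent_memory = nn.Parameter(torch.zeros(self.tokens, 1, self.dim))
            self._initialized = True

m = LazyMemory()
print(len(list(m.parameters())))  # 0 -> an optimizer built now would never train the memory
m._init_memory()
print(len(list(m.parameters())))  # 1 -> the parameter exists only after first use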
tokenizer.py CHANGED
@@ -1,86 +1,14 @@
-# Tokenizer Wrapper Module
 import os
-import torch
+import json
 import logging
-import sentencepiece as spm
-from typing import List, Union, Dict, Optional, Tuple, Any
-from transformers import AutoTokenizer, PreTrainedTokenizerBase, BertTokenizer
-from pathlib import Path
-from functools import lru_cache
-import importlib.util
-
-# Check if sentencepiece is available but don't crash if not
-SP_AVAILABLE = importlib.util.find_spec("sentencepiece") is not None
-if SP_AVAILABLE:
-    import sentencepiece as spm
-else:
-    logging.warning("sentencepiece not available; some tokenizer features will be limited")
-
-from config import app_config
-from service_registry import registry, TOKENIZER
+from typing import List, Dict, Optional, Union, Any
 
 logger = logging.getLogger(__name__)
 
 class TokenizerWrapper:
-    """A wrapper for transformer tokenizers with fallbacks"""
+    """Lightweight wrapper around GPT-2 tokenizer with memory optimization"""
 
-    def __init__(self, model_name="gpt2"):
+    def __init__(self, model_name: str = "gpt2", load_vocab: bool = True):
         self.model_name = model_name
-        try:
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            # Add pad token if it doesn't exist (important for GPT-2)
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-            logger.info(f"Initialized tokenizer from {model_name}")
-        except Exception as e:
-            logger.error(f"Error loading tokenizer: {e}")
-            self.tokenizer = None
-
-    def __call__(self, text, **kwargs):
-        """Make the wrapper callable like a standard HF tokenizer"""
-        if self.tokenizer is None:
-            raise ValueError("Tokenizer not initialized")
-        return self.tokenizer(text, **kwargs)
-
-    def encode(self, text, **kwargs):
-        """Encode text to token IDs"""
-        if self.tokenizer is None:
-            raise ValueError("Tokenizer not initialized")
-        return self.tokenizer.encode(text, **kwargs)
-
-    def decode(self, token_ids, **kwargs):
-        """Decode token IDs to text"""
-        if self.tokenizer is None:
-            raise ValueError("Tokenizer not initialized")
-        return self.tokenizer.decode(token_ids, **kwargs)
-
-    def tokenize(self, text, **kwargs):
-        """Tokenize text to tokens"""
-        if self.tokenizer is None:
-            raise ValueError("Tokenizer not initialized")
-        return self.tokenizer.tokenize(text, **kwargs)
-
-def get_tokenizer(model_name="gpt2"):
-    """Get a tokenizer instance with proper fallback handling"""
-    try:
-        return TokenizerWrapper(model_name)
-    except Exception as e:
-        logger.error(f"Error creating TokenizerWrapper: {e}")
-        try:
-            return AutoTokenizer.from_pretrained(model_name)
-        except Exception as e2:
-            logger.error(f"Error loading AutoTokenizer: {e2}")
-            return None
-
-if __name__ == "__main__":
-    # Example usage showcasing advanced features
-    wrapper = TokenizerWrapper(sp_model_path="c:\\Users\\User\\OneDrive\\Documents\\tlm\\Wildnerve-tlm_HF\\sentencepiece.model")
-    sample_text = "This is an ADVANCED Test sentence! With multiple spaces and Punctuation."
-    tokens_sp = wrapper.tokenize(sample_text, use_sentencepiece=True)
-    tokens_tr = wrapper.tokenize(sample_text, use_sentencepiece=False)
-    encoded = wrapper.encode(sample_text)
-    decoded = wrapper.decode(encoded) if encoded else ""
-    print("SentencePiece Tokens:", tokens_sp)
-    print("Transformer Tokens:", tokens_tr)
-    print("Encoded:", encoded)
-    print("Decoded:", decoded)
+        self.pad_token = "<pad>"
+        self.eos_token = "