WildnerveAI committed on
Commit
c9d7656
·
verified ·
1 Parent(s): 0b8ad4c

Upload 4 files

Browse files
Files changed (2) hide show
  1. model_List.py +51 -14
  2. service_registry.py +24 -22
model_List.py CHANGED
@@ -1,22 +1,25 @@
1
  # model_List.py - Model selection and analysis component with advanced features
2
- import logging
3
- import time
4
- import math
5
- import torch
6
- import importlib.util
7
  import os
8
  import re
9
- import logging
10
- from typing import List, Tuple, Dict, Type, Any, Optional
11
- import torch
12
- import numpy as np
13
- from sklearn.metrics.pairwise import cosine_similarity
14
  import nltk
15
  try:
16
  nltk.data.find('tokenizers/punkt')
17
  except LookupError:
18
  nltk.download("punkt")
19
- from service_registry import registry, TOKENIZER, MODEL
 
 
 
 
 
 
 
 
 
 
20
 
21
  # More robust config import
22
  try:
@@ -37,8 +40,38 @@ except ImportError:
37
  # Add SmartHybridAttention imports
38
  from utils.smartHybridAttention import SmartHybridAttention, get_hybrid_attention_config
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  logger = logging.getLogger(__name__)
41
 
 
 
 
 
 
 
 
 
42
  class PromptAnalyzer:
43
  """
44
  Enhanced prompt analyzer that combines:
@@ -96,9 +129,13 @@ class PromptAnalyzer:
96
  self.sentence_model = get_sentence_transformer('sentence-transformers/all-MiniLM-L6-v2')
97
  self.logger.info(f"Using SentenceTransformer model: sentence-transformers/all-MiniLM-L6-v2")
98
 
99
- # Use GPT-2 for perplexity calculation
100
- self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
101
- self.model = AutoModelForCausalLM.from_pretrained("gpt2")
 
 
 
 
102
  self.model.eval()
103
 
104
  logger.info(f"Initialized PromptAnalyzer with {self.model_name}, specialization: {self.specialization}, hidden_dim: {self.hidden_dim}")
 
1
  # model_List.py - Model selection and analysis component with advanced features
 
 
 
 
 
2
  import os
3
  import re
4
+ import json
5
+ import time
6
+ import math
 
 
7
  import nltk
8
  try:
9
  nltk.data.find('tokenizers/punkt')
10
  except LookupError:
11
  nltk.download("punkt")
12
+
13
+ import torch
14
+ import logging
15
+ import numpy as np
16
+ import importlib.util
17
+ from enum import Enum # Add this import for Enum
18
+ from service_registry import registry, MODEL, PRETRAINED_MODEL
19
+ from sklearn.metrics.pairwise import cosine_similarity
20
+ from typing import List, Tuple, Dict, Type, Any, Optional
21
+
22
+ logger = logging.getLogger(__name__)
23
 
24
  # More robust config import
25
  try:
 
40
  # Add SmartHybridAttention imports
41
  from utils.smartHybridAttention import SmartHybridAttention, get_hybrid_attention_config
42
 
43
+ # Fix: Import get_sentence_transformer properly
44
+ try:
45
+ from utils.transformer_utils import get_sentence_transformer
46
+ except ImportError:
47
+ # Create a fallback implementation if the import fails
48
+ def get_sentence_transformer(model_name):
49
+ try:
50
+ from sentence_transformers import SentenceTransformer
51
+ return SentenceTransformer(model_name)
52
+ except ImportError:
53
+ logger.error("sentence_transformers package not available")
54
+ # Return a minimal placeholder that won't crash initialization
55
+ class MinimalSentenceTransformer:
56
+ def __init__(self, *args, **kwargs):
57
+ pass
58
+ def encode(self, text):
59
+ return [0.0] * 384 # Return zero vector with typical dimension
60
+ return MinimalSentenceTransformer()
61
+
62
+ from model_Custm import Wildnerve_tlm01 as CustomModel
63
+
64
+ logging.basicConfig(level=logging.INFO)
65
  logger = logging.getLogger(__name__)
66
 
67
+ class ModelType(Enum):
68
+ CUSTOM = "model_Custm.py" # Wildnerve-tlm01 custom implementation
69
+ PRETRAINED = "model_PrTr.py" # GPT2 pretrained models
70
+ # COMBINED = "model_Combn.py" # Hybrid approach with both
71
+
72
+ # Replace generic Auto* classes with specific GPT-2 classes
73
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel
74
+
75
  class PromptAnalyzer:
76
  """
77
  Enhanced prompt analyzer that combines:
 
129
  self.sentence_model = get_sentence_transformer('sentence-transformers/all-MiniLM-L6-v2')
130
  self.logger.info(f"Using SentenceTransformer model: sentence-transformers/all-MiniLM-L6-v2")
131
 
132
+ # Use specific GPT-2 classes instead of Auto* classes
133
+ self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
134
+ # Fix missing pad token in GPT-2
135
+ if self.tokenizer.pad_token is None:
136
+ self.tokenizer.pad_token = self.tokenizer.eos_token
137
+
138
+ self.model = GPT2LMHeadModel.from_pretrained("gpt2")
139
  self.model.eval()
140
 
141
  logger.info(f"Initialized PromptAnalyzer with {self.model_name}, specialization: {self.specialization}, hidden_dim: {self.hidden_dim}")
service_registry.py CHANGED
@@ -2,17 +2,19 @@
2
  Simple service registry for dependency injection
3
  """
4
  import logging
5
- import traceback # Add this missing import
6
  from typing import Any, Dict, Optional
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
- # Constants used as keys
11
- MODEL = "model"
12
- PRETRAINED_MODEL = "pretrained_model"
13
  TOKENIZER = "tokenizer"
14
  MODEL_MANAGER = "model_manager"
15
  COMMUNICATOR = "communicator"
 
 
16
 
17
  class ServiceRegistry:
18
  """A simple service registry for dependency management"""
@@ -50,9 +52,9 @@ registry = ServiceRegistry()
50
 
51
  def ensure_models_registered():
52
  """Ensure at least one model is registered in the registry."""
53
- # First check and register CUSTOM model if needed
54
  if not registry.has(MODEL):
55
- logger.info("No model in registry, registering model_Custm")
56
  try:
57
  import os, importlib.util
58
 
@@ -65,12 +67,12 @@ def ensure_models_registered():
65
  logger.info(f"Model directory content: {os.listdir(os.path.dirname(model_path))}")
66
 
67
  if os.path.exists(model_path):
68
- # Dynamic import of model_Custm.py
69
  spec = importlib.util.spec_from_file_location("model_custm", model_path)
70
  model_module = importlib.util.module_from_spec(spec)
71
  spec.loader.exec_module(model_module)
72
 
73
- # Get the model class
74
  if hasattr(model_module, "Wildnerve_tlm01"):
75
  from tokenizer import TokenizerWrapper
76
 
@@ -93,10 +95,10 @@ def ensure_models_registered():
93
  tokenizer=tok
94
  )
95
 
96
- # Register both tokenizer and model
97
  registry.register(TOKENIZER, tok, overwrite=True)
98
  registry.register(MODEL, model, overwrite=True)
99
- logger.info("Successfully registered model_Custm.Wildnerve_tlm01")
100
  return True
101
 
102
  logger.error(f"model_Custm.py not found at {model_path}")
@@ -104,21 +106,21 @@ def ensure_models_registered():
104
 
105
  except Exception as e:
106
  # More detailed error logging
107
- logger.error(f"Failed to register model_Custm: {e}")
108
  logger.error(f"Exception details: {type(e).__name__}")
109
  logger.error(f"Exception traceback: {traceback.format_exc()}")
110
  return False
111
 
112
- # Next make sure PRETRAINED model is registered too
113
  if not registry.has(PRETRAINED_MODEL):
114
- logger.info("No pretrained model in registry, registering model_PrTr")
115
  try:
116
  import os, importlib.util
117
  # Import required modules at this scope
118
  try:
119
- from transformers import AutoModelForCausalLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer
120
  except ImportError:
121
- logger.error("Failed to import required transformer modules")
122
  return False
123
 
124
  # Find model_PrTr.py in the same directory as this file
@@ -131,7 +133,7 @@ def ensure_models_registered():
131
  model_module = importlib.util.module_from_spec(spec)
132
  spec.loader.exec_module(model_module)
133
 
134
- # Use PretrainedTransformer if available, otherwise use Wildnerve_tlm01
135
  model_class = None
136
  if hasattr(model_module, "PretrainedTransformer"):
137
  model_class = getattr(model_module, "PretrainedTransformer")
@@ -143,7 +145,7 @@ def ensure_models_registered():
143
  tok = registry.get(TOKENIZER)
144
  if not tok:
145
  try:
146
- # Try using GPT2Tokenizer directly instead of AutoTokenizer
147
  tok = GPT2Tokenizer.from_pretrained("gpt2")
148
  if tok.pad_token_id is None:
149
  tok.pad_token = tok.eos_token
@@ -154,21 +156,21 @@ def ensure_models_registered():
154
  logger.error(f"Failed to create GPT-2 tokenizer: {e}")
155
  return False
156
 
157
- # Create pretrained model
158
  model = model_class(
159
- model_name="gpt2", # Explicitly use gpt2
160
  tokenizer=tok
161
  )
162
 
163
- # Register as pretrained model
164
  registry.register(PRETRAINED_MODEL, model, overwrite=True)
165
- logger.info("Successfully registered GPT-2 pretrained model")
166
  return True
167
 
168
  logger.error(f"model_PrTr.py not found at {model_path}")
169
 
170
  except Exception as e:
171
- logger.error(f"Failed to register pretrained model: {e}")
172
  logger.error(f"Exception details: {type(e).__name__}")
173
  logger.error(f"Exception traceback: {traceback.format_exc()}")
174
 
 
2
  Simple service registry for dependency injection
3
  """
4
  import logging
5
+ import traceback
6
  from typing import Any, Dict, Optional
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
+ # Constants used as keys - let's clarify with better names
11
+ MODEL = "model" # The custom Wildnerve-tlm01_Hybrid_Model
12
+ PRETRAINED_MODEL = "pretrained_model" # GPT-2 model
13
  TOKENIZER = "tokenizer"
14
  MODEL_MANAGER = "model_manager"
15
  COMMUNICATOR = "communicator"
16
+ PIPELINE = "pipeline"
17
+ TRANSFORMER = "transformer" # Generic transformer key
18
 
19
  class ServiceRegistry:
20
  """A simple service registry for dependency management"""
 
52
 
53
  def ensure_models_registered():
54
  """Ensure at least one model is registered in the registry."""
55
+ # First make sure we have a CUSTOM model (Wildnerve-tlm01_Hybrid_Model)
56
  if not registry.has(MODEL):
57
+ logger.info("No custom model in registry, registering Wildnerve-tlm01_Hybrid_Model")
58
  try:
59
  import os, importlib.util
60
 
 
67
  logger.info(f"Model directory content: {os.listdir(os.path.dirname(model_path))}")
68
 
69
  if os.path.exists(model_path):
70
+ # Dynamic import of model_Custm.py for Wildnerve-tlm01_Hybrid_Model
71
  spec = importlib.util.spec_from_file_location("model_custm", model_path)
72
  model_module = importlib.util.module_from_spec(spec)
73
  spec.loader.exec_module(model_module)
74
 
75
+ # Get the model class for Wildnerve-tlm01_Hybrid_Model
76
  if hasattr(model_module, "Wildnerve_tlm01"):
77
  from tokenizer import TokenizerWrapper
78
 
 
95
  tokenizer=tok
96
  )
97
 
98
+ # Register both tokenizer and the Wildnerve-tlm01_Hybrid_Model
99
  registry.register(TOKENIZER, tok, overwrite=True)
100
  registry.register(MODEL, model, overwrite=True)
101
+ logger.info("Successfully registered Wildnerve-tlm01_Hybrid_Model as MODEL")
102
  return True
103
 
104
  logger.error(f"model_Custm.py not found at {model_path}")
 
106
 
107
  except Exception as e:
108
  # More detailed error logging
109
+ logger.error(f"Failed to register Wildnerve-tlm01_Hybrid_Model: {e}")
110
  logger.error(f"Exception details: {type(e).__name__}")
111
  logger.error(f"Exception traceback: {traceback.format_exc()}")
112
  return False
113
 
114
+ # Then check if we have a GPT-2 PRETRAINED model
115
  if not registry.has(PRETRAINED_MODEL):
116
+ logger.info("No GPT-2 model in registry, registering GPT-2")
117
  try:
118
  import os, importlib.util
119
  # Import required modules at this scope
120
  try:
121
+ from transformers import GPT2LMHeadModel, GPT2Tokenizer
122
  except ImportError:
123
+ logger.error("Failed to import required GPT-2 modules")
124
  return False
125
 
126
  # Find model_PrTr.py in the same directory as this file
 
133
  model_module = importlib.util.module_from_spec(spec)
134
  spec.loader.exec_module(model_module)
135
 
136
+ # Get GPT-2 wrapper class
137
  model_class = None
138
  if hasattr(model_module, "PretrainedTransformer"):
139
  model_class = getattr(model_module, "PretrainedTransformer")
 
145
  tok = registry.get(TOKENIZER)
146
  if not tok:
147
  try:
148
+ # Create GPT-2 tokenizer
149
  tok = GPT2Tokenizer.from_pretrained("gpt2")
150
  if tok.pad_token_id is None:
151
  tok.pad_token = tok.eos_token
 
156
  logger.error(f"Failed to create GPT-2 tokenizer: {e}")
157
  return False
158
 
159
+ # Create GPT-2 model instance
160
  model = model_class(
161
+ model_name="gpt2", # Explicitly use gpt2
162
  tokenizer=tok
163
  )
164
 
165
+ # Register as GPT-2 pretrained model
166
  registry.register(PRETRAINED_MODEL, model, overwrite=True)
167
+ logger.info("Successfully registered GPT-2 as PRETRAINED_MODEL")
168
  return True
169
 
170
  logger.error(f"model_PrTr.py not found at {model_path}")
171
 
172
  except Exception as e:
173
+ logger.error(f"Failed to register GPT-2 model: {e}")
174
  logger.error(f"Exception details: {type(e).__name__}")
175
  logger.error(f"Exception traceback: {traceback.format_exc()}")
176