Upload 4 files
Browse files- model_List.py +51 -14
- service_registry.py +24 -22
model_List.py
CHANGED
|
@@ -1,22 +1,25 @@
|
|
| 1 |
# model_List.py - Model selection and analysis component with advanced features
|
| 2 |
-
import logging
|
| 3 |
-
import time
|
| 4 |
-
import math
|
| 5 |
-
import torch
|
| 6 |
-
import importlib.util
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
-
import
|
| 10 |
-
|
| 11 |
-
import
|
| 12 |
-
import numpy as np
|
| 13 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
| 14 |
import nltk
|
| 15 |
try:
|
| 16 |
nltk.data.find('tokenizers/punkt')
|
| 17 |
except LookupError:
|
| 18 |
nltk.download("punkt")
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# More robust config import
|
| 22 |
try:
|
|
@@ -37,8 +40,38 @@ except ImportError:
|
|
| 37 |
# Add SmartHybridAttention imports
|
| 38 |
from utils.smartHybridAttention import SmartHybridAttention, get_hybrid_attention_config
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
logger = logging.getLogger(__name__)
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
class PromptAnalyzer:
|
| 43 |
"""
|
| 44 |
Enhanced prompt analyzer that combines:
|
|
@@ -96,9 +129,13 @@ class PromptAnalyzer:
|
|
| 96 |
self.sentence_model = get_sentence_transformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 97 |
self.logger.info(f"Using SentenceTransformer model: sentence-transformers/all-MiniLM-L6-v2")
|
| 98 |
|
| 99 |
-
# Use GPT-2
|
| 100 |
-
self.tokenizer =
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
self.model.eval()
|
| 103 |
|
| 104 |
logger.info(f"Initialized PromptAnalyzer with {self.model_name}, specialization: {self.specialization}, hidden_dim: {self.hidden_dim}")
|
|
|
|
| 1 |
# model_List.py - Model selection and analysis component with advanced features
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
+
import json
|
| 5 |
+
import time
|
| 6 |
+
import math
|
|
|
|
|
|
|
| 7 |
import nltk
|
| 8 |
try:
|
| 9 |
nltk.data.find('tokenizers/punkt')
|
| 10 |
except LookupError:
|
| 11 |
nltk.download("punkt")
|
| 12 |
+
|
| 13 |
+
import torch
|
| 14 |
+
import logging
|
| 15 |
+
import numpy as np
|
| 16 |
+
import importlib.util
|
| 17 |
+
from enum import Enum # Add this import for Enum
|
| 18 |
+
from service_registry import registry, MODEL, PRETRAINED_MODEL
|
| 19 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 20 |
+
from typing import List, Tuple, Dict, Type, Any, Optional
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
|
| 24 |
# More robust config import
|
| 25 |
try:
|
|
|
|
| 40 |
# Add SmartHybridAttention imports
|
| 41 |
from utils.smartHybridAttention import SmartHybridAttention, get_hybrid_attention_config
|
| 42 |
|
| 43 |
+
# Fix: Import get_sentence_transformer properly
|
| 44 |
+
try:
|
| 45 |
+
from utils.transformer_utils import get_sentence_transformer
|
| 46 |
+
except ImportError:
|
| 47 |
+
# Create a fallback implementation if the import fails
|
| 48 |
+
def get_sentence_transformer(model_name):
|
| 49 |
+
try:
|
| 50 |
+
from sentence_transformers import SentenceTransformer
|
| 51 |
+
return SentenceTransformer(model_name)
|
| 52 |
+
except ImportError:
|
| 53 |
+
logger.error("sentence_transformers package not available")
|
| 54 |
+
# Return a minimal placeholder that won't crash initialization
|
| 55 |
+
class MinimalSentenceTransformer:
|
| 56 |
+
def __init__(self, *args, **kwargs):
|
| 57 |
+
pass
|
| 58 |
+
def encode(self, text):
|
| 59 |
+
return [0.0] * 384 # Return zero vector with typical dimension
|
| 60 |
+
return MinimalSentenceTransformer()
|
| 61 |
+
|
| 62 |
+
from model_Custm import Wildnerve_tlm01 as CustomModel
|
| 63 |
+
|
| 64 |
+
logging.basicConfig(level=logging.INFO)
|
| 65 |
logger = logging.getLogger(__name__)
|
| 66 |
|
| 67 |
+
class ModelType(Enum):
|
| 68 |
+
CUSTOM = "model_Custm.py" # Wildnerve-tlm01 custom implementation
|
| 69 |
+
PRETRAINED = "model_PrTr.py" # GPT2 pretrained models
|
| 70 |
+
# COMBINED = "model_Combn.py" # Hybrid approach with both
|
| 71 |
+
|
| 72 |
+
# Replace generic Auto* classes with specific GPT-2 classes
|
| 73 |
+
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
| 74 |
+
|
| 75 |
class PromptAnalyzer:
|
| 76 |
"""
|
| 77 |
Enhanced prompt analyzer that combines:
|
|
|
|
| 129 |
self.sentence_model = get_sentence_transformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 130 |
self.logger.info(f"Using SentenceTransformer model: sentence-transformers/all-MiniLM-L6-v2")
|
| 131 |
|
| 132 |
+
# Use specific GPT-2 classes instead of Auto* classes
|
| 133 |
+
self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
|
| 134 |
+
# Fix missing pad token in GPT-2
|
| 135 |
+
if self.tokenizer.pad_token is None:
|
| 136 |
+
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 137 |
+
|
| 138 |
+
self.model = GPT2LMHeadModel.from_pretrained("gpt2")
|
| 139 |
self.model.eval()
|
| 140 |
|
| 141 |
logger.info(f"Initialized PromptAnalyzer with {self.model_name}, specialization: {self.specialization}, hidden_dim: {self.hidden_dim}")
|
service_registry.py
CHANGED
|
@@ -2,17 +2,19 @@
|
|
| 2 |
Simple service registry for dependency injection
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
-
import traceback
|
| 6 |
from typing import Any, Dict, Optional
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
-
# Constants used as keys
|
| 11 |
-
MODEL = "model"
|
| 12 |
-
PRETRAINED_MODEL = "pretrained_model"
|
| 13 |
TOKENIZER = "tokenizer"
|
| 14 |
MODEL_MANAGER = "model_manager"
|
| 15 |
COMMUNICATOR = "communicator"
|
|
|
|
|
|
|
| 16 |
|
| 17 |
class ServiceRegistry:
|
| 18 |
"""A simple service registry for dependency management"""
|
|
@@ -50,9 +52,9 @@ registry = ServiceRegistry()
|
|
| 50 |
|
| 51 |
def ensure_models_registered():
|
| 52 |
"""Ensure at least one model is registered in the registry."""
|
| 53 |
-
# First
|
| 54 |
if not registry.has(MODEL):
|
| 55 |
-
logger.info("No model in registry, registering
|
| 56 |
try:
|
| 57 |
import os, importlib.util
|
| 58 |
|
|
@@ -65,12 +67,12 @@ def ensure_models_registered():
|
|
| 65 |
logger.info(f"Model directory content: {os.listdir(os.path.dirname(model_path))}")
|
| 66 |
|
| 67 |
if os.path.exists(model_path):
|
| 68 |
-
# Dynamic import of model_Custm.py
|
| 69 |
spec = importlib.util.spec_from_file_location("model_custm", model_path)
|
| 70 |
model_module = importlib.util.module_from_spec(spec)
|
| 71 |
spec.loader.exec_module(model_module)
|
| 72 |
|
| 73 |
-
# Get the model class
|
| 74 |
if hasattr(model_module, "Wildnerve_tlm01"):
|
| 75 |
from tokenizer import TokenizerWrapper
|
| 76 |
|
|
@@ -93,10 +95,10 @@ def ensure_models_registered():
|
|
| 93 |
tokenizer=tok
|
| 94 |
)
|
| 95 |
|
| 96 |
-
# Register both tokenizer and
|
| 97 |
registry.register(TOKENIZER, tok, overwrite=True)
|
| 98 |
registry.register(MODEL, model, overwrite=True)
|
| 99 |
-
logger.info("Successfully registered
|
| 100 |
return True
|
| 101 |
|
| 102 |
logger.error(f"model_Custm.py not found at {model_path}")
|
|
@@ -104,21 +106,21 @@ def ensure_models_registered():
|
|
| 104 |
|
| 105 |
except Exception as e:
|
| 106 |
# More detailed error logging
|
| 107 |
-
logger.error(f"Failed to register
|
| 108 |
logger.error(f"Exception details: {type(e).__name__}")
|
| 109 |
logger.error(f"Exception traceback: {traceback.format_exc()}")
|
| 110 |
return False
|
| 111 |
|
| 112 |
-
#
|
| 113 |
if not registry.has(PRETRAINED_MODEL):
|
| 114 |
-
logger.info("No
|
| 115 |
try:
|
| 116 |
import os, importlib.util
|
| 117 |
# Import required modules at this scope
|
| 118 |
try:
|
| 119 |
-
from transformers import
|
| 120 |
except ImportError:
|
| 121 |
-
logger.error("Failed to import required
|
| 122 |
return False
|
| 123 |
|
| 124 |
# Find model_PrTr.py in the same directory as this file
|
|
@@ -131,7 +133,7 @@ def ensure_models_registered():
|
|
| 131 |
model_module = importlib.util.module_from_spec(spec)
|
| 132 |
spec.loader.exec_module(model_module)
|
| 133 |
|
| 134 |
-
#
|
| 135 |
model_class = None
|
| 136 |
if hasattr(model_module, "PretrainedTransformer"):
|
| 137 |
model_class = getattr(model_module, "PretrainedTransformer")
|
|
@@ -143,7 +145,7 @@ def ensure_models_registered():
|
|
| 143 |
tok = registry.get(TOKENIZER)
|
| 144 |
if not tok:
|
| 145 |
try:
|
| 146 |
-
#
|
| 147 |
tok = GPT2Tokenizer.from_pretrained("gpt2")
|
| 148 |
if tok.pad_token_id is None:
|
| 149 |
tok.pad_token = tok.eos_token
|
|
@@ -154,21 +156,21 @@ def ensure_models_registered():
|
|
| 154 |
logger.error(f"Failed to create GPT-2 tokenizer: {e}")
|
| 155 |
return False
|
| 156 |
|
| 157 |
-
# Create
|
| 158 |
model = model_class(
|
| 159 |
-
model_name="gpt2",
|
| 160 |
tokenizer=tok
|
| 161 |
)
|
| 162 |
|
| 163 |
-
# Register as pretrained model
|
| 164 |
registry.register(PRETRAINED_MODEL, model, overwrite=True)
|
| 165 |
-
logger.info("Successfully registered GPT-2
|
| 166 |
return True
|
| 167 |
|
| 168 |
logger.error(f"model_PrTr.py not found at {model_path}")
|
| 169 |
|
| 170 |
except Exception as e:
|
| 171 |
-
logger.error(f"Failed to register
|
| 172 |
logger.error(f"Exception details: {type(e).__name__}")
|
| 173 |
logger.error(f"Exception traceback: {traceback.format_exc()}")
|
| 174 |
|
|
|
|
| 2 |
Simple service registry for dependency injection
|
| 3 |
"""
|
| 4 |
import logging
|
| 5 |
+
import traceback
|
| 6 |
from typing import Any, Dict, Optional
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
+
# Constants used as keys - let's clarify with better names
|
| 11 |
+
MODEL = "model" # The custom Wildnerve-tlm01_Hybrid_Model
|
| 12 |
+
PRETRAINED_MODEL = "pretrained_model" # GPT-2 model
|
| 13 |
TOKENIZER = "tokenizer"
|
| 14 |
MODEL_MANAGER = "model_manager"
|
| 15 |
COMMUNICATOR = "communicator"
|
| 16 |
+
PIPELINE = "pipeline"
|
| 17 |
+
TRANSFORMER = "transformer" # Generic transformer key
|
| 18 |
|
| 19 |
class ServiceRegistry:
|
| 20 |
"""A simple service registry for dependency management"""
|
|
|
|
| 52 |
|
| 53 |
def ensure_models_registered():
|
| 54 |
"""Ensure at least one model is registered in the registry."""
|
| 55 |
+
# First make sure we have a CUSTOM model (Wildnerve-tlm01_Hybrid_Model)
|
| 56 |
if not registry.has(MODEL):
|
| 57 |
+
logger.info("No custom model in registry, registering Wildnerve-tlm01_Hybrid_Model")
|
| 58 |
try:
|
| 59 |
import os, importlib.util
|
| 60 |
|
|
|
|
| 67 |
logger.info(f"Model directory content: {os.listdir(os.path.dirname(model_path))}")
|
| 68 |
|
| 69 |
if os.path.exists(model_path):
|
| 70 |
+
# Dynamic import of model_Custm.py for Wildnerve-tlm01_Hybrid_Model
|
| 71 |
spec = importlib.util.spec_from_file_location("model_custm", model_path)
|
| 72 |
model_module = importlib.util.module_from_spec(spec)
|
| 73 |
spec.loader.exec_module(model_module)
|
| 74 |
|
| 75 |
+
# Get the model class for Wildnerve-tlm01_Hybrid_Model
|
| 76 |
if hasattr(model_module, "Wildnerve_tlm01"):
|
| 77 |
from tokenizer import TokenizerWrapper
|
| 78 |
|
|
|
|
| 95 |
tokenizer=tok
|
| 96 |
)
|
| 97 |
|
| 98 |
+
# Register both tokenizer and the Wildnerve-tlm01_Hybrid_Model
|
| 99 |
registry.register(TOKENIZER, tok, overwrite=True)
|
| 100 |
registry.register(MODEL, model, overwrite=True)
|
| 101 |
+
logger.info("Successfully registered Wildnerve-tlm01_Hybrid_Model as MODEL")
|
| 102 |
return True
|
| 103 |
|
| 104 |
logger.error(f"model_Custm.py not found at {model_path}")
|
|
|
|
| 106 |
|
| 107 |
except Exception as e:
|
| 108 |
# More detailed error logging
|
| 109 |
+
logger.error(f"Failed to register Wildnerve-tlm01_Hybrid_Model: {e}")
|
| 110 |
logger.error(f"Exception details: {type(e).__name__}")
|
| 111 |
logger.error(f"Exception traceback: {traceback.format_exc()}")
|
| 112 |
return False
|
| 113 |
|
| 114 |
+
# Then check if we have a GPT-2 PRETRAINED model
|
| 115 |
if not registry.has(PRETRAINED_MODEL):
|
| 116 |
+
logger.info("No GPT-2 model in registry, registering GPT-2")
|
| 117 |
try:
|
| 118 |
import os, importlib.util
|
| 119 |
# Import required modules at this scope
|
| 120 |
try:
|
| 121 |
+
from transformers import GPT2LMHeadModel, GPT2Tokenizer
|
| 122 |
except ImportError:
|
| 123 |
+
logger.error("Failed to import required GPT-2 modules")
|
| 124 |
return False
|
| 125 |
|
| 126 |
# Find model_PrTr.py in the same directory as this file
|
|
|
|
| 133 |
model_module = importlib.util.module_from_spec(spec)
|
| 134 |
spec.loader.exec_module(model_module)
|
| 135 |
|
| 136 |
+
# Get GPT-2 wrapper class
|
| 137 |
model_class = None
|
| 138 |
if hasattr(model_module, "PretrainedTransformer"):
|
| 139 |
model_class = getattr(model_module, "PretrainedTransformer")
|
|
|
|
| 145 |
tok = registry.get(TOKENIZER)
|
| 146 |
if not tok:
|
| 147 |
try:
|
| 148 |
+
# Create GPT-2 tokenizer
|
| 149 |
tok = GPT2Tokenizer.from_pretrained("gpt2")
|
| 150 |
if tok.pad_token_id is None:
|
| 151 |
tok.pad_token = tok.eos_token
|
|
|
|
| 156 |
logger.error(f"Failed to create GPT-2 tokenizer: {e}")
|
| 157 |
return False
|
| 158 |
|
| 159 |
+
# Create GPT-2 model instance
|
| 160 |
model = model_class(
|
| 161 |
+
model_name="gpt2", # Explicitly use gpt2
|
| 162 |
tokenizer=tok
|
| 163 |
)
|
| 164 |
|
| 165 |
+
# Register as GPT-2 pretrained model
|
| 166 |
registry.register(PRETRAINED_MODEL, model, overwrite=True)
|
| 167 |
+
logger.info("Successfully registered GPT-2 as PRETRAINED_MODEL")
|
| 168 |
return True
|
| 169 |
|
| 170 |
logger.error(f"model_PrTr.py not found at {model_path}")
|
| 171 |
|
| 172 |
except Exception as e:
|
| 173 |
+
logger.error(f"Failed to register GPT-2 model: {e}")
|
| 174 |
logger.error(f"Exception details: {type(e).__name__}")
|
| 175 |
logger.error(f"Exception traceback: {traceback.format_exc()}")
|
| 176 |
|