Upload 8 files

Browse files

Files changed (7) hide show

adapter_layer.py +105 -58
config.json +12 -1
config.py +29 -6
model_Custm.py +24 -14
model_List.py +212 -180
model_PrTr.py +104 -25
service_registry.py +3 -3

adapter_layer.py CHANGED Viewed

@@ -208,65 +208,112 @@ class WildnerveModelAdapter:
                 gen_list.append(s)
         return " ".join(tech_list).strip(), " ".join(gen_list).strip()
-    def generate(self, prompt: str, **kwargs) -> str:
-        """Generate a response to the given prompt."""
-        # Determine prompt type
-        primary, _ = PromptAnalyzer().analyze_prompt(prompt) if hasattr(PromptAnalyzer(), 'analyze_prompt') else (None, None)
-        # Set appropriate max_length to prevent length errors
-        if 'max_length' in kwargs and isinstance(kwargs['max_length'], int):
-            if kwargs['max_length'] < 512:  # If max_length is too small
-                kwargs['max_length'] = 512  # Use a reasonable default
-        else:
-            kwargs['max_length'] = 1024  # Set a default if not provided
-        # Try using the pretrained GPT-2 model first for generation
-        pre = registry.get(PRETRAINED_MODEL)
-        if pre:
             try:
-                logger.info("Using GPT-2 pretrained model for generation")
-                # Try to use the pretrained model's generate method
-                if hasattr(pre, "generate"):
-                    # Check the signature of the generate method to determine correct parameters
-                    import inspect
-                    sig = inspect.signature(pre.generate)
-                    if "prompt" in sig.parameters:
-                        return pre.generate(prompt=prompt, **kwargs)
-                    else:
-                        # If no prompt parameter, try tokenizing first
-                        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
-                        return pre.generate(input_ids=inputs.input_ids, **kwargs)  # Explicitly pass as input_ids
-                else:
-                    logger.warning("Pretrained model doesn't have generate method")
             except Exception as e:
-                logger.error(f"Error using pretrained model: {e}")
-        # Fall back to using the custom model if needed
-        if self.model:
-            try:
-                logger.info("Using custom model for generation")
-                # Check if the model is expecting a prompt parameter or input_ids
-                import inspect
-                if hasattr(self.model, "generate"):
-                    sig = inspect.signature(self.model.generate)
-                    if "prompt" in sig.parameters:
-                        # Model accepts prompt parameter directly
-                        return self.model.generate(prompt=prompt, **kwargs)  # Explicitly pass as prompt
-                    else:
-                        # Model expects tokenized input_ids instead
-                        logger.info("Model expects tokenized input - converting prompt to input_ids")
-                        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
-                        return self.model.generate(input_ids=inputs.input_ids, **kwargs)  # Explicitly pass as input_ids
-                else:
-                    logger.error("Model has no generate method")
-                    # Simple fallback for models without generate
-                    return f"I'm processing your request about '{prompt[:30]}...'"
-            except Exception as e:
-                logger.error(f"Error using custom model: {e}")
-                # Add last-chance fallback with generic response
-                return f"I apologize, but I'm experiencing some technical difficulties processing your request about '{prompt[:30]}...'. (Error: {str(e)})"
-        # Final fallback
-        return f"I apologize, but I'm unable to process your request about '{prompt[:30]}...' at this time."

                 gen_list.append(s)
         return " ".join(tech_list).strip(), " ".join(gen_list).strip()
+    def generate(self, text_input, max_length=None, **kwargs):  # Routes text_input to the backend picked by PromptAnalyzer and returns that backend's generate() result, or a fallback message string
+        """Generate text using the model - centralized generation point"""
+        try:
+            # Ask PromptAnalyzer which backend fits the prompt; the first element of its result is compared against "model_Custm" below
             try:
+                from model_List import PromptAnalyzer
+                analyzer = PromptAnalyzer()
+                model_type, confidence = analyzer.analyze_prompt(text_input)  # expects a 2-item result
+                logger.info(f"PromptAnalyzer selected {model_type} with confidence {confidence:.2f}")  # if confidence is not numeric this format raises and the except below forces model_Custm
             except Exception as e:
+                logger.error(f"Error using PromptAnalyzer: {e}")
+                model_type = "model_Custm"  # Default to custom model on error
+            # Default generation parameters; every entry except max_length is read from kwargs with the default shown
+            generation_kwargs = {
+                'max_length': max_length or 150,  # 150 when max_length is None (or 0)
+                'temperature': kwargs.get('temperature', 0.7),
+                'top_p': kwargs.get('top_p', 0.95),
+                'top_k': kwargs.get('top_k', 50),
+                'repetition_penalty': kwargs.get('repetition_penalty', 1.3),  # Increased from 1.2
+                'no_repeat_ngram_size': kwargs.get('no_repeat_ngram_size', 3),  # Increased from 2
+                'do_sample': kwargs.get('do_sample', True),
+                'num_return_sequences': kwargs.get('num_return_sequences', 1),
+                'early_stopping': kwargs.get('early_stopping', True),
+                'bad_words_ids': kwargs.get('bad_words_ids', None),  # None unless the caller supplies one, so nothing is blocked by default
+                'min_length': kwargs.get('min_length', 10),  # Lower bound on response length (default 10)
+            }
+            # Add a default penalty_alpha when the caller did not pass one (in Hugging Face generate() this is the contrastive-search degeneration penalty)
+            if 'penalty_alpha' not in kwargs:
+                generation_kwargs['penalty_alpha'] = 0.6  # NOTE(review): GPT-2 is decoder-only, and HF appears to use penalty_alpha only when do_sample is False - confirm this has any effect with do_sample=True above
+            # Caller-supplied kwargs win over the defaults above; 'prompt' and 'context' are dropped so they are not forwarded alongside prompt=/input_ids=
+            generation_kwargs.update({k:v for k,v in kwargs.items() if k not in ('prompt', 'context')})
+            if model_type == "model_Custm":
+                # Use the custom Wildnerve model registered under MODEL
+                custom_model = registry.get(MODEL)
+                if custom_model:
+                    try:
+                        logger.info("Using custom Wildnerve-tlm01_Hybrid_Model for technical prompt")
+                        # Pass the raw prompt if generate() declares a "prompt" parameter, otherwise tokenize and pass input_ids
+                        import inspect
+                        if hasattr(custom_model, "generate"):
+                            sig = inspect.signature(custom_model.generate)
+                            if "prompt" in sig.parameters:
+                                return custom_model.generate(prompt=text_input, **generation_kwargs)  # NOTE(review): Wildnerve_tlm01.generate (model_Custm.py) delegates to the registry's "adapter_layer".generate when present - confirm this cannot recurse back here
+                            else:
+                                # If no prompt parameter, try tokenizing first
+                                inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
+                                return custom_model.generate(input_ids=inputs.input_ids, **generation_kwargs)
+                        else:
+                            logger.warning("Custom model doesn't have generate method, falling back to pretrained")  # NOTE: the pretrained branch is the else of this if and does not run; control falls through to the self.model fallback below
+                    except Exception as e:
+                        logger.error(f"Error using custom model: {e}")  # logged only; falls through to the self.model fallback below
+            else:
+                # Use the Pretrained model (GPT-2) for general topics
+                pre = registry.get(PRETRAINED_MODEL)
+                if pre:
+                    try:
+                        logger.info("Using GPT-2 pretrained model for general prompt")
+                        # Try to use the pretrained model's generate method
+                        if hasattr(pre, "generate"):
+                            # Check the signature of the generate method to determine correct parameters
+                            import inspect
+                            sig = inspect.signature(pre.generate)
+                            if "prompt" in sig.parameters:
+                                return pre.generate(prompt=text_input, **generation_kwargs)
+                            else:
+                                # If no prompt parameter, try tokenizing first
+                                inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
+                                return pre.generate(input_ids=inputs.input_ids, **generation_kwargs)  # Explicitly pass as input_ids
+                        else:
+                            logger.warning("Pretrained model doesn't have generate method")
+                    except Exception as e:
+                        logger.error(f"Error using pretrained model: {e}")  # logged only; falls through to the self.model fallback below
+            # Reached when the selected backend was missing from the registry, lacked generate(), or raised: try the adapter's own self.model
+            if self.model:
+                try:
+                    logger.info("Using custom model for generation")
+                    # Check if the model is expecting a prompt parameter or input_ids
+                    import inspect
+                    if hasattr(self.model, "generate"):
+                        sig = inspect.signature(self.model.generate)
+                        if "prompt" in sig.parameters:
+                            # Model accepts prompt parameter directly
+                            return self.model.generate(prompt=text_input, **generation_kwargs)  # Explicitly pass as prompt
+                        else:
+                            # Model expects tokenized input_ids instead
+                            logger.info("Model expects tokenized input - converting prompt to input_ids")
+                            inputs = self.tokenizer(text_input, return_tensors="pt", truncation=True, padding=True)
+                            return self.model.generate(input_ids=inputs.input_ids, **generation_kwargs)  # Explicitly pass as input_ids
+                    else:
+                        logger.error("Model has no generate method")
+                        # Simple fallback for models without generate
+                        return f"I'm processing your request about '{text_input[:30]}...'"
+                except Exception as e:
+                    logger.error(f"Error using custom model: {e}")
+                    # Last-chance fallback: return an apology string that includes the error text
+                    return f"I apologize, but I'm experiencing some technical difficulties processing your request about '{text_input[:30]}...'. (Error: {str(e)})"
+            # Final fallback when self.model is falsy
+            return f"I apologize, but I'm unable to process your request about '{text_input[:30]}...' at this time."
+        except Exception as e:
+            logger.error(f"Error in generate method: {e}")
+            return f"An error occurred while generating text: {str(e)}"

config.json CHANGED Viewed

@@ -241,8 +241,19 @@
     "HIDDEN_DIM": 768,
     "MAX_CACHE_SIZE": 10
   },
   "MAX_ACTIVE_MODELS": 5,
   "MODEL_IDLE_THRESHOLD": 600,
   "MAX_MEMORY_USAGE": 0.8,
-  "TOP_K": 3
 }

     "HIDDEN_DIM": 768,
     "MAX_CACHE_SIZE": 10
   },
+  "MODEL_PRIORITY": {
+    "PRIMARY": "model_Custm",
+    "SECONDARY": "model_PrTr",
+    "USE_PRETRAINED_FALLBACK": true
+  },
   "MAX_ACTIVE_MODELS": 5,
   "MODEL_IDLE_THRESHOLD": 600,
   "MAX_MEMORY_USAGE": 0.8,
+  "TOP_K": 3,
+  "TOPIC_KEYWORDS": {
+    "programming": ["python", "java", "javascript"],
+    "computer_science": ["algorithm", "complexity"],
+    "software_engineering": ["design pattern", "architecture"],
+    "web_development": ["frontend", "backend"]
+  }
 }

config.py CHANGED Viewed

@@ -388,6 +388,12 @@ class AppConfig(BaseModel):
     MAX_ACTIVE_MODELS: int = Field(default=2)
     MODEL_IDLE_THRESHOLD: int = Field(default=600)
 def load_config() -> AppConfig:
     config_path = os.path.join(os.path.dirname(__file__), "config.json")
     logger.info(f"Loading config from {config_path}")
@@ -395,14 +401,31 @@ def load_config() -> AppConfig:
         with open(config_path, "r") as f:
             raw = json.load(f)
-            # helper to convert a dict into an object with attribute access
-            class AttrDict(dict):
-                __getattr__ = dict.get
-                __setattr__ = dict.__setitem__
-            # wrap TRANSFORMER_CONFIG if it's a dict
             if isinstance(raw.get("TRANSFORMER_CONFIG"), dict):
                 raw["TRANSFORMER_CONFIG"] = AttrDict(raw["TRANSFORMER_CONFIG"])
     except Exception as e:
         logger.error(f"Failed to read config.json: {e}", exc_info=True)
         raise

     MAX_ACTIVE_MODELS: int = Field(default=2)
     MODEL_IDLE_THRESHOLD: int = Field(default=600)
class AttrDict(dict):
    """Dictionary subclass with attribute-style access.

    ``d.key`` reads ``d["key"]`` and yields ``None`` when the key is absent,
    ``d.key = value`` stores ``d["key"]``, and ``del d.key`` removes it
    (raising ``KeyError`` when the key does not exist, like ``del d["key"]``).

    Missing *dunder* names (``__deepcopy__``, ``__getstate__``, ...) raise
    ``AttributeError`` instead of returning ``None``.  Those lookups are
    protocol probes made by ``copy``, ``pickle``, ``hasattr`` and similar
    machinery; answering them with ``None`` makes the probe "succeed" with a
    non-callable hook, which breaks ``copy.deepcopy`` and pickling on Python
    versions where ``object`` does not define ``__getstate__`` itself.
    """

    def __getattr__(self, name):
        # Only called after normal attribute lookup has failed, so real dict
        # methods and attributes are never shadowed by this hook.
        try:
            return self[name]
        except KeyError:
            if name.startswith("__") and name.endswith("__"):
                # Protocol probe for a hook this object does not provide.
                raise AttributeError(name) from None
            # Ordinary missing keys keep the lenient "None" behaviour.
            return None

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
 def load_config() -> AppConfig:
     config_path = os.path.join(os.path.dirname(__file__), "config.json")
     logger.info(f"Loading config from {config_path}")
         with open(config_path, "r") as f:
             raw = json.load(f)
+            # Always wrap TRANSFORMER_CONFIG for attribute access
             if isinstance(raw.get("TRANSFORMER_CONFIG"), dict):
                 raw["TRANSFORMER_CONFIG"] = AttrDict(raw["TRANSFORMER_CONFIG"])
+                # Ensure GPT-2 parameters
+                if not isinstance(raw["TRANSFORMER_CONFIG"].get("VOCAB_SIZE"), int) or raw["TRANSFORMER_CONFIG"]["VOCAB_SIZE"] != 50257:
+                    raw["TRANSFORMER_CONFIG"]["VOCAB_SIZE"] = 50257  # Standard GPT-2 vocab size
+                if raw["TRANSFORMER_CONFIG"].get("MODEL_NAME") != "gpt2":
+                    raw["TRANSFORMER_CONFIG"]["MODEL_NAME"] = "gpt2"
+                # Ensure OUTPUT_SIZE matches VOCAB_SIZE
+                raw["TRANSFORMER_CONFIG"]["OUTPUT_SIZE"] = raw["TRANSFORMER_CONFIG"]["VOCAB_SIZE"]
+            # Add generation parameters if missing
+            if "GENERATION_CONFIG" not in raw:
+                raw["GENERATION_CONFIG"] = {
+                    "temperature": 0.7,
+                    "top_p": 0.95,
+                    "top_k": 50,
+                    "repetition_penalty": 1.3,
+                    "no_repeat_ngram_size": 3,
+                    "do_sample": True,
+                    "penalty_alpha": 0.6
+                }
     except Exception as e:
         logger.error(f"Failed to read config.json: {e}", exc_info=True)
         raise

model_Custm.py CHANGED Viewed

@@ -81,15 +81,15 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
       - SmartHybridAttention for better context handling"""
     def __init__(
         self,
-        vocab_size=30522,  # Default BERT vocab size
         specialization="general",
         dataset_path=None,
-        model_name="Wildnerve-tlm01_Hybrid_Model",  # Primary model name
         embedding_dim=768,
         num_heads=12,
         hidden_dim=768,
         num_layers=6,
-        output_size=768,
         dropout=0.1,
         max_seq_length=512,
         pooling_mode="mean",
@@ -123,18 +123,16 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
                 self.tokenizer = registry.get(TOKENIZER)
             else:
                 try:
-                    from transformers import AutoTokenizer
-                    self.tokenizer = AutoTokenizer.from_pretrained("Wildnerve-tlm01_Hybrid_Model")
-                    logger.info("Loaded primary tokenizer: Wildnerve-tlm01_Hybrid_Model")
                 except Exception as e:
-                    logger.warning(f"Primary tokenizer load failed: {e}")
-                    try:
-                        from transformers import BertTokenizer
-                        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-                        logger.info("Loaded fallback tokenizer: bert-base-uncased")
-                    except Exception as e2:
-                        logger.error(f"Fallback tokenizer load failed: {e2}")
-                        self.tokenizer = None
         registry.register(TOKENIZER, self.tokenizer, overwrite=True)
         # Register this model instance in the registry by specialization
@@ -363,6 +361,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
         embeddings = self.encode_sentences([sentence1, sentence2])
         return np.dot(embeddings[0], embeddings[1]) / (np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1]))
     def generate(
         self,
         prompt=None,
@@ -373,6 +372,17 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
         **kwargs
     ) -> str:
         """Generate text using the model, supporting either prompt string or input_ids."""
         # Log what we're working with
         logger.info(f"Generate called with: prompt={type(prompt).__name__ if prompt else None}, input_ids={type(input_ids).__name__ if input_ids else None}")

       - SmartHybridAttention for better context handling"""
     def __init__(
         self,
+        vocab_size=50257,  # Updated to GPT-2 vocab size
         specialization="general",
         dataset_path=None,
+        model_name="gpt2",  # Standardized to GPT-2
         embedding_dim=768,
         num_heads=12,
         hidden_dim=768,
         num_layers=6,
+        output_size=50257,  # Updated to GPT-2 vocab size
         dropout=0.1,
         max_seq_length=512,
         pooling_mode="mean",
                 self.tokenizer = registry.get(TOKENIZER)
             else:
                 try:
+                    from transformers import GPT2Tokenizer
+                    self.tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+                    # Ensure pad_token is set for GPT-2
+                    if self.tokenizer.pad_token_id is None:
+                        self.tokenizer.pad_token = self.tokenizer.eos_token
+                        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
                 except Exception as e:
+                    logger.warning(f"Failed to load GPT-2 tokenizer: {e}")
+                    from utils.transformer_utils import get_tokenizer
+                    self.tokenizer = get_tokenizer()
         registry.register(TOKENIZER, self.tokenizer, overwrite=True)
         # Register this model instance in the registry by specialization
         embeddings = self.encode_sentences([sentence1, sentence2])
         return np.dot(embeddings[0], embeddings[1]) / (np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1]))
+    # generate() delegates to the registered "adapter_layer" (the central generation point) when one is available
     def generate(
         self,
         prompt=None,
         **kwargs
     ) -> str:
         """Generate text using the model, supporting either prompt string or input_ids."""
+        # Try to use adapter_layer.generate if available
+        adapter_layer = registry.get("adapter_layer")
+        if adapter_layer and hasattr(adapter_layer, "generate"):
+            if prompt:
+                return adapter_layer.generate(prompt, max_length=max_length, temperature=temperature, **kwargs)
+            elif input_ids is not None and self.tokenizer:
+                # Convert input_ids back to text to use centralized generation
+                decoded_prompt = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
+                return adapter_layer.generate(decoded_prompt, max_length=max_length, temperature=temperature, **kwargs)
+        # Fall back to direct generation if adapter_layer is not available
         # Log what we're working with
         logger.info(f"Generate called with: prompt={type(prompt).__name__ if prompt else None}, input_ids={type(input_ids).__name__ if input_ids else None}")

model_List.py CHANGED Viewed

@@ -32,22 +32,46 @@ class PromptAnalyzer:
     - SmartHybridAttention for analyzing complex or long prompts
     - Performance tracking and caching for efficiency
     """
-    def __init__(self):
         self.logger = logging.getLogger(__name__)
         # For caching and performance tracking
         self._model_cache = {}
         self._performance_metrics = {}
-        # Define topic keywords for the simple approach
-        self.predefined_topics = {
-            "programming": ["code", "function", "class", "algorithm", "programming", "python", "javascript", "java", "c++", "developer", "api"],
-            "science": ["science", "physics", "chemistry", "biology", "scientific", "experiment", "hypothesis", "theory"],
-            "mathematics": ["math", "equation", "calculus", "algebra", "geometry", "theorem", "mathematical"],
-            "history": ["history", "historical", "ancient", "century", "war", "civilization", "empire"],
-            "general": ["how", "what", "when", "where", "why", "who", "can you", "please", "thanks", "hello"]
-        }
         # Initialize model_class attribute
         self.model_class = None
@@ -67,30 +91,88 @@ class PromptAnalyzer:
         except Exception:
             pass
     def _init_advanced_tools(self):
-        """Initialize advanced analysis tools with proper error handling"""
         self.sentence_model = None
         self.gpt2_model = None
         self.gpt2_tokenizer = None
-        # Only initialize if enabled by environment variable
-        if os.environ.get("LOAD_PRETRAINED_MODELS", "0") == "1":
-            try:
-                from utils.transformer_utils import get_sentence_transformer
-                self.sentence_model = get_sentence_transformer('sentence-transformers/all-MiniLM-L6-v2')
-                self.logger.info("Loaded SentenceTransformer model successfully")
-            except Exception as e:
-                self.logger.warning(f"Failed to load SentenceTransformer: {e}")
-            try:
-                from transformers import AutoModelForCausalLM, AutoTokenizer
-                self.gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
-                self.gpt2_model = AutoModelForCausalLM.from_pretrained("gpt2")
-                self.gpt2_model.eval()
-                self.logger.info("Loaded GPT-2 model for perplexity calculation")
-            except Exception as e:
-                self.logger.warning(f"Failed to load GPT-2: {e}")
         # Initialize SmartHybridAttention
         try:
             attention_config = get_hybrid_attention_config()
@@ -253,173 +335,123 @@ class PromptAnalyzer:
             self.logger.error(f"Error in attention-based analysis: {e}")
             return None
-    def analyze_prompt(self, prompt):
-        """
-        Enhanced prompt analysis with SmartHybridAttention for complex prompts
-        """
-        # Start with simple keyword-based classification
         prompt_lower = prompt.lower()
-        topic_scores = {}
-        for topic, keywords in self.predefined_topics.items():
-            score = sum(1 for keyword in keywords if keyword in prompt_lower)
-            topic_scores[topic] = score
-        # For complex prompts, use attention-based analysis
-        is_complex = len(prompt) > 100 or prompt.count('.') > 2  # Basic heuristic for complexity
-        if is_complex and self.attention:
-            attention_scores = self._analyze_with_attention(prompt)
-            if attention_scores:
-                # Combine scores with attention-based analysis
-                for topic, score in attention_scores.items():
-                    if topic in topic_scores:
-                        # Weighted combination based on prompt complexity
-                        complexity_factor = min(0.7, len(prompt) / 1000)
-                        topic_scores[topic] = (topic_scores[topic] * (1-complexity_factor)) + (score * complexity_factor)
-        # Advanced analysis if available
-        try:
-            if self.sentence_model:
-                # Get embedding and boost scores based on embedding similarity
-                embedding = self.sentence_model.encode(prompt)
-                # We could have reference embeddings for each topic and calculate similarity
-                # For now, we'll just use the embedding magnitude as a complexity measure
-                complexity = np.linalg.norm(embedding)
-                # Adjust scores based on complexity
-                if complexity > 15:  # High complexity
-                    if topic_scores.get("programming", 0) > 0:
-                        topic_scores["programming"] *= 1.5
-                    if topic_scores.get("science", 0) > 0:
-                        topic_scores["science"] *= 1.4
-                    if topic_scores.get("mathematics", 0) > 0:
-                        topic_scores["mathematics"] *= 1.3
-            if self.gpt2_model and self.gpt2_tokenizer:
-                # Calculate perplexity for another dimension of analysis
-                try:
-                    inputs = self.gpt2_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
-                    with torch.no_grad():
-                        outputs = self.gpt2_model(**inputs, labels=inputs["input_ids"])
-                    loss = outputs.loss.item()
-                    perplexity = math.exp(loss)
-                    # Adjust scores based on perplexity
-                    if perplexity > 100:  # Very specialized/technical content
-                        if topic_scores.get("programming", 0) > 0:
-                            topic_scores["programming"] *= 1.4
-                        if topic_scores.get("science", 0) > 0:
-                            topic_scores["science"] *= 1.3
-                        if topic_scores.get("mathematics", 0) > 0:
-                            topic_scores["mathematics"] *= 1.2
-                except Exception as e:
-                    logger.warning(f"Error in perplexity calculation: {e}")
-        except Exception as e:
-            logger.warning(f"Advanced analysis failed: {e}")
-        # Find the topic with the highest score
-        if not topic_scores or max(topic_scores.values()) == 0:
-            return "general", []
-        primary_topic = max(topic_scores.items(), key=lambda x: x[1])[0]
-        # Get secondary topics (any with non-zero scores except primary)
-        secondary_topics = [t for t, s in topic_scores.items()
-                            if s > 0 and t != primary_topic]
-        return primary_topic, secondary_topics
-    def analyze(self, prompt: str) -> int:
-        """
-        Analyze prompt complexity with attention-enhanced analysis
-        """
-        # First check if we can use attention-based analysis for complex prompts
-        if self.attention and len(prompt) > 150:
             try:
-                # Get sentence embeddings
-                sentences = nltk.sent_tokenize(prompt)
-                if len(sentences) > 1:
-                    # Apply attention to understand cross-sentence relationships
-                    sentence_embeddings = [self.sentence_model.encode(s) for s in sentences]
-                    embeddings_tensor = torch.tensor(sentence_embeddings).unsqueeze(1)
-                    # Use attention to focus on important parts of the prompt
-                    attended_embeddings, _ = self.attention(
-                        query=embeddings_tensor,
-                        key=embeddings_tensor,
-                        value=embeddings_tensor
-                    )
-                    # Calculate complexity based on attention-weighted embeddings
-                    complexity = torch.norm(attended_embeddings.mean(dim=0)).item()
-                    logger.info(f"Computed attention-weighted complexity: {complexity}")
-                    # Return candidate index based on complexity
-                    if complexity < 12:
-                        return 0  # Simpler model
-                    elif complexity < 24:
-                        return 1  # Moderate model
-                    else:
-                        return 2  # Complex model
             except Exception as e:
-                logger.warning(f"Attention-based analysis failed: {e}")
-        # Use embeddings if available for complexity analysis
-        if self.sentence_model:
-            try:
-                # Get embedding and calculate complexity based on vector properties
-                embedding = self.sentence_model.encode(prompt)
-                # Calculate complexity (vector magnitude)
-                complexity = np.linalg.norm(embedding)
-                logger.info(f"Computed embedding complexity: {complexity}")
-                # Return appropriate index based on complexity
-                if complexity < 10:
-                    return 0  # Less complex, use simpler model
-                elif complexity < 20:
-                    return 1  # Moderate complexity
-                else:
-                    return 2  # High complexity, use specialized model
-            except Exception as e:
-                logger.warning(f"Embedding-based analysis failed: {e}")
-        # Use perplexity as a fallback
-        if self.gpt2_model and self.gpt2_tokenizer:
-            try:
-                inputs = self.gpt2_tokenizer(prompt, return_tensors="pt", truncation=True)
-                with torch.no_grad():
-                    outputs = self.gpt2_model(**inputs, labels=inputs["input_ids"])
-                loss = outputs.loss.item()
-                perplexity = math.exp(loss)
-                logger.info(f"Computed perplexity: {perplexity}")
-                # Example threshold-based candidate selection:
-                if perplexity < 50:
-                    return 0  # Less perplexing, use simpler model
-                elif perplexity < 100:
-                    return 1  # Moderate perplexity
-                else:
-                    return 2  # High perplexity, use specialized model
-            except Exception as e:
-                logger.warning(f"Perplexity calculation failed: {e}")
-        # Fallback to simple keyword-based analysis
-        primary_topic, secondary_topics = self.analyze_prompt(prompt)
-        # Map topics to model indices
-        topic_to_index = {
-            "general": 0,
-            "history": 0,
-            "programming": 1,
-            "science": 1,
-            "mathematics": 2
-        }
-        # Return appropriate index or 0 if topic not in mapping
-        return topic_to_index.get(primary_topic, 0)
     def choose_model(self, prompt: str = None) -> Type:
         """Enhanced model selection that combines config and analysis"""

     - SmartHybridAttention for analyzing complex or long prompts
     - Performance tracking and caching for efficiency
     """
+    def __init__(self, model_name=None, dataset_path=None, specialization=None, hidden_dim=None):
         self.logger = logging.getLogger(__name__)
+        # Load config
+        self.config = load_config(config_file="config.json")
+        # Use provided values or config values
+        self.model_name = model_name or self.config.PROMPT_ANALYZER_CONFIG.MODEL_NAME
+        self.dataset_path = dataset_path or self.config.PROMPT_ANALYZER_CONFIG.DATASET_PATH
+        self.specialization = specialization or self.config.PROMPT_ANALYZER_CONFIG.SPECIALIZATION
+        self.hidden_dim = hidden_dim or self.config.PROMPT_ANALYZER_CONFIG.HIDDEN_DIM
+        self.logger.info(f"Initialized PromptAnalyzer with {self.model_name}")
+        self._model_cache: Dict[str, Type] = {}
+        self._performance_metrics: Dict[str, Dict[str, float]] = {}
+        # Load predefined topics from config or fall back to defaults
+        self._load_predefined_topics()
+        # Always use a proper SentenceTransformer model - fix this to avoid warnings
+        if hasattr(self, 'sentence_model'):
+            del self.sentence_model  # Remove any existing instance
+        # Use a proper SentenceTransformer model
+        self.sentence_model = get_sentence_transformer('sentence-transformers/all-MiniLM-L6-v2')
+        self.logger.info(f"Using SentenceTransformer model: sentence-transformers/all-MiniLM-L6-v2")
+        # Use GPT-2 for perplexity calculation
+        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        self.model = AutoModelForCausalLM.from_pretrained("gpt2")
+        self.model.eval()
+        logger.info(f"Initialized PromptAnalyzer with {self.model_name}, specialization: {self.specialization}, hidden_dim: {self.hidden_dim}")
+        if self.dataset_path:
+            logger.info(f"Using dataset from: {self.dataset_path}")
         # For caching and performance tracking
         self._model_cache = {}
         self._performance_metrics = {}
         # Initialize model_class attribute
         self.model_class = None
         except Exception:
             pass
+    def _load_predefined_topics(self):
+        """Populate ``self.predefined_topics`` (category name -> keyword list).
+
+        Sources are tried in order:
+          1. ``app_config.TOPIC_KEYWORDS``, when present and non-empty.
+          2. ``topic_keywords.json`` inside ``app_config.DATA_DIR``.
+          3. The hardcoded defaults below, which are then written to
+             ``topic_keywords.json`` on a best-effort basis.
+
+        Errors while reading sources 1-2 are logged and the defaults are used;
+        a failure to write the cache file is logged at debug level only.
+        """
+        # Try to load from config first
+        try:
+            if hasattr(app_config, 'TOPIC_KEYWORDS') and app_config.TOPIC_KEYWORDS:
+                logger.info("Loading topic keywords from config")
+                self.predefined_topics = app_config.TOPIC_KEYWORDS
+                return
+            # Try loading from a JSON file in the data directory
+            topic_file = os.path.join(app_config.DATA_DIR, "topic_keywords.json")
+            if os.path.exists(topic_file):
+                # Explicit encoding: the platform default is not UTF-8 everywhere
+                with open(topic_file, 'r', encoding='utf-8') as f:
+                    loaded_topics = json.load(f)
+                # The analysis methods iterate with .items(), so only accept a JSON object
+                if isinstance(loaded_topics, dict):
+                    self.predefined_topics = loaded_topics
+                    logger.info(f"Loaded {len(self.predefined_topics)} topic categories from {topic_file}")
+                    return
+                logger.warning(f"Ignoring {topic_file}: expected a JSON object, got {type(loaded_topics).__name__}")
+        except Exception as e:
+            logger.warning(f"Error loading topic keywords: {e}, using defaults")
+        # Fall back to default hardcoded topics
+        logger.info("Using default hardcoded topic keywords")
+        self.predefined_topics = {
+            "programming": [
+                "python", "java", "javascript", "typescript", "rust", "go", "golang",
+                # ...existing keywords...
+            ],
+            "computer_science": [
+                # ...existing keywords...
+            ],
+            "software_engineering": [
+                # ...existing keywords...
+            ],
+            "web_development": [
+                # ...existing keywords...
+            ]
+        }
+        # Cache the topics to a file for future use (best effort: never fatal)
+        try:
+            os.makedirs(app_config.DATA_DIR, exist_ok=True)
+            with open(os.path.join(app_config.DATA_DIR, "topic_keywords.json"), 'w', encoding='utf-8') as f:
+                json.dump(self.predefined_topics, f, indent=2)
+        except Exception as e:
+            logger.debug(f"Could not cache topic keywords: {e}")
     def _init_advanced_tools(self):
+        """Initialize advanced analysis tools with proper error handling and fallbacks"""
         self.sentence_model = None
         self.gpt2_model = None
         self.gpt2_tokenizer = None
+        # For embedding model, implement multiple fallbacks
+        MAX_RETRIES = 3
+        embedding_models = [
+            'sentence-transformers/all-MiniLM-L6-v2',  # Primary choice
+            'sentence-transformers/paraphrase-MiniLM-L3-v2',  # Smaller fallback
+            'sentence-transformers/distilbert-base-nli-mean-tokens'  # Last resort
+        ]
+        for retry in range(MAX_RETRIES):
+            for model_name in embedding_models:
+                try:
+                    from utils.transformer_utils import get_sentence_transformer
+                    self.sentence_model = get_sentence_transformer(model_name)
+                    self.logger.info(f"Successfully loaded SentenceTransformer: {model_name}")
+                    break
+                except Exception as e:
+                    self.logger.warning(f"Failed to load embedding model {model_name}: {e}")
+            if self.sentence_model:
+                break
+            # Wait before retry
+            time.sleep(2)
+        # Create keyword-based fallback if embedding loading completely fails
+        if not self.sentence_model:
+            self.logger.warning("All embedding models failed to load - using keyword fallback")
+            self._use_keyword_fallback = True
+        else:
+            self._use_keyword_fallback = False
         # Initialize SmartHybridAttention
         try:
             attention_config = get_hybrid_attention_config()
             self.logger.error(f"Error in attention-based analysis: {e}")
             return None
+    def _analyze_with_keywords(self, prompt: str) -> Tuple[str, float]:
+        """Classify ``prompt`` from keyword hits alone (no embeddings needed).
+
+        Every keyword in ``self.predefined_topics`` that occurs in the prompt as a
+        whole word or phrase counts as one match. The match count is divided by
+        the prompt's word count capped at 15 (minimum 1).
+
+        Returns:
+            ``("model_Custm", ratio)`` when the ratio exceeds 0.1, otherwise
+            ``("model_PrTr", 0.7)``.
+        """
+        import re  # function-scope import keeps this method self-contained
         prompt_lower = prompt.lower()
+        total_words = len(prompt_lower.split())
+        technical_matches = 0
+        # Count matches across all technical categories
+        for keywords in self.predefined_topics.values():
+            for keyword in keywords:
+                if not keyword:
+                    continue  # an empty keyword would "match" every prompt
+                # Require non-word characters (or the string edge) on both sides so
+                # that "go" does not fire on "good" nor "rust" on "trust".
+                pattern = r"(?<!\w)" + re.escape(keyword) + r"(?!\w)"
+                if re.search(pattern, prompt_lower):
+                    technical_matches += 1
+        # Simple ratio calculation
+        match_ratio = technical_matches / max(1, min(15, total_words))
+        if match_ratio > 0.1:  # Even a single match in a short query is significant
+            return "model_Custm", match_ratio
+        else:
+            return "model_PrTr", 0.7
+    def analyze_prompt(self, prompt: str) -> Tuple[str, float]:
+        """Classify ``prompt`` as technical or general.
+
+        Returns:
+            ``("model_Custm", confidence)`` for technical prompts, otherwise
+            ``("model_PrTr", confidence)``. Confidence is a plain float capped at 1.0.
+
+        Signals combined into the technical score:
+          * keyword matches against ``self.predefined_topics`` (weight 0.3)
+          * cosine similarity to a technical reference sentence (weight 0.4)
+          * attention scores, only for prompts longer than 100 characters and
+            only when an attention analyzer is set (weight 0.3)
+
+        If the embedding step raises, the error is logged and the result is
+        decided by keyword matches alone with a fixed confidence of 0.7.
+        """
+        # Check if we need to use keyword fallback due to embedding failure
+        if getattr(self, '_use_keyword_fallback', False):
+            return self._analyze_with_keywords(prompt)
+        # Convert prompt to lowercase for case-insensitive matching
+        prompt_lower = prompt.lower()
+        raw_words = prompt_lower.split()
+        word_count = len(raw_words)
+        # Strip sentence punctuation from word edges so "python?" still matches
+        # "python"; '+' and '#' are kept so "c++" / "c#" stay intact.
+        prompt_words = {word.strip(".,;:!?\"'()[]{}<>") for word in raw_words}
+        prompt_words.discard("")
+        technical_matches = 0
+        for keywords in self.predefined_topics.values():
+            # Single-token keywords: set intersection against the prompt's words
+            technical_matches += len(prompt_words.intersection(keywords))
+            # Multi-word keywords can never equal a single token; check them as phrases
+            technical_matches += sum(
+                1 for keyword in keywords if " " in keyword and keyword in prompt_lower
+            )
+        # Calculate keyword match ratio (normalized by word count, capped at 20)
+        keyword_ratio = technical_matches / max(1, min(20, word_count))
+        # Get attention-based analysis for complex prompts
+        attention_scores = None
+        # getattr: do not crash if the attention attribute was never set
+        if len(prompt) > 100 and getattr(self, "attention", None):  # Only use attention for longer prompts
             try:
+                attention_scores = self._analyze_with_attention(prompt)
             except Exception as e:
+                self.logger.warning(f"Error in attention analysis: {e}")
+        # Use embedding similarity for semantic understanding
+        try:
+            # Get embedding of the prompt
+            prompt_embedding = self.sentence_model.encode(prompt)
+            # Only the technical reference feeds the score, so no general-topic
+            # reference sentence is embedded here.
+            technical_reference = "Write code to solve a programming problem using algorithms and data structures."
+            technical_embedding = self.sentence_model.encode(technical_reference)
+            # float(): hand callers a plain Python float rather than an array scalar
+            technical_similarity = float(
+                cosine_similarity([prompt_embedding], [technical_embedding])[0][0]
+            )
+            # Calculate technical score combining all signals:
+            # 1. Keyword matching (30%)
+            # 2. Semantic similarity (40%)
+            # 3. Attention analysis if available (30%)
+            technical_score = 0.3 * keyword_ratio + 0.4 * technical_similarity
+            # Add attention score contribution if available
+            if attention_scores:
+                # Mean attention score over the four technical categories
+                tech_attention_score = (
+                    attention_scores.get("programming", 0) +
+                    attention_scores.get("computer_science", 0) +
+                    attention_scores.get("software_engineering", 0) +
+                    attention_scores.get("web_development", 0)
+                ) / 4.0  # Normalize
+                technical_score += 0.3 * tech_attention_score
+            technical_score = float(technical_score)
+            # Decide based on combined score
+            if technical_score > 0.3:  # Threshold - tune this as needed
+                # keyword_ratio can exceed 1, so cap the reported confidence
+                return "model_Custm", min(1.0, technical_score)
+            # A negative similarity would push 1 - score above 1; cap it as well
+            return "model_PrTr", min(1.0, 1.0 - technical_score)
+        except Exception as e:
+            self.logger.error(f"Error in prompt analysis: {e}")
+            # Fallback to simple keyword matching
+            if technical_matches > 0:
+                return "model_Custm", 0.7
+            else:
+                return "model_PrTr", 0.7
+    def analyze(self, prompt: str) -> int:
+        """Return the legacy candidate index for ``prompt``.
+
+        Delegates to ``analyze_prompt`` and discards the confidence value:
+        0 means ``model_Custm``; any other model type maps to 1 (``model_PrTr``).
+        """
+        selected_model, _ = self.analyze_prompt(prompt)
+        return 0 if selected_model == "model_Custm" else 1
     def choose_model(self, prompt: str = None) -> Type:
         """Enhanced model selection that combines config and analysis"""

model_PrTr.py CHANGED Viewed

@@ -58,15 +58,15 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
     The model uses the GPT-2 tokenizer for consistent tokenization."""
     def __init__(
         self,
-        vocab_size: int = 50257,  # Updated to GPT-2 vocab size
         specialization: str = "general",
         dataset_path: str = None,
-        model_name: str = "gpt2",  # Changed from bert-base-uncased to gpt2
         embedding_dim: int = 768,
         num_heads: int = 12,
         hidden_dim: int = 768,
         num_layers: int = 6,
-        output_size: int = 50257,  # Match GPT-2 vocab size
         dropout: float = 0.1,
         max_seq_length: int = 1024,  # GPT-2 supports longer contexts
         pooling_mode: str = "last",  # GPT-2 typically uses last token
@@ -99,15 +99,18 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
             # Initialize the model and tokenizer
             self.gpt2_model = GPT2LMHeadModel.from_pretrained(model_name)
-            # Use tokenizer from params, registry, or create new GPT-2 tokenizer
             if tokenizer is not None:
                 self.tokenizer = tokenizer
             elif registry.has(TOKENIZER):
                 self.tokenizer = registry.get(TOKENIZER)
             else:
                 self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
-                if self.tokenizer.pad_token_id is None:
-                    self.tokenizer.pad_token = self.tokenizer.eos_token
             logger.info(f"Successfully loaded GPT-2 model: {model_name}")
@@ -135,36 +138,54 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
         return outputs.logits
     # Update generate to handle both direct prompt and tokenized input
-    def generate(self, prompt=None, input_ids=None, **kwargs):
         """Generate text using the GPT-2 model"""
         try:
             # Handle either string prompt or direct input_ids
             if isinstance(prompt, str) and input_ids is None:
                 inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
                 input_ids = inputs.input_ids
             elif input_ids is None:
                 raise ValueError("Either prompt or input_ids must be provided")
-            # Set default parameters if not provided
-            generation_kwargs = {
-                "max_length": kwargs.get("max_length", min(self.max_length, 1024)),
-                "max_new_tokens": kwargs.get("max_new_tokens", 512),  # Added max_new_tokens
-                "temperature": kwargs.get("temperature", 0.7),
-                "top_p": kwargs.get("top_p", 0.9),
-                "top_k": kwargs.get("top_k", 50),
-                "repetition_penalty": kwargs.get("repetition_penalty", 1.0),
-                "do_sample": kwargs.get("do_sample", True),
-                "num_return_sequences": kwargs.get("num_return_sequences", 1),
-                "pad_token_id": self.tokenizer.pad_token_id
-            }
             # Use max_new_tokens instead of max_length if input is longer than max_length-50
-            if input_ids.shape[1] > (generation_kwargs["max_length"] - 50):
                 logger.info(f"Input length {input_ids.shape[1]} is close to max_length, using max_new_tokens instead")
-                del generation_kwargs["max_length"]
             # Generate output using the full GPT-2 model
-            output_ids = self.gpt2_model.generate(input_ids, **generation_kwargs)
             # Decode the output and ensure it's a string, not a tensor
             if torch.is_tensor(output_ids):
@@ -358,7 +379,7 @@ class Wildnerve_tlm01:
     """
     def __init__(
         self,
-        model_name="distilbert-base-uncased",
         tokenizer=None,
         device=None,
         **kwargs
@@ -408,4 +429,62 @@ try:
     registry.register(PRETRAINED_MODEL, model, overwrite=True)
     logger.info("Registered pretrained model in service registry")
 except Exception as e:
-    logger.error(f"Failed to register pretrained model: {e}")

     The model uses the GPT-2 tokenizer for consistent tokenization."""
     def __init__(
         self,
+        vocab_size: int = 50257,  # Standardized GPT-2 vocab size
         specialization: str = "general",
         dataset_path: str = None,
+        model_name: str = "gpt2",  # Standardized to GPT-2
         embedding_dim: int = 768,
         num_heads: int = 12,
         hidden_dim: int = 768,
         num_layers: int = 6,
+        output_size: int = 50257,  # Standardized GPT-2 vocab size
         dropout: float = 0.1,
         max_seq_length: int = 1024,  # GPT-2 supports longer contexts
         pooling_mode: str = "last",  # GPT-2 typically uses last token
             # Initialize the model and tokenizer
             self.gpt2_model = GPT2LMHeadModel.from_pretrained(model_name)
+            # Ensure proper tokenizer setup for GPT-2
             if tokenizer is not None:
                 self.tokenizer = tokenizer
             elif registry.has(TOKENIZER):
                 self.tokenizer = registry.get(TOKENIZER)
             else:
                 self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
+            # Ensure GPT-2 tokenizer has pad_token set (critical fix)
+            if self.tokenizer.pad_token_id is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+                self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
             logger.info(f"Successfully loaded GPT-2 model: {model_name}")
         return outputs.logits
     # Update generate to handle both direct prompt and tokenized input
+    def generate(self, prompt=None, input_ids=None, max_length=None, **kwargs):
         """Generate text using the GPT-2 model"""
         try:
+            # Try to use adapter_layer.generate if available (consolidate generation paths)
+            adapter_layer = registry.get("adapter_layer")
+            if adapter_layer and hasattr(adapter_layer, "generate"):
+                if prompt:
+                    return adapter_layer.generate(prompt, max_length=max_length, **kwargs)
+                elif input_ids is not None and self.tokenizer:
+                    # Convert input_ids back to text
+                    prompt = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
+                    return adapter_layer.generate(prompt, max_length=max_length, **kwargs)
+            # Continue with direct generation if adapter_layer not available
+            # Enhanced generation parameters
+            generation_config = {
+                "max_length": max_length or 150,
+                "temperature": kwargs.get('temperature', 0.7),
+                "top_p": kwargs.get('top_p', 0.95),
+                "top_k": kwargs.get('top_k', 50),
+                "repetition_penalty": kwargs.get('repetition_penalty', 1.3),
+                "no_repeat_ngram_size": kwargs.get('no_repeat_ngram_size', 3),
+                "do_sample": True,
+                "pad_token_id": self.tokenizer.pad_token_id,
+                "eos_token_id": self.tokenizer.eos_token_id,
+                "early_stopping": True,
+                "penalty_alpha": 0.6  # Add penalty alpha for better response quality
+            }
             # Handle either string prompt or direct input_ids
             if isinstance(prompt, str) and input_ids is None:
                 inputs = self.tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
                 input_ids = inputs.input_ids
             elif input_ids is None:
                 raise ValueError("Either prompt or input_ids must be provided")
+            # Add user-provided kwargs that we didn't explicitly set
+            for k, v in kwargs.items():
+                if k not in generation_config and k not in ('prompt', 'context'):
+                    generation_config[k] = v
             # Use max_new_tokens instead of max_length if input is longer than max_length-50
+            if input_ids.shape[1] > (generation_config["max_length"] - 50):
                 logger.info(f"Input length {input_ids.shape[1]} is close to max_length, using max_new_tokens instead")
+                del generation_config["max_length"]
             # Generate output using the full GPT-2 model
+            output_ids = self.gpt2_model.generate(input_ids, **generation_config)
             # Decode the output and ensure it's a string, not a tensor
             if torch.is_tensor(output_ids):
     """
     def __init__(
         self,
+        model_name="gpt2",
         tokenizer=None,
         device=None,
         **kwargs
     registry.register(PRETRAINED_MODEL, model, overwrite=True)
     logger.info("Registered pretrained model in service registry")
 except Exception as e:
+    logger.error(f"Failed to register pretrained model: {e}")
+def initialize_system():
+    """Initialize all components in the correct order
+
+    Steps, in order:
+      1. Build a GPT-2 tokenizer (falling back to ``TokenizerWrapper``) and
+         register it under ``TOKENIZER``.
+      2. Build the pretrained GPT-2 model and register it under
+         ``PRETRAINED_MODEL``; a failure here is logged but does not abort.
+      3. Build the custom model and register it under ``MODEL``.
+
+    Returns:
+        True when the custom model was registered, False when step 3 failed.
+        The result does not reflect whether step 2 succeeded.
+    """
+    logger.info("Starting system initialization")
+    # First tokenizer - Use GPT-2 tokenizer instead of BERT
+    try:
+        from transformers import GPT2Tokenizer
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+        # GPT-2 tokenizer doesn't have a pad_token by default, so we set it
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+    except Exception as e:
+        logger.warning(f"Could not load GPT-2 tokenizer, falling back to wrapper: {e}")
+        # NOTE(review): no pad token is set on this fallback path - confirm
+        # TokenizerWrapper provides one itself.
+        from tokenizer import TokenizerWrapper
+        tokenizer = TokenizerWrapper(model_name="gpt2")
+    # Then register tokenizer
+    from service_registry import registry, TOKENIZER, PRETRAINED_MODEL
+    registry.register(TOKENIZER, tokenizer, overwrite=True)
+    logger.info("Tokenizer registered")
+    # Initialize pretrained model (GPT-2)
+    try:
+        # NOTE(review): the diff places this function in model_PrTr.py, so this
+        # looks like a self-import - confirm it resolves to the intended class.
+        from model_PrTr import Wildnerve_tlm01 as PretrainedModel
+        pretrained = PretrainedModel(model_name="gpt2", tokenizer=tokenizer)
+        registry.register(PRETRAINED_MODEL, pretrained, overwrite=True)
+        logger.info("GPT-2 pretrained model registered")
+    except Exception as e:
+        # Deliberately non-fatal: initialization continues with the custom model
+        logger.error(f"Failed to initialize GPT-2 model: {e}", exc_info=True)
+    # Now load custom model
+    try:
+        from model_Custm import Wildnerve_tlm01
+        model = Wildnerve_tlm01(
+            vocab_size=50257,  # Match GPT-2 vocab size
+            specialization="general",
+            dataset_path=None,
+            model_name="gpt2",  # Use GPT-2 compatibility
+            embedding_dim=768,
+            num_heads=12,
+            hidden_dim=768,
+            num_layers=2,
+            output_size=50257,  # Match GPT-2 vocab
+            dropout=0.1,
+            max_seq_length=128,
+            pooling_mode="mean",
+            tokenizer=tokenizer
+        )
+        # Register model
+        from service_registry import MODEL
+        registry.register(MODEL, model, overwrite=True)
+        logger.info("Custom model registered successfully")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to initialize custom model: {e}", exc_info=True)
+        return False

service_registry.py CHANGED Viewed

@@ -137,18 +137,18 @@ def ensure_models_registered():
                     tok = registry.get(TOKENIZER)
                     if not tok:
                         from tokenizer import TokenizerWrapper
-                        tok = TokenizerWrapper(model_name="gpt2")
                         registry.register(TOKENIZER, tok, overwrite=True)
                     # Create pretrained model
                     model = model_class(
-                        model_name="gpt2",
                         tokenizer=tok
                     )
                     # Register as pretrained model
                     registry.register(PRETRAINED_MODEL, model, overwrite=True)
-                    logger.info("Successfully registered pretrained model")
                     return True
             logger.error(f"model_PrTr.py not found at {model_path}")

                     tok = registry.get(TOKENIZER)
                     if not tok:
                         from tokenizer import TokenizerWrapper
+                        tok = TokenizerWrapper(model_name="gpt2") # Changed from bert-base-uncased
                         registry.register(TOKENIZER, tok, overwrite=True)
                     # Create pretrained model
                     model = model_class(
+                        model_name="gpt2", # Explicitly use gpt2
                         tokenizer=tok
                     )
                     # Register as pretrained model
                     registry.register(PRETRAINED_MODEL, model, overwrite=True)
+                    logger.info("Successfully registered GPT-2 pretrained model")
                     return True
             logger.error(f"model_PrTr.py not found at {model_path}")