"""Ultra-lightweight model-serving adapter for HF inference endpoints.

Provides lazy model loading (custom transformer -> pretrained GPT-2 wrapper
-> generated stub, in that order), keyword-based prompt routing fallbacks,
and optional codecarbon emissions tracking with a no-op shim when the
package is unavailable.
"""

import os
import sys
import json
import inspect
import logging
import pydantic  # required
import importlib.util  # required
from typing import Dict, Any, Optional, List, Tuple

from service_registry import registry, MODEL, PRETRAINED_MODEL, TOKENIZER

# Force low memory usage mode
os.environ["LOW_MEMORY_MODE"] = "1"

# Log versions and fail fast if missing
logger = logging.getLogger(__name__)
logger.info(f"Using pydantic v{pydantic.__version__}")

# Add proper codecarbon import handling: if the package is missing we install
# a no-op shim so downstream code can use `codecarbon.EmissionsTracker`
# unconditionally.
try:
    import codecarbon

    codecarbon_available = True
    logger.info(f"Using codecarbon v{codecarbon.__version__}")
except ImportError:
    codecarbon_available = False
    logger.warning("codecarbon is not available - carbon tracking disabled")

    class DummyEmissionsTracker:
        """No-op stand-in for codecarbon.EmissionsTracker."""

        def __init__(self, *args, **kwargs):
            pass

        def start(self):
            # Mirror the real tracker's chainable start().
            return self

        def stop(self):
            # Real tracker returns emissions (kg CO2eq); report zero.
            return 0.0

    class codecarbon:  # noqa: N801 - deliberately shadows the module name
        """Module-shaped shim exposing the same attributes callers use."""

        __version__ = "unavailable"
        EmissionsTracker = DummyEmissionsTracker

print(f"Successfully using installed dependencies - pydantic: {pydantic.__version__}, codecarbon: {'available' if codecarbon_available else 'unavailable'}")


# MEMORY OPTIMIZATION: Show current memory usage
def log_memory_usage():
    """Log and return the current process RSS in MB (0 if psutil is missing)."""
    try:
        import psutil

        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()
        memory_mb = memory_info.rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        return memory_mb
    except Exception:
        # Best-effort diagnostic only; never let memory logging break serving.
        return 0


def is_module_available(module_name):
    """Return True if *module_name* can be imported, without importing it.

    Note: find_spec returns None (rather than raising) for a missing
    top-level module, so the result must be checked explicitly; it can
    still raise ModuleNotFoundError for submodules of absent packages.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except ImportError:
        return False


# More robust import for PromptAnalyzer: fall back to a minimal
# keyword-routing implementation when model_List is unavailable.
try:
    from model_List import PromptAnalyzer

    logger.info("Successfully imported PromptAnalyzer")
except ImportError as e:
    logger.error(f"Error importing PromptAnalyzer: {e}")

    class PromptAnalyzer:
        """Minimal fallback analyzer using keyword-based routing."""

        def __init__(self, **kwargs):
            self.logger = logging.getLogger(__name__)
            # Keyword buckets used to pick a model for a prompt.
            self.predefined_topics = {
                "programming": ["python", "java", "code"],
                "general": ["weather", "hello", "chat"]
            }

        def analyze_prompt(self, prompt: str):
            """Return (model_name, confidence) chosen by keyword match."""
            # Simple keyword-based routing
            prompt_lower = prompt.lower()
            for tech_word in self.predefined_topics.get("programming", []):
                if tech_word in prompt_lower:
                    return "model_Custm", 0.8
            return "model_PrTr", 0.6


# MEMORY OPTIMIZATION: Create basic PromptAnalyzer without loading models
class BasicPromptAnalyzer:
    """Keyword-routing analyzer that never loads any neural model."""

    def __init__(self, **kwargs):
        self.logger = logging.getLogger(__name__)
        self.predefined_topics = {
            "programming": ["python", "java", "code"],
            "general": ["weather", "hello", "chat"]
        }

    def analyze_prompt(self, prompt: str):
        """Return (model_name, confidence) chosen by keyword match."""
        # Simple keyword-based routing
        prompt_lower = prompt.lower()
        for tech_word in self.predefined_topics.get("programming", []):
            if tech_word in prompt_lower:
                return "model_Custm", 0.8
        return "model_PrTr", 0.6


class WildnerveModelAdapter:
    """Ultra-lightweight adapter layer for HF inference endpoints.

    Defers all heavy work (tokenizer + model construction, weight download)
    to the first generate() call; only verifies which model source files
    exist at construction time.
    """

    def __init__(self, model_path: str):
        self.model_path = model_path
        self.tokenizer = None
        self.model = None
        # Guard so the (expensive, possibly failing) load runs at most once.
        self.model_loaded = False
        logger.info(f"Creating adapter with path: {model_path}")
        # Safe verification of model file existence
        self._verify_model_files()

    def _verify_model_files(self):
        """Verify model files exist without loading them.

        Populates self.available_models (module name -> file path); writes a
        stub model file as a last resort when no real model file is present.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        model_files = ["model_Custm.py", "model_PrTr.py"]

        self.available_models = {}
        for filename in model_files:
            filepath = os.path.join(script_dir, filename)
            if os.path.exists(filepath):
                module_name = filename.replace('.py', '')
                self.available_models[module_name] = filepath
                logger.info(f"Found model file: {filename}")

        if not self.available_models:
            logger.warning("No model files found - will use stub implementation")
            # Create stub file if needed
            stub_path = os.path.join(script_dir, "model_stub.py")
            if not os.path.exists(stub_path):
                try:
                    with open(stub_path, "w") as f:
                        f.write("""
# Minimal stub model
import torch.nn as nn

class Wildnerve_tlm01(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.is_stub = True
        for key, value in kwargs.items():
            setattr(self, key, value)

    def generate(self, prompt=None, **kwargs):
        return f"Stub model response for: {prompt[:30]}..."
""")
                    logger.info("Created stub model file")
                except Exception as e:
                    logger.error(f"Failed to create stub model: {e}")

    def generate(self, text_input, max_length=None, **kwargs):
        """Generate text - with lazy model loading.

        Returns the raw model output string, or a plain-text fallback
        message when no model could be initialized. Never raises: all
        errors are logged and folded into the returned string.
        """
        try:
            # 1. Load model if not already loaded
            if not self.model_loaded:
                logger.info("Loading model for first request")
                self._lazy_load_model()

            # 2. Let the model handle inference directly with NO pattern
            #    matching or rules
            if self.model:
                try:
                    logger.info(f"Sending prompt directly to neural model: {type(self.model).__name__}")
                    model_response = self.model.generate(
                        prompt=text_input,
                        max_length=max_length,
                        **kwargs
                    )
                    # Log response for debugging but don't intercept or alter it
                    logger.info(f"Model generated response of length {len(model_response) if isinstance(model_response, str) else 'unknown'}")
                    # Return the raw model response - let the model shine
                    # (or fail naturally)
                    return model_response
                except Exception as e:
                    # Only log the error but don't substitute with rule-based
                    # responses; fall through to the basic fallback below.
                    logger.error(f"Neural model inference error: {e}")
            else:
                logger.warning("No model available - only basic response possible")

            # 3. Minimal fallback ONLY if model couldn't be loaded or threw
            #    an exception
            if self.tokenizer:
                return f"The model couldn't be properly initialized. Your input: '{text_input[:30]}...'"
            return f"No language model available to process: '{text_input[:30]}...'"
        except Exception as e:
            logger.error(f"Critical error in generate method: {e}")
            return f"An error occurred processing your request: {str(e)}"

    def _lazy_load_model(self):
        """Try to load a model on demand, with multiple fallback options.

        Order: model_Custm (custom transformer + downloaded weights) ->
        model_PrTr (pretrained GPT-2 wrapper) -> model_stub. Always marks
        model_loaded so failed attempts are not retried per request.
        """
        try:
            logger.info("Attempting to load model on first request")

            # First initialize tokenizer if not already done
            self._initialize_minimal_tokenizer()

            # Download and load model weights first with better logging
            weight_files = {}
            try:
                from load_model_weights import download_model_files, load_weights_into_model, verify_token

                # First verify token is available
                token_verified = verify_token()
                logger.info(f"HF Token verification: {token_verified}")

                # Get weights from HF repository with more robust error reporting
                logger.info("Downloading model weights...")
                try:
                    # Try multiple repositories in priority order
                    repositories = [
                        "EvolphTech/Weights",
                        "Wildnerve/tlm-0.05Bx12",
                        "Wildnerve/tlm",
                        "EvolphTech/Checkpoints"
                    ]
                    weight_files = None
                    for repo in repositories:
                        logger.info(f"Attempting to download weights from {repo}...")
                        try:
                            weight_files = download_model_files(repo_id_base=repo)
                            if weight_files and "transformer" in weight_files:
                                logger.info(f"Successfully downloaded weights from {repo}")
                                break
                        except Exception as repo_error:
                            logger.warning(f"Failed to download from {repo}: {repo_error}")

                    # Add detailed logging about weight files
                    if weight_files:
                        logger.info(f"Download returned {len(weight_files)} weight files: {list(weight_files.keys())}")
                    else:
                        logger.warning("No weight files were returned from download_model_files")
                    # Normalize: every repository may have failed without
                    # raising, leaving weight_files as None; the membership
                    # tests below require a dict.
                    if not weight_files:
                        weight_files = {}
                except Exception as e:
                    logger.error(f"Error downloading weights: {str(e)}")
                    weight_files = {}
            except ImportError:
                logger.error("Could not import load_model_weights - missing dependencies?")
                weight_files = {}

            # Try to load model_Custm first
            if "model_Custm" in self.available_models:
                try:
                    logger.info("Trying to load model_Custm")
                    model_custm_spec = importlib.util.spec_from_file_location(
                        "model_Custm", self.available_models["model_Custm"]
                    )
                    model_custm = importlib.util.module_from_spec(model_custm_spec)
                    model_custm_spec.loader.exec_module(model_custm)

                    if hasattr(model_custm, "Wildnerve_tlm01"):
                        logger.info("Creating Wildnerve_tlm01 from model_Custm")
                        model_class = getattr(model_custm, "Wildnerve_tlm01")

                        # Create model with safer config handling
                        try:
                            # Import config handling
                            from config import app_config

                            # Ensure config_data exists if app_config is a dict
                            if isinstance(app_config, dict) and "TRANSFORMER_CONFIG" in app_config:
                                if isinstance(app_config["TRANSFORMER_CONFIG"], dict) and "config_data" not in app_config["TRANSFORMER_CONFIG"]:
                                    app_config["TRANSFORMER_CONFIG"]["config_data"] = app_config["TRANSFORMER_CONFIG"]
                                    logger.info("Added config_data attribute to TRANSFORMER_CONFIG dictionary")
                        except Exception as config_error:
                            logger.warning(f"Config handling error: {config_error}")

                        self.model = model_class(
                            tokenizer=self.tokenizer,
                            vocab_size=50257,  # GPT-2 vocab size
                            specialization="general",
                            embedding_dim=768,
                            num_heads=12,
                            hidden_dim=768,
                            num_layers=2,  # Reduced for memory efficiency
                            output_size=50257,  # Match GPT-2 vocab
                            dropout=0.1,
                            max_seq_length=128  # Reduced for memory
                        )

                        # Enhanced weight loading with detailed path information
                        if "transformer" in weight_files and weight_files["transformer"]:
                            weight_path = weight_files["transformer"]
                            logger.info(f"Loading weights from {weight_path}")
                            logger.info(f"Weight file exists: {os.path.exists(weight_path)}")
                            logger.info(f"Weight file size: {os.path.getsize(weight_path) / 1024 / 1024:.2f} MB")
                            success = load_weights_into_model(self.model, weight_path, strict=False)
                            if success:
                                logger.info("✅ Successfully loaded transformer weights")
                            else:
                                logger.warning("❌ Failed to load transformer weights")
                        else:
                            logger.warning("❌ No transformer weights found in weight_files")

                        logger.info("Successfully created custom model")
                        self.model_loaded = True
                        return
                except Exception as e:
                    logger.error(f"Failed to load model_Custm: {e}")

            # Try model_PrTr next
            if "model_PrTr" in self.available_models:
                try:
                    logger.info("Trying to load model_PrTr")
                    model_prtr_spec = importlib.util.spec_from_file_location(
                        "model_PrTr", self.available_models["model_PrTr"]
                    )
                    model_prtr = importlib.util.module_from_spec(model_prtr_spec)
                    model_prtr_spec.loader.exec_module(model_prtr)

                    if hasattr(model_prtr, "Wildnerve_tlm01"):
                        logger.info("Creating Wildnerve_tlm01 from model_PrTr")
                        model_class = getattr(model_prtr, "Wildnerve_tlm01")
                        self.model = model_class(
                            tokenizer=self.tokenizer,
                            model_name="gpt2"
                        )
                        logger.info("Successfully created pretrained model")
                        self.model_loaded = True
                        return
                except Exception as e:
                    logger.error(f"Failed to load model_PrTr: {e}")

            # Try stub model as last resort
            try:
                logger.info("Trying to load model_stub")
                script_dir = os.path.dirname(os.path.abspath(__file__))
                stub_path = os.path.join(script_dir, "model_stub.py")
                if os.path.exists(stub_path):
                    stub_spec = importlib.util.spec_from_file_location("model_stub", stub_path)
                    model_stub = importlib.util.module_from_spec(stub_spec)
                    stub_spec.loader.exec_module(model_stub)

                    if hasattr(model_stub, "Wildnerve_tlm01"):
                        logger.info("Creating stub model")
                        model_class = getattr(model_stub, "Wildnerve_tlm01")
                        self.model = model_class(
                            tokenizer=self.tokenizer,
                            specialization="stub"
                        )
                        logger.warning("Using STUB model - limited functionality")
                        self.model_loaded = True
                        return
            except Exception as e:
                logger.error(f"Failed to load stub model: {e}")

            logger.error("All model loading attempts failed")
        except Exception as e:
            logger.error(f"Error in _lazy_load_model: {e}")
        finally:
            # Always mark as loaded to avoid repeated attempts
            self.model_loaded = True

    def _initialize_minimal_tokenizer(self):
        """Initialize just the tokenizer, not the model."""
        try:
            from transformers import AutoTokenizer

            self.tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
            # Fix for GPT-2 tokenizer: set pad_token to eos_token
            if not self.tokenizer.pad_token:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                logger.info("Set GPT-2 pad_token to eos_token")
            logger.info("Initialized minimal tokenizer")
        except Exception as e:
            logger.error(f"Failed to initialize tokenizer: {e}")