# WildnerveAI's picture
# Upload 2 files
# ff8d4e7 verified
import os
import sys
import json
import logging
import pydantic # required
import importlib.util # required
from typing import Dict, Any, Optional, List, Tuple
from service_registry import registry, MODEL, PRETRAINED_MODEL, TOKENIZER
# Force low memory usage mode
os.environ["LOW_MEMORY_MODE"] = "1"

# Log versions and fail fast if missing
logger = logging.getLogger(__name__)
logger.info(f"Using pydantic v{pydantic.__version__}")

# Add proper codecarbon import handling
try:
    import codecarbon
except ImportError:
    codecarbon_available = False
    logger.warning("codecarbon is not available - carbon tracking disabled")

    # Create dummy class for compatibility
    class _NoOpEmissionsTracker:
        """Drop-in stand-in for codecarbon.EmissionsTracker when the package is missing."""

        def __init__(self, *args, **kwargs):
            pass

        def start(self):
            return self

        def stop(self):
            # Report zero emissions; callers treat the return as kg CO2eq.
            return 0.0

    class codecarbon:
        """Minimal shim exposing the two attributes the rest of the module touches."""
        __version__ = "unavailable"
        EmissionsTracker = _NoOpEmissionsTracker
else:
    codecarbon_available = True
    logger.info(f"Using codecarbon v{codecarbon.__version__}")

print(f"Successfully using installed dependencies - pydantic: {pydantic.__version__}, codecarbon: {'available' if codecarbon_available else 'unavailable'}")
# MEMORY OPTIMIZATION: Show current memory usage
# MEMORY OPTIMIZATION: Show current memory usage
def log_memory_usage():
    """Log and return the process's current resident memory (RSS) in MB.

    Returns:
        float | int: resident set size in megabytes, or 0 when psutil is
        unavailable or the lookup fails (this is a best-effort diagnostic).
    """
    try:
        import psutil  # optional dependency; imported lazily to keep startup light

        process = psutil.Process(os.getpid())
        memory_mb = process.memory_info().rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        return memory_mb
    except Exception:
        # Was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; `Exception` keeps this best-effort without
        # masking interpreter shutdown signals.
        return 0
# Import dependency helpers
# Import dependency helpers
def is_module_available(module_name):
    """Return True if *module_name* can be imported, without importing it.

    Bug fix: ``importlib.util.find_spec`` returns ``None`` (it does not
    raise) for a missing top-level module, so the original version returned
    True for everything; the result must be checked explicitly.
    """
    try:
        return importlib.util.find_spec(module_name) is not None
    except (ImportError, AttributeError, ValueError):
        # find_spec raises for invalid names or missing parent packages.
        return False
# More robust import for PromptAnalyzer
# More robust import for PromptAnalyzer
try:
    from model_List import PromptAnalyzer

    logging.getLogger(__name__).info("Successfully imported PromptAnalyzer")
except ImportError as e:
    logging.getLogger(__name__).error(f"Error importing PromptAnalyzer: {e}")

    # Create a minimal PromptAnalyzer class
    class PromptAnalyzer:
        """Keyword-based routing fallback used when model_List cannot be imported."""

        def __init__(self, **kwargs):
            self.logger = logging.getLogger(__name__)
            self.predefined_topics = {
                "programming": ["python", "java", "code"],
                "general": ["weather", "hello", "chat"],
            }

        def analyze_prompt(self, prompt: str):
            """Return a (model_name, confidence) pair chosen by keyword match."""
            lowered = prompt.lower()
            # Any programming keyword routes to the custom model.
            if any(word in lowered for word in self.predefined_topics.get("programming", [])):
                return "model_Custm", 0.8
            return "model_PrTr", 0.6
# MEMORY OPTIMIZATION: Create basic PromptAnalyzer without loading models
# MEMORY OPTIMIZATION: Create basic PromptAnalyzer without loading models
class BasicPromptAnalyzer:
    """Lightweight keyword router that never loads any ML model."""

    def __init__(self, **kwargs):
        self.logger = logging.getLogger(__name__)
        self.predefined_topics = {
            "programming": ["python", "java", "code"],
            "general": ["weather", "hello", "chat"],
        }

    def analyze_prompt(self, prompt: str):
        """Return a (model_name, confidence) pair via simple keyword matching."""
        lowered = prompt.lower()
        programming_terms = self.predefined_topics.get("programming", [])
        if any(term in lowered for term in programming_terms):
            return "model_Custm", 0.8
        # Default route for everything non-technical.
        return "model_PrTr", 0.6
class WildnerveModelAdapter:
    """Ultra-lightweight adapter layer for HF inference endpoints.

    Defers all heavy work (tokenizer construction, weight download, model
    instantiation) until the first generate() call so cold start stays cheap.
    Falls back through: custom model -> pretrained wrapper -> generated stub.
    """

    def __init__(self, model_path: str):
        # model_path is kept for logging only; model files are discovered
        # relative to this script, not under model_path.
        self.model_path = model_path
        self.tokenizer = None
        self.model = None
        self.model_loaded = False
        logger.info(f"Creating adapter with path: {model_path}")
        # Safe verification of model file existence
        self._verify_model_files()

    def _verify_model_files(self):
        """Verify model files exist without loading them.

        Populates self.available_models (module name -> file path) and, when
        no real model files are present, writes a minimal stub module so the
        lazy loader always has something to import.
        """
        script_dir = os.path.dirname(os.path.abspath(__file__))
        model_files = ["model_Custm.py", "model_PrTr.py"]
        self.available_models = {}
        for filename in model_files:
            filepath = os.path.join(script_dir, filename)
            if os.path.exists(filepath):
                module_name = filename.replace('.py', '')
                self.available_models[module_name] = filepath
                # BUGFIX: previously logged a literal "(unknown)" placeholder
                # instead of the discovered file name.
                logger.info(f"Found model file: {filename}")
        if not self.available_models:
            logger.warning("No model files found - will use stub implementation")
            # Create stub file if needed
            stub_path = os.path.join(script_dir, "model_stub.py")
            if not os.path.exists(stub_path):
                try:
                    with open(stub_path, "w") as f:
                        f.write("""
# Minimal stub model
import torch.nn as nn

class Wildnerve_tlm01(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.is_stub = True
        for key, value in kwargs.items():
            setattr(self, key, value)

    def generate(self, prompt=None, **kwargs):
        return f"Stub model response for: {prompt[:30]}..."
""")
                    logger.info("Created stub model file")
                except Exception as e:
                    logger.error(f"Failed to create stub model: {e}")

    def generate(self, text_input, max_length=None, **kwargs):
        """Generate text for *text_input*, lazily loading the model first.

        Returns the raw model output when a model is available; otherwise a
        short plain-text fallback message. Never raises: every error is
        logged and converted into a fallback string.
        """
        try:
            # 1. Load model if not already loaded
            if not self.model_loaded:
                logger.info("Loading model for first request")
                self._lazy_load_model()
            # 2. Let the model handle inference directly with NO pattern matching or rules
            if self.model:
                try:
                    logger.info(f"Sending prompt directly to neural model: {type(self.model).__name__}")
                    model_response = self.model.generate(
                        prompt=text_input,
                        max_length=max_length,
                        **kwargs
                    )
                    # Log response for debugging but don't intercept or alter it
                    logger.info(f"Model generated response of length {len(model_response) if isinstance(model_response, str) else 'unknown'}")
                    # Return the raw model response - let the model shine (or fail naturally)
                    return model_response
                except Exception as e:
                    # Only log the error but don't substitute with rule-based responses
                    logger.error(f"Neural model inference error: {e}")
                    # Continue to basic fallback only if the model completely failed
            else:
                logger.warning("No model available - only basic response possible")
            # 3. Minimal fallback ONLY if model couldn't be loaded or threw exception
            if self.tokenizer:
                return f"The model couldn't be properly initialized. Your input: '{text_input[:30]}...'"
            return f"No language model available to process: '{text_input[:30]}...'"
        except Exception as e:
            logger.error(f"Critical error in generate method: {e}")
            return f"An error occurred processing your request: {str(e)}"

    def _lazy_load_model(self):
        """Try to load a model on demand, with multiple fallback options.

        Order: download weights (best-effort), then model_Custm (custom
        transformer), then model_PrTr (pretrained wrapper), then the stub.
        Always marks self.model_loaded = True so loading is attempted once.
        """
        try:
            logger.info("Attempting to load model on first request")
            # First initialize tokenizer if not already done
            self._initialize_minimal_tokenizer()
            # Download and load model weights first with better logging
            try:
                from load_model_weights import download_model_files, load_weights_into_model, verify_token
                # First verify token is available
                token_verified = verify_token()
                logger.info(f"HF Token verification: {token_verified}")
                # Get weights from HF repository with more robust error reporting
                logger.info("Downloading model weights...")
                try:
                    # Try multiple repositories in priority order
                    repositories = [
                        "EvolphTech/Weights",
                        "Wildnerve/tlm-0.05Bx12",
                        "Wildnerve/tlm",
                        "EvolphTech/Checkpoints"
                    ]
                    weight_files = None
                    for repo in repositories:
                        logger.info(f"Attempting to download weights from {repo}...")
                        try:
                            weight_files = download_model_files(repo_id_base=repo)
                            if weight_files and "transformer" in weight_files:
                                logger.info(f"Successfully downloaded weights from {repo}")
                                break
                        except Exception as repo_error:
                            logger.warning(f"Failed to download from {repo}: {repo_error}")
                    # Add detailed logging about weight files
                    if weight_files:
                        logger.info(f"Download returned {len(weight_files)} weight files: {list(weight_files.keys())}")
                    else:
                        logger.warning("No weight files were returned from download_model_files")
                except Exception as e:
                    logger.error(f"Error downloading weights: {str(e)}")
                    weight_files = {}
            except ImportError:
                logger.error("Could not import load_model_weights - missing dependencies?")
                weight_files = {}
            # Try to load model_Custm first
            if "model_Custm" in self.available_models:
                try:
                    logger.info("Trying to load model_Custm")
                    model_custm_spec = importlib.util.spec_from_file_location(
                        "model_Custm",
                        self.available_models["model_Custm"]
                    )
                    model_custm = importlib.util.module_from_spec(model_custm_spec)
                    model_custm_spec.loader.exec_module(model_custm)
                    if hasattr(model_custm, "Wildnerve_tlm01"):
                        logger.info("Creating Wildnerve_tlm01 from model_Custm")
                        model_class = getattr(model_custm, "Wildnerve_tlm01")
                        # Create model with safer config handling
                        try:
                            # Import config handling
                            from config import app_config
                            # Ensure config_data exists if app_config is a dict.
                            # NOTE(review): this makes TRANSFORMER_CONFIG reference
                            # itself under "config_data" - presumably the model
                            # expects that attribute; confirm against model_Custm.
                            if isinstance(app_config, dict) and "TRANSFORMER_CONFIG" in app_config:
                                if isinstance(app_config["TRANSFORMER_CONFIG"], dict) and "config_data" not in app_config["TRANSFORMER_CONFIG"]:
                                    app_config["TRANSFORMER_CONFIG"]["config_data"] = app_config["TRANSFORMER_CONFIG"]
                                    logger.info("Added config_data attribute to TRANSFORMER_CONFIG dictionary")
                        except Exception as config_error:
                            logger.warning(f"Config handling error: {config_error}")
                        self.model = model_class(
                            tokenizer=self.tokenizer,
                            vocab_size=50257,  # GPT-2 vocab size
                            specialization="general",
                            embedding_dim=768,
                            num_heads=12,
                            hidden_dim=768,
                            num_layers=2,  # Reduced for memory efficiency
                            output_size=50257,  # Match GPT-2 vocab
                            dropout=0.1,
                            max_seq_length=128  # Reduced for memory
                        )
                        # Enhanced weight loading with detailed path information
                        if "transformer" in weight_files and weight_files["transformer"]:
                            weight_path = weight_files["transformer"]
                            logger.info(f"Loading weights from {weight_path}")
                            logger.info(f"Weight file exists: {os.path.exists(weight_path)}")
                            logger.info(f"Weight file size: {os.path.getsize(weight_path) / 1024 / 1024:.2f} MB")
                            # strict=False tolerates missing/extra keys in the checkpoint
                            success = load_weights_into_model(self.model, weight_path, strict=False)
                            if success:
                                logger.info("✅ Successfully loaded transformer weights")
                            else:
                                logger.warning("❌ Failed to load transformer weights")
                        else:
                            logger.warning("❌ No transformer weights found in weight_files")
                        logger.info("Successfully created custom model")
                        self.model_loaded = True
                        return
                except Exception as e:
                    logger.error(f"Failed to load model_Custm: {e}")
            # Try model_PrTr next
            if "model_PrTr" in self.available_models:
                try:
                    logger.info("Trying to load model_PrTr")
                    model_prtr_spec = importlib.util.spec_from_file_location(
                        "model_PrTr",
                        self.available_models["model_PrTr"]
                    )
                    model_prtr = importlib.util.module_from_spec(model_prtr_spec)
                    model_prtr_spec.loader.exec_module(model_prtr)
                    if hasattr(model_prtr, "Wildnerve_tlm01"):
                        logger.info("Creating Wildnerve_tlm01 from model_PrTr")
                        model_class = getattr(model_prtr, "Wildnerve_tlm01")
                        self.model = model_class(
                            tokenizer=self.tokenizer,
                            model_name="gpt2"
                        )
                        logger.info("Successfully created pretrained model")
                        self.model_loaded = True
                        return
                except Exception as e:
                    logger.error(f"Failed to load model_PrTr: {e}")
            # Try stub model as last resort
            try:
                logger.info("Trying to load model_stub")
                script_dir = os.path.dirname(os.path.abspath(__file__))
                stub_path = os.path.join(script_dir, "model_stub.py")
                if os.path.exists(stub_path):
                    stub_spec = importlib.util.spec_from_file_location("model_stub", stub_path)
                    model_stub = importlib.util.module_from_spec(stub_spec)
                    stub_spec.loader.exec_module(model_stub)
                    if hasattr(model_stub, "Wildnerve_tlm01"):
                        logger.info("Creating stub model")
                        model_class = getattr(model_stub, "Wildnerve_tlm01")
                        self.model = model_class(
                            tokenizer=self.tokenizer,
                            specialization="stub"
                        )
                        logger.warning("Using STUB model - limited functionality")
                        self.model_loaded = True
                        return
            except Exception as e:
                logger.error(f"Failed to load stub model: {e}")
            logger.error("All model loading attempts failed")
        except Exception as e:
            logger.error(f"Error in _lazy_load_model: {e}")
        finally:
            # Always mark as loaded to avoid repeated attempts
            self.model_loaded = True

    def _initialize_minimal_tokenizer(self):
        """Initialize just the tokenizer, not the model.

        Best-effort: on any failure self.tokenizer simply stays None and the
        caller degrades gracefully.
        """
        try:
            from transformers import AutoTokenizer
            self.tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
            # Fix for GPT-2 tokenizer: it ships without a pad token, so
            # batching/padding fails unless pad_token is set to eos_token.
            if not self.tokenizer.pad_token:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                logger.info("Set GPT-2 pad_token to eos_token")
            logger.info("Initialized minimal tokenizer")
        except Exception as e:
            logger.error(f"Failed to initialize tokenizer: {e}")
# NOTE: late import of `inspect`; ideally this belongs in the top-of-file import block
import inspect