File size: 9,081 Bytes
566d62e 79fb938 1fccff2 e2070bb dbbc74a 1fccff2 399ede8 9399ae2 c640e48 e2070bb dbbc74a e2070bb 79fb938 dbbc74a 6ffc9f3 dbbc74a 14682e0 dbbc74a 14682e0 dbbc74a 14682e0 dbbc74a 6ffc9f3 dbbc74a 05ca8fc 1ed4342 05ca8fc 1ed4342 c640e48 566d62e c640e48 6ffc9f3 c640e48 566d62e c640e48 566d62e c640e48 566d62e 05ca8fc fed585a 05ca8fc c640e48 1a8d9bc 2e70e8c 0abf7a7 6ffc9f3 fa96858 6ffc9f3 c312d94 fa96858 c312d94 e2070bb 6ffc9f3 fa96858 c312d94 6ffc9f3 c312d94 fa96858 e2070bb fa96858 6ffc9f3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# Handler.py - Entry point for Hugging Face inference API
import os
import sys
import time
import torch
import logging
import traceback
from typing import Dict, Any, List
import importlib.util
# Configure logging first so every later import/startup step can emit records.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
# --- DEBUG: confirm correct handler.py is loaded ---
print("DEBUG: using Wildnerve-tlm_HF/handler.py — v7 with file verification")
# Set aggressive memory optimization
# NOTE(review): this 32 MB split-size value is overwritten later in this file
# with max_split_size_mb:128 — confirm which setting is intended to win.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
os.environ["TRANSFORMERS_OFFLINE"] = "1"  # Prevent downloading models
os.environ["LOW_MEMORY_MODE"] = "1"  # Custom flag for our code to detect
# VERIFY CRITICAL FILES: Check required model files exist before proceeding
def verify_required_files(required_files: List[str] = None):
    """Verify that critical model files exist next to this script.

    The files are only checked for presence (and their size logged) — they are
    deliberately NOT imported, so a broken module cannot crash startup.

    Args:
        required_files: Optional list of filenames to check; defaults to the
            model/adapter/tokenizer modules this handler depends on.

    Returns:
        bool: True when every required file is present, False otherwise.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    if required_files is None:
        required_files = [
            "model_Custm.py",
            "model_PrTr.py",
            "adapter_layer.py",
            "tokenizer.py",
        ]
    missing_files = []
    for filename in required_files:
        filepath = os.path.join(script_dir, filename)
        if not os.path.exists(filepath):
            missing_files.append(filename)
            # BUG FIX: these two log messages previously printed the literal
            # text "(unknown)" instead of the actual file name.
            logger.warning(f"Required file not found: {filename}")
        else:
            file_size = os.path.getsize(filepath) / 1024  # KB
            logger.info(f"Found required file: {filename} ({file_size:.1f} KB)")
    if missing_files:
        logger.error(f"Missing required files: {', '.join(missing_files)}")
        return False
    return True
# Verify required files exist but don't load them yet.  The flag is captured
# here and later copied onto each EndpointHandler so requests can degrade
# gracefully instead of crashing when model files are missing.
critical_files_verified = verify_required_files()
if not critical_files_verified:
    logger.warning("Some critical model files are missing - expect errors during request handling")
# Safe config import that won't fail during initialization: if the project
# config module is unavailable, fall back to a minimal in-memory config.
try:
    from config import app_config
    logger.info("Successfully imported config")
    # Ensure config_data exists
    transformer_cfg = getattr(app_config, 'TRANSFORMER_CONFIG', None)
    if isinstance(transformer_cfg, dict) and 'config_data' not in transformer_cfg:
        # Point config_data back at the containing dict itself.
        transformer_cfg['config_data'] = transformer_cfg
        logger.info("Added missing config_data reference to TRANSFORMER_CONFIG")
except Exception as e:
    logger.error(f"Error importing config: {e}")
    # Create minimal config to avoid further errors
    fallback_transformer = {
        "MODEL_NAME": "gpt2",
        "VOCAB_SIZE": 50257,
        "config_data": {},  # Add empty config_data to avoid attribute errors
    }
    # Make config_data reference itself to match expected behavior
    fallback_transformer["config_data"] = fallback_transformer
    app_config = {
        "MODEL_NAME": "Wildnerve-tlm01_Hybrid_Model",
        "TRANSFORMER_CONFIG": fallback_transformer,
    }
# MEMORY OPTIMIZATION: Avoid loading pretrained models during init
os.environ["TRANSFORMERS_OFFLINE"] = "1"  # Prevent downloading models (re-set; already "1" above)
# NOTE(review): this replaces the max_split_size_mb:32 value set near the top
# of the file with 128 — confirm which of the two values is intended.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # Limit CUDA allocations
# Add safeguard for memory usage
def check_memory_usage(threshold_mb: float = 1800):
    """Log current RSS memory usage and warn when it crosses a threshold.

    Args:
        threshold_mb: Usage (in MB) above which a critical warning is logged.
            Defaults to 1800 — per the original comment, 90% of the assumed
            2 GB container limit.

    Returns:
        float: Resident set size in MB, or 0 when psutil is unavailable or
        the measurement fails (best-effort by design).
    """
    try:
        # Imported lazily so a missing psutil never breaks module import.
        import psutil
        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()
        memory_mb = memory_info.rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        if memory_mb > threshold_mb:
            logger.warning(f"Memory usage critical: {memory_mb:.2f} MB. Consider reducing model size.")
        return memory_mb
    except Exception as e:
        logger.warning(f"Error checking memory usage: {e}")
        return 0
# Check memory at startup so the initial footprint is visible in the logs.
check_memory_usage()
# Safely check for required packages without crashing: when pydantic is
# missing, install a tiny stand-in so later attribute access keeps working.
try:
    import pydantic
    print(f"pydantic is available: {pydantic.__version__}")
except ImportError:
    print("pydantic is not available - continuing without it")

    # Create minimal compatibility layer
    class pydantic:
        """Namespace mimicking the parts of the pydantic module used here."""

        @staticmethod
        def __version__():
            return "unavailable"

        class BaseModel:
            """Plain attribute-bag replacement for pydantic.BaseModel."""

            def __init__(self, **kwargs):
                for key, value in kwargs.items():
                    setattr(self, key, value)
try:
    from codecarbon import EmissionsTracker
    print(f"codecarbon is available")
except ImportError:
    print("codecarbon is not available - continuing without carbon tracking")

    # Create minimal compatibility class
    class EmissionsTracker:
        """No-op tracker exposing the start/stop surface used by this file."""

        def __init__(self, *args, **kwargs):
            pass

        def start(self):
            return self

        def stop(self):
            return 0.0
# Make sure adapter_layer.py is properly located
try:
    # For more reliable importing
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, script_dir)
    # MEMORY OPTIMIZATION: Import but don't initialize yet
    from adapter_layer import WildnerveModelAdapter
    logger.info("Successfully imported adapter_layer module")
except ImportError as e:
    logger.error(f"Could not import adapter_layer: {e}")

    # Don't raise error - provide fallback adapter implementation
    class WildnerveModelAdapter:
        """Degraded stand-in used when the real adapter module is absent."""

        def __init__(self, model_path: str = ""):
            self.model_path = model_path
            logger.info(f"Using fallback WildnerveModelAdapter with path: {model_path}")

        def generate(self, text_input, **kwargs):
            return f"Model adapter unavailable. Received input: {text_input[:30]}..."
else:
    # Also try to import TokenizerWrapper
    try:
        from tokenizer import TokenizerWrapper, get_tokenizer
        logger.info("Successfully imported TokenizerWrapper")
    except ImportError as e:
        logger.warning(f"TokenizerWrapper not found: {e}")
# After imports but before EndpointHandler class: best-effort system
# initialization — any failure is logged and startup continues regardless.
try:
    # Try to initialize the system first
    from main import initialize_system
    success = initialize_system()
    logger.info(f"System initialization {'successful' if success else 'failed'}")
except Exception as e:
    logger.error(f"Failed to initialize system: {e}")
class EndpointHandler:
    """Hugging Face inference endpoint handler with lazy, low-memory init.

    Heavy work (importing and constructing the model adapter) is deferred to
    the first request so container startup stays fast and small.  Every
    failure path returns a friendly [{"generated_text": ...}] payload rather
    than raising.
    """

    def __init__(self, model_dir: str = None):
        # Do absolute minimal initialization here
        self.model_dir = model_dir
        self.adapter = None
        self.initialized = False
        # Snapshot of the module-level startup file check, so each request can
        # degrade gracefully instead of failing on missing model files.
        self.critical_files_verified = critical_files_verified
        logger.info(f"Handler init with minimal footprint: {model_dir}")

    def __call__(self, data, parameters=None):
        """Handle one inference request.

        Args:
            data: Either a dict carrying the prompt under "inputs" (HF payload
                shape) or any object convertible to str.
            parameters: Optional generation kwargs forwarded to the adapter.

        Returns:
            list[dict]: [{"generated_text": ...}] as expected by the HF API.
        """
        # Lazy initialization on first request
        if not self.initialized:
            self._initialize_on_demand()
        # Extract prompt text.
        # BUG FIX: data.get("inputs") returned None for payloads without an
        # "inputs" key, making text[:30] below raise TypeError; default to ""
        # while still passing non-string inputs through unchanged.
        if isinstance(data, dict):
            text = data.get("inputs")
            if text is None:
                text = ""
        else:
            text = str(data)
        try:
            # Warning response if critical files are missing
            if not self.critical_files_verified:
                logger.warning("Attempting to process request with missing critical files")
                return [{
                    "generated_text": "System initialization issue: Some model files appear to be missing. " +
                    f"Processing your request about '{text[:30]}...' with limited functionality."
                }]
            # Simple response when no adapter could be constructed
            if not self.adapter:
                logger.info("Using simple text response (no adapter)")
                return [{"generated_text": f"Processing your request about '{text[:30]}...'"}]
            # Generate response with adapter if available
            out = self.adapter.generate(text, **(parameters or {}))
            # Ensure output is valid string
            if not isinstance(out, str):
                out = str(out)
            return [{"generated_text": out}]
        except Exception as e:
            logger.error(f"Generation error: {e}", exc_info=True)
            return [{"generated_text": f"Error processing your request: {str(e)}"}]

    def _initialize_on_demand(self):
        """Initialize adapter when first needed (lazy import).

        On failure the handler is left uninitialized so the next request
        retries, and requests fall back to simple text responses meanwhile.
        """
        try:
            logger.info("Performing lazy initialization on first request")
            # Import with minimal dependencies
            from adapter_layer import WildnerveModelAdapter
            self.adapter = WildnerveModelAdapter(self.model_dir or "")
            # Only marked True on success, so later requests can retry.
            self.initialized = True
            logger.info("Adapter initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing adapter: {e}", exc_info=True)
            # Continue without adapter, we'll return simple responses