# Uploaded by WildnerveAI — commit a09e963 (verified)
# Handler.py - Entry point for Hugging Face inference API
import os
import sys
import time
import torch
import logging
import traceback
from typing import Dict, Any, List
import importlib.util
# Logging must be configured before any other module-level code logs.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# --- DEBUG: confirm correct handler.py is loaded ---
print("DEBUG: using Wildnerve-tlm_HF/handler.py — v7 with file verification")

# Aggressive memory-optimization flags, read by torch/transformers/our code.
for _env_key, _env_val in (
    ("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:32"),
    ("TRANSFORMERS_OFFLINE", "1"),  # Prevent downloading models
    ("LOW_MEMORY_MODE", "1"),       # Custom flag for our code to detect
):
    os.environ[_env_key] = _env_val
# VERIFY CRITICAL FILES: Check required model files exist before proceeding
def verify_required_files():
    """Verify that the critical model files exist next to this script.

    Only presence is checked (nothing is imported), so a broken module
    cannot crash startup. Each file's presence/size is logged.

    Returns:
        bool: True when every required file is present, False otherwise.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    required_files = [
        "model_Custm.py",
        "model_PrTr.py",
        "adapter_layer.py",
        "tokenizer.py",
    ]
    missing_files = []
    for filename in required_files:
        filepath = os.path.join(script_dir, filename)
        if not os.path.exists(filepath):
            missing_files.append(filename)
            # Bug fix: these messages previously logged a literal placeholder
            # instead of the actual file name, making the logs useless.
            logger.warning(f"Required file not found: {filepath}")
        else:
            file_size = os.path.getsize(filepath) / 1024  # KB
            logger.info(f"Found required file: {filename} ({file_size:.1f} KB)")
    if missing_files:
        logger.error(f"Missing required files: {', '.join(missing_files)}")
        return False
    return True
# Run the file check once at import time (nothing is loaded yet); the
# outcome is consulted again inside each request.
if not (critical_files_verified := verify_required_files()):
    logger.warning("Some critical model files are missing - expect errors during request handling")
# Safe config import that won't fail during initialization
try:
    from config import app_config
    logger.info("Successfully imported config")
    # Guarantee TRANSFORMER_CONFIG carries a self-referential 'config_data'
    # entry, which downstream code expects to find.
    if hasattr(app_config, 'TRANSFORMER_CONFIG'):
        transformer_cfg = app_config.TRANSFORMER_CONFIG
        if isinstance(transformer_cfg, dict) and 'config_data' not in transformer_cfg:
            transformer_cfg['config_data'] = transformer_cfg
            logger.info("Added missing config_data reference to TRANSFORMER_CONFIG")
except Exception as exc:
    logger.error(f"Error importing config: {exc}")
    # Build a minimal stand-in config so later attribute/key access survives.
    fallback_transformer_cfg = {
        "MODEL_NAME": "gpt2",
        "VOCAB_SIZE": 50257,
    }
    # config_data points back at the section itself, matching expected shape.
    fallback_transformer_cfg["config_data"] = fallback_transformer_cfg
    app_config = {
        "MODEL_NAME": "Wildnerve-tlm01_Hybrid_Model",
        "TRANSFORMER_CONFIG": fallback_transformer_cfg,
    }
# MEMORY OPTIMIZATION: Avoid loading pretrained models during init
# NOTE(review): both variables were already set near the top of this file;
# this second assignment silently raises PYTORCH_CUDA_ALLOC_CONF from
# max_split_size_mb:32 to 128 — confirm which split size is actually intended.
os.environ["TRANSFORMERS_OFFLINE"] = "1" # Prevent downloading models
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128" # Limit CUDA allocations
# Add safeguard for memory usage
def check_memory_usage(warn_threshold_mb: float = 1800.0):
    """Log the current process RSS and warn when it exceeds a threshold.

    Args:
        warn_threshold_mb: RSS level in MB above which a critical warning is
            logged. Defaults to 1800 (~90% of a 2 GB container limit); was
            previously hard-coded.

    Returns:
        float: resident set size in MB, or 0 if psutil is unavailable or the
        measurement fails (best-effort by design).
    """
    try:
        import psutil  # Imported lazily: psutil may be absent in some images
        process = psutil.Process(os.getpid())
        memory_mb = process.memory_info().rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        if memory_mb > warn_threshold_mb:
            logger.warning(f"Memory usage critical: {memory_mb:.2f} MB. Consider reducing model size.")
        return memory_mb
    except Exception as e:
        logger.warning(f"Error checking memory usage: {e}")
        return 0
# Check memory at startup: logs the baseline RSS once at import time.
check_memory_usage()
# Safely check for required packages without crashing
try:
    import pydantic
    print(f"pydantic is available: {pydantic.__version__}")
except ImportError:
    print("pydantic is not available - continuing without it")

    # Minimal compatibility layer so later `pydantic.X` references still work.
    class pydantic:
        # Bug fix: this was a @staticmethod, so `pydantic.__version__`
        # evaluated to a function object instead of a version string like the
        # real module attribute. A plain class attribute matches real usage.
        __version__ = "unavailable"

        class BaseModel:
            def __init__(self, **kwargs):
                # Plain attribute assignment stands in for pydantic validation.
                for k, v in kwargs.items():
                    setattr(self, k, v)
try:
    from codecarbon import EmissionsTracker
    # Fix: was an f-string with no placeholders.
    print("codecarbon is available")
except ImportError:
    print("codecarbon is not available - continuing without carbon tracking")

    # No-op stand-in matching the tracker's start/stop surface.
    class EmissionsTracker:
        def __init__(self, *args, **kwargs):
            pass

        def start(self):
            return self

        def stop(self):
            # Real tracker reports emissions (kg CO2eq); the stub reports zero.
            return 0.0
# Make sure adapter_layer.py is properly located
try:
    # Put this file's directory first on sys.path for reliable importing.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, script_dir)
    # MEMORY OPTIMIZATION: Import only — the adapter is instantiated lazily.
    from adapter_layer import WildnerveModelAdapter
    logger.info("Successfully imported adapter_layer module")
    # Also try to import TokenizerWrapper (optional; warn rather than fail).
    try:
        from tokenizer import TokenizerWrapper, get_tokenizer
        logger.info("Successfully imported TokenizerWrapper")
    except ImportError as e:
        logger.warning(f"TokenizerWrapper not found: {e}")
except ImportError as e:
    logger.error(f"Could not import adapter_layer: {e}")

    # Don't raise - provide a fallback adapter so requests degrade gracefully.
    class WildnerveModelAdapter:
        def __init__(self, model_path: str = ""):
            self.model_path = model_path
            logger.info(f"Using fallback WildnerveModelAdapter with path: {model_path}")

        def generate(self, text_input, **kwargs):
            # Bug fix: text_input[:30] raised TypeError for non-sliceable
            # payloads; coerce to str before taking the preview.
            preview = str(text_input)[:30]
            return f"Model adapter unavailable. Received input: {preview}..."
# Attempt one-time system initialization before the handler class is defined.
try:
    from main import initialize_system

    ok = initialize_system()
    logger.info(f"System initialization {'successful' if ok else 'failed'}")
except Exception as e:
    logger.error(f"Failed to initialize system: {e}")
class EndpointHandler:
    """Hugging Face inference endpoint handler with lazy adapter creation.

    Heavy model loading is deferred to the first request so container startup
    stays within the platform's init time/memory budget. Errors are reported
    in-band as generated_text rather than raised.
    """

    def __init__(self, model_dir: str = None):
        # Do absolute minimal initialization here; adapter is built lazily.
        self.model_dir = model_dir
        self.adapter = None
        self.initialized = False
        # Snapshot of the module-level file check performed at import time.
        self.critical_files_verified = critical_files_verified
        logger.info(f"Handler init with minimal footprint: {model_dir}")

    def __call__(self, data, parameters=None):
        """Handle one inference request.

        Args:
            data: Either a dict with an "inputs" key (HF convention) or any
                object convertible to str.
            parameters: Optional dict of generation kwargs forwarded to the
                adapter.

        Returns:
            list[dict]: a single-element list of {"generated_text": str}.
        """
        # Lazy initialization on first request.
        if not self.initialized:
            self._initialize_on_demand()

        # Extract prompt text. Bug fix: default to "" so a dict without an
        # "inputs" key no longer propagates None into the slicing below
        # (which raised TypeError and produced an error response).
        if isinstance(data, dict):
            text = data.get("inputs", "")
        else:
            text = str(data)

        try:
            # Degraded response if critical model files were missing at import.
            if not self.critical_files_verified:
                logger.warning("Attempting to process request with missing critical files")
                return [{
                    "generated_text": "System initialization issue: Some model files appear to be missing. " +
                                      f"Processing your request about '{text[:30]}...' with limited functionality."
                }]

            # Simple echo response when no adapter could be initialized.
            if not self.adapter:
                logger.info("Using simple text response (no adapter)")
                return [{"generated_text": f"Processing your request about '{text[:30]}...'"}]

            # Generate the real response with the adapter.
            out = self.adapter.generate(text, **(parameters or {}))
            # Ensure output is a JSON-serializable string.
            if not isinstance(out, str):
                out = str(out)
            return [{"generated_text": out}]
        except Exception as e:
            logger.error(f"Generation error: {e}", exc_info=True)
            return [{"generated_text": f"Error processing your request: {str(e)}"}]

    def _initialize_on_demand(self):
        """Build the adapter on first use (re-attempted on failure)."""
        try:
            logger.info("Performing lazy initialization on first request")
            # Import with minimal dependencies.
            from adapter_layer import WildnerveModelAdapter
            self.adapter = WildnerveModelAdapter(self.model_dir or "")
            self.initialized = True
            logger.info("Adapter initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing adapter: {e}", exc_info=True)
            # Continue without adapter; __call__ falls back to simple responses.