|
|
|
|
|
import os
|
|
|
import sys
|
|
|
import time
|
|
|
import torch
|
|
|
import logging
|
|
|
import traceback
|
|
|
from typing import Dict, Any, List
|
|
|
import importlib.util
|
|
|
|
|
|
|
|
|
# Configure root logging once at import time so every module logger inherits
# the same timestamped format.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)

logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
# Startup banner so container logs show exactly which handler version loaded.
print("DEBUG: using Wildnerve-tlm_HF/handler.py — v7 with file verification")

# Conservative memory posture before any heavy import does allocations:
# small CUDA allocator splits, no hub downloads, low-memory code paths on.
os.environ.update({
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:32",
    "TRANSFORMERS_OFFLINE": "1",
    "LOW_MEMORY_MODE": "1",
})
|
|
|
|
|
|
|
|
|
def verify_required_files():
    """Verify that critical model files exist without importing them.

    Checks for the model source files next to this script, logging each one
    found (with its size in KB) or missing. Nothing is imported here, so a
    syntactically broken module cannot crash startup.

    Returns:
        bool: True when every required file is present, False otherwise.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    required_files = [
        "model_Custm.py",
        "model_PrTr.py",
        "adapter_layer.py",
        "tokenizer.py",
    ]

    missing_files = []
    for filename in required_files:
        filepath = os.path.join(script_dir, filename)
        if not os.path.exists(filepath):
            missing_files.append(filename)
            # Fix: message previously had no placeholder, so the log never
            # said WHICH file was missing.
            logger.warning(f"Required file not found: {filename}")
        else:
            file_size = os.path.getsize(filepath) / 1024  # bytes -> KB
            logger.info(f"Found required file: {filename} ({file_size:.1f} KB)")

    if missing_files:
        logger.error(f"Missing required files: {', '.join(missing_files)}")
        return False
    return True
|
|
|
|
|
|
|
|
|
# One-time startup verification; EndpointHandler snapshots this flag and
# degrades gracefully on requests when files are missing.
critical_files_verified = verify_required_files()

if not critical_files_verified:
    logger.warning(
        "Some critical model files are missing - expect errors during request handling"
    )
|
|
|
|
|
|
|
|
|
try:
    from config import app_config
    logger.info("Successfully imported config")

    # Some consumers expect TRANSFORMER_CONFIG["config_data"]; when absent,
    # point it back at the config dict itself (intentional self-reference).
    tcfg = getattr(app_config, 'TRANSFORMER_CONFIG', None)
    if isinstance(tcfg, dict) and 'config_data' not in tcfg:
        tcfg['config_data'] = tcfg
        logger.info("Added missing config_data reference to TRANSFORMER_CONFIG")

except Exception as e:
    logger.error(f"Error importing config: {e}")

    # Minimal built-in defaults so the handler can still run without config.py.
    _fallback_tcfg = {
        "MODEL_NAME": "gpt2",
        "VOCAB_SIZE": 50257,
        "config_data": {},
    }
    app_config = {
        "MODEL_NAME": "Wildnerve-tlm01_Hybrid_Model",
        "TRANSFORMER_CONFIG": _fallback_tcfg,
    }
    # Mirror the self-referential layout produced on the import path above.
    _fallback_tcfg["config_data"] = _fallback_tcfg
|
|
|
|
|
|
|
|
|
# NOTE(review): this re-sets PYTORCH_CUDA_ALLOC_CONF and silently overrides
# the earlier max_split_size_mb:32 value — confirm which setting is intended.
os.environ.update({
    "TRANSFORMERS_OFFLINE": "1",
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
})
|
|
|
|
|
|
|
|
|
def check_memory_usage():
    """Log this process's resident memory (MB) and warn above ~1.8 GB.

    Returns the RSS in megabytes, or 0 when psutil is unavailable or the
    probe fails for any reason (best-effort: never raises).
    """
    try:
        import psutil

        rss_mb = psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)
        logger.info(f"Current memory usage: {rss_mb:.2f} MB")
        # ~1.8 GB threshold — presumably sized against a 2 GB container limit.
        if rss_mb > 1800:
            logger.warning(f"Memory usage critical: {rss_mb:.2f} MB. Consider reducing model size.")
        return rss_mb
    except Exception as e:
        logger.warning(f"Error checking memory usage: {e}")
        return 0
|
|
|
|
|
|
|
|
|
# Log baseline memory usage at import time, before any model is loaded.
check_memory_usage()
|
|
|
|
|
|
|
|
|
# Probe for optional pydantic; define minimal stand-ins when it is absent so
# later references to these names keep working.
try:
    import pydantic
    print(f"pydantic is available: {pydantic.__version__}")
except ImportError:
    print("pydantic is not available - continuing without it")

    class pydantic:
        """Degenerate stand-in for the pydantic module."""
        # Fix: the real module exposes __version__ as a *string* attribute.
        # The previous @staticmethod made f"{pydantic.__version__}" render a
        # bound-method repr instead of a version string.
        __version__ = "unavailable"

    class BaseModel:
        """Minimal pydantic.BaseModel replacement: stores kwargs as
        attributes, performs no validation."""

        def __init__(self, **kwargs):
            for k, v in kwargs.items():
                setattr(self, k, v)
|
|
|
|
|
|
# Probe for optional codecarbon; fall back to a no-op tracker so callers can
# unconditionally start()/stop() without carbon accounting.
try:
    from codecarbon import EmissionsTracker
    print(f"codecarbon is available")
except ImportError:
    print("codecarbon is not available - continuing without carbon tracking")

    class EmissionsTracker:
        """No-op EmissionsTracker: accepts any args, tracks nothing."""

        def __init__(self, *args, **kwargs):
            pass

        def start(self):
            # Returned self mirrors a fluent start() for chained use.
            return self

        def stop(self):
            # Zero emissions reported when tracking is unavailable.
            return 0.0
|
|
|
|
|
|
|
|
|
# Import the real model adapter (and optional tokenizer) from this script's
# directory; on failure, install a degraded fallback adapter so the handler
# can still answer requests.
try:

    # Make the handler's own directory importable so sibling modules
    # (adapter_layer, tokenizer, ...) resolve by bare name.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, script_dir)

    from adapter_layer import WildnerveModelAdapter
    logger.info("Successfully imported adapter_layer module")

    # Tokenizer is optional: warn and continue when it is absent.
    try:
        from tokenizer import TokenizerWrapper, get_tokenizer
        logger.info("Successfully imported TokenizerWrapper")
    except ImportError as e:
        logger.warning(f"TokenizerWrapper not found: {e}")

except ImportError as e:
    logger.error(f"Could not import adapter_layer: {e}")

    class WildnerveModelAdapter:
        """Fallback stub used when the real adapter cannot be imported;
        generate() returns a canned message instead of model output."""

        def __init__(self, model_path: str = ""):
            # Path is recorded for parity with the real adapter's interface.
            self.model_path = model_path
            logger.info(f"Using fallback WildnerveModelAdapter with path: {model_path}")

        def generate(self, text_input, **kwargs):
            # Echo a truncated preview of the input so callers see something.
            return f"Model adapter unavailable. Received input: {text_input[:30]}..."
|
|
|
|
|
|
|
|
|
# Best-effort system bootstrap: a failure here is logged but must not stop
# the handler from loading (it can still serve degraded responses).
try:
    from main import initialize_system

    success = initialize_system()
    status = 'successful' if success else 'failed'
    logger.info(f"System initialization {status}")
except Exception as e:
    logger.error(f"Failed to initialize system: {e}")
|
|
|
|
|
|
class EndpointHandler:
    """Inference-endpoint entry point.

    Construction is deliberately cheap: the heavy model adapter is created
    lazily on the first request (see _initialize_on_demand), so the
    container can come up quickly even when model files are large or
    missing. Every code path returns a list with a single
    {"generated_text": str} dict — including error paths.
    """

    def __init__(self, model_dir: str = None):
        # Directory containing model artifacts; may be None (adapter gets "").
        self.model_dir = model_dir
        self.adapter = None        # created lazily on first request
        self.initialized = False   # guards the one-time lazy init
        # Snapshot of the module-level file check performed at import time.
        self.critical_files_verified = critical_files_verified
        logger.info(f"Handler init with minimal footprint: {model_dir}")

    def __call__(self, data, parameters=None):
        """Handle one inference request.

        Args:
            data: dict with an "inputs" key (HF convention) — optionally
                also a "parameters" dict — or any object coerced to str.
            parameters: optional generation kwargs forwarded to the
                adapter; when omitted, falls back to data["parameters"].

        Returns:
            list[dict]: one {"generated_text": str} element, always.
        """
        if not self.initialized:
            self._initialize_on_demand()

        if isinstance(data, dict):
            raw = data.get("inputs")
            # Fix: a dict without "inputs" yielded None, and text[:30] below
            # raised TypeError (surfaced as a confusing error response).
            text = "" if raw is None else str(raw)
            # Backward-compatible: honor HF-style nested generation kwargs
            # when the explicit argument was not supplied.
            if parameters is None:
                params = data.get("parameters")
                if isinstance(params, dict):
                    parameters = params
        else:
            text = str(data)

        try:
            if not self.critical_files_verified:
                logger.warning("Attempting to process request with missing critical files")
                return [{
                    "generated_text": "System initialization issue: Some model files appear to be missing. " +
                    f"Processing your request about '{text[:30]}...' with limited functionality."
                }]

            if not self.adapter:
                # Degraded mode: adapter never initialized; echo the input.
                logger.info("Using simple text response (no adapter)")
                return [{"generated_text": f"Processing your request about '{text[:30]}...'"}]

            out = self.adapter.generate(text, **(parameters or {}))

            # Normalize non-string adapter output into the response schema.
            if not isinstance(out, str):
                out = str(out)

            return [{"generated_text": out}]
        except Exception as e:
            # Never propagate: the endpoint contract is a well-formed response.
            logger.error(f"Generation error: {e}", exc_info=True)
            return [{"generated_text": f"Error processing your request: {str(e)}"}]

    def _initialize_on_demand(self):
        """Initialize adapter when first needed (lazy, retried per request).

        On failure, self.initialized intentionally stays False so the next
        request retries; __call__ then degrades to the no-adapter response.
        """
        try:
            logger.info("Performing lazy initialization on first request")

            from adapter_layer import WildnerveModelAdapter
            self.adapter = WildnerveModelAdapter(self.model_dir or "")
            self.initialized = True
            logger.info("Adapter initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing adapter: {e}", exc_info=True)
|
|
|
|