# Handler.py - Entry point for Hugging Face inference API
import os
import sys
import time
import torch
import logging
import traceback
from typing import Dict, Any, List
import importlib.util

# Configure logging first
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)

# --- DEBUG: confirm correct handler.py is loaded ---
print("DEBUG: using Wildnerve-tlm_HF/handler.py — v7 with file verification")

# Set aggressive memory optimization
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:32"
os.environ["TRANSFORMERS_OFFLINE"] = "1"  # Prevent downloading models
os.environ["LOW_MEMORY_MODE"] = "1"  # Custom flag for our code to detect


# VERIFY CRITICAL FILES: Check required model files exist before proceeding
def verify_required_files():
    """Verify that critical model files exist without importing them"""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    required_files = [
        "model_Custm.py",
        "model_PrTr.py",
        "adapter_layer.py",
        "tokenizer.py"
    ]

    missing_files = []
    for filename in required_files:
        filepath = os.path.join(script_dir, filename)
        if not os.path.exists(filepath):
            missing_files.append(filename)
            logger.warning(f"Required file not found: {filename}")
        else:
            file_size = os.path.getsize(filepath) / 1024  # KB
            logger.info(f"Found required file: {filename} ({file_size:.1f} KB)")

    if missing_files:
        logger.error(f"Missing required files: {', '.join(missing_files)}")
        return False
    return True


# Verify required files exist but don't load them yet
critical_files_verified = verify_required_files()
if not critical_files_verified:
    logger.warning("Some critical model files are missing - expect errors during request handling")

# Safe config import that won't fail during initialization
try:
    from config import app_config
    logger.info("Successfully imported config")

    # Ensure config_data exists
    if hasattr(app_config, 'TRANSFORMER_CONFIG'):
        if isinstance(app_config.TRANSFORMER_CONFIG, dict) and 'config_data' not in app_config.TRANSFORMER_CONFIG:
            app_config.TRANSFORMER_CONFIG['config_data'] = app_config.TRANSFORMER_CONFIG
            logger.info("Added missing config_data reference to TRANSFORMER_CONFIG")
except Exception as e:
    logger.error(f"Error importing config: {e}")
    # Create minimal config to avoid further errors
    app_config = {
        "MODEL_NAME": "Wildnerve-tlm01_Hybrid_Model",
        "TRANSFORMER_CONFIG": {
            "MODEL_NAME": "gpt2",
            "VOCAB_SIZE": 50257,
            "config_data": {}  # Add empty config_data to avoid attribute errors
        }
    }
    # Make config_data reference itself to match expected behavior
    app_config["TRANSFORMER_CONFIG"]["config_data"] = app_config["TRANSFORMER_CONFIG"]

# MEMORY OPTIMIZATION: Avoid loading pretrained models during init
os.environ["TRANSFORMERS_OFFLINE"] = "1"  # Prevent downloading models
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"  # Limit CUDA allocations


# Add safeguard for memory usage
def check_memory_usage():
    """Check memory usage and log warning if too high"""
    try:
        import psutil
        process = psutil.Process(os.getpid())
        memory_info = process.memory_info()
        memory_mb = memory_info.rss / 1024 / 1024
        logger.info(f"Current memory usage: {memory_mb:.2f} MB")
        if memory_mb > 1800:  # 1.8 GB (90% of limit)
            logger.warning(f"Memory usage critical: {memory_mb:.2f} MB. Consider reducing model size.")
        return memory_mb
    except Exception as e:
        logger.warning(f"Error checking memory usage: {e}")
        return 0


# Check memory at startup
check_memory_usage()

# Safely check for required packages without crashing
try:
    import pydantic
    print(f"pydantic is available: {pydantic.__version__}")
except ImportError:
    print("pydantic is not available - continuing without it")

    # Create minimal compatibility layer
    class pydantic:
        @staticmethod
        def __version__():
            return "unavailable"

        class BaseModel:
            def __init__(self, **kwargs):
                for k, v in kwargs.items():
                    setattr(self, k, v)

try:
    from codecarbon import EmissionsTracker
    print("codecarbon is available")
except ImportError:
    print("codecarbon is not available - continuing without carbon tracking")

    # Create minimal compatibility class
    class EmissionsTracker:
        def __init__(self, *args, **kwargs):
            pass

        def start(self):
            return self

        def stop(self):
            return 0.0

# Make sure adapter_layer.py is properly located
try:
    # For more reliable importing
    script_dir = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, script_dir)

    # MEMORY OPTIMIZATION: Import but don't initialize yet
    from adapter_layer import WildnerveModelAdapter
    logger.info("Successfully imported adapter_layer module")

    # Also try to import TokenizerWrapper
    try:
        from tokenizer import TokenizerWrapper, get_tokenizer
        logger.info("Successfully imported TokenizerWrapper")
    except ImportError as e:
        logger.warning(f"TokenizerWrapper not found: {e}")
except ImportError as e:
    logger.error(f"Could not import adapter_layer: {e}")

    # Don't raise error - provide fallback adapter implementation
    class WildnerveModelAdapter:
        def __init__(self, model_path: str = ""):
            self.model_path = model_path
            logger.info(f"Using fallback WildnerveModelAdapter with path: {model_path}")

        def generate(self, text_input, **kwargs):
            return f"Model adapter unavailable. Received input: {text_input[:30]}..."

# After imports but before EndpointHandler class
try:
    # Try to initialize the system first
    from main import initialize_system
    success = initialize_system()
    logger.info(f"System initialization {'successful' if success else 'failed'}")
except Exception as e:
    logger.error(f"Failed to initialize system: {e}")


class EndpointHandler:
    def __init__(self, model_dir: str = None):
        # Do absolute minimal initialization here
        self.model_dir = model_dir
        self.adapter = None
        self.initialized = False
        self.critical_files_verified = critical_files_verified
        logger.info(f"Handler init with minimal footprint: {model_dir}")

    def __call__(self, data, parameters=None):
        # Lazy initialization on first request
        if not self.initialized:
            self._initialize_on_demand()

        # Extract prompt text
        text = data.get("inputs") if isinstance(data, dict) else str(data)

        try:
            # Warning response if critical files are missing
            if not self.critical_files_verified:
                logger.warning("Attempting to process request with missing critical files")
                return [{
                    "generated_text": "System initialization issue: Some model files appear to be missing. " +
                    f"Processing your request about '{text[:30]}...' with limited functionality."
                }]

            # Simple response for first call
            if not self.adapter:
                logger.info("Using simple text response (no adapter)")
                return [{"generated_text": f"Processing your request about '{text[:30]}...'"}]

            # Generate response with adapter if available
            out = self.adapter.generate(text, **(parameters or {}))

            # Ensure output is valid string
            if not isinstance(out, str):
                out = str(out)

            return [{"generated_text": out}]
        except Exception as e:
            logger.error(f"Generation error: {e}", exc_info=True)
            return [{"generated_text": f"Error processing your request: {str(e)}"}]

    def _initialize_on_demand(self):
        """Initialize adapter when first needed"""
        try:
            logger.info("Performing lazy initialization on first request")

            # Import with minimal dependencies
            from adapter_layer import WildnerveModelAdapter
            self.adapter = WildnerveModelAdapter(self.model_dir or "")
            self.initialized = True
            logger.info("Adapter initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing adapter: {e}", exc_info=True)
            # Continue without adapter, we'll return simple responses
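# --- Optional local smoke test (not part of the original handler) ---
# A minimal sketch of how an inference endpoint is expected to drive this
# handler: construct EndpointHandler once with the model directory, then call
# it with a {"inputs": ...} payload and optional generation parameters.
# The sample prompt below is a placeholder; the keys accepted in `parameters`
# depend on the adapter's generate() API, so an empty dict is used here.
if __name__ == "__main__":
    handler = EndpointHandler(model_dir=os.path.dirname(os.path.abspath(__file__)))
    sample_request = {"inputs": "Hello from a local smoke test"}
    print(handler(sample_request, parameters={}))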