Upload 2 files
Browse files
- adapter_layer.py +237 -219
- handler.py +24 -34
adapter_layer.py
CHANGED
@@ -6,29 +6,132 @@ import traceback
 from typing import Dict, Any, Optional, List
 import importlib.util
 
-#
-# Inline implementation if module isn't available
-def safely_import(module_name):
-    try:
-        return importlib.import_module(module_name)
-    except ImportError:
-        return None
-
-def is_module_available(module_name):
-    try:
-        importlib.util.find_spec(module_name)
-        return True
-    except ImportError:
-        return False
+# IMPROVED MOCKING STRATEGY: Pre-check and create more complete mock modules
+# These need to be defined BEFORE any imports that might require them
+class CompleteModelLoader:
+    """A utility to ensure model loading succeeds by properly handling dependencies"""
+
+    @staticmethod
+    def setup_environment():
+        """Set up the environment to ensure model loading succeeds"""
+        # First, create more comprehensive mocks for critical dependencies
+        if 'pydantic' not in sys.modules:
+            # Create a more complete mock for pydantic
+            class BaseModel:
+                def __init__(self, **kwargs):
+                    for key, value in kwargs.items():
+                        setattr(self, key, value)
+
+                @classmethod
+                def model_validate(cls, obj, **kwargs):
+                    return cls(**obj)
+
+            class Field:
+                def __call__(self, *args, **kwargs):
+                    return None
+
+                def __new__(cls, *args, **kwargs):
+                    return None
+
+            class MockPydantic:
+                BaseModel = BaseModel
+                Field = Field
+                ValidationError = Exception
+                ConfigDict = type('ConfigDict', (), {})
+
+            class ConfigError(Exception):
+                pass
+
+            sys.modules['pydantic'] = MockPydantic
+            print("Created comprehensive mock for pydantic")
+
+        if 'codecarbon' not in sys.modules:
+            # Create a more complete mock for codecarbon
+            class EmissionsTracker:
+                def __init__(self, *args, **kwargs):
+                    pass
+
+                def start(self):
+                    return self
+
+                def stop(self):
+                    return 0.0
+
+            class MockCodecarbon:
+                EmissionsTracker = EmissionsTracker
+
+            sys.modules['codecarbon'] = MockCodecarbon
+            print("Created comprehensive mock for codecarbon")
+
+        # Now ensure that these modules are found when imported
+        return True
+
+    @staticmethod
+    def preload_models():
+        """Preload model modules to ensure they're available"""
+        import importlib
+
+        # List of model modules that need to be available
+        model_modules = ["model_Combn", "model_Custm", "model_PrTr"]
+        loaded_modules = []
+
+        for module_name in model_modules:
+            try:
+                # First check if the module already exists
+                if module_name in sys.modules:
+                    print(f"Module {module_name} already loaded")
+                    loaded_modules.append(module_name)
+                    continue
+
+                # Try to import the module directly
+                module = importlib.import_module(module_name)
+                loaded_modules.append(module_name)
+                print(f"Successfully loaded {module_name}")
+            except ImportError as e:
+                # If direct import fails, check if the module file exists
+                module_path = None
+
+                # Check standard paths
+                potential_paths = [
+                    f"{module_name}.py",
+                    os.path.join(os.getcwd(), f"{module_name}.py"),
+                    os.path.join("/repository", f"{module_name}.py")
+                ]
+
+                for path in potential_paths:
+                    if os.path.exists(path):
+                        module_path = path
+                        break
+
+                if module_path:
+                    try:
+                        # Try to load the module from file
+                        spec = importlib.util.spec_from_file_location(module_name, module_path)
+                        module = importlib.util.module_from_spec(spec)
+                        sys.modules[module_name] = module
+                        spec.loader.exec_module(module)
+                        loaded_modules.append(module_name)
+                        print(f"Loaded {module_name} from file: {module_path}")
+                    except Exception as e2:
+                        print(f"Error loading {module_name} from file: {e2}")
+                else:
+                    print(f"Could not find module file for {module_name}")
+
+        return loaded_modules
+
+# Set up the environment before any other imports
+CompleteModelLoader.setup_environment()
+# Now preload the model modules
+loaded_model_modules = CompleteModelLoader.preload_models()
+
+# Proceed with regular imports
+# Import dependency helpers - keep this simple
+def is_module_available(module_name):
+    try:
+        importlib.util.find_spec(module_name)
+        return True
+    except ImportError:
+        return False
 
 logger = logging.getLogger(__name__)
 
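A note for reviewers on the stubbing trick this hunk relies on: Python consults `sys.modules` before searching the filesystem, so registering any object under a module's name makes later `import` statements succeed without the real package. A minimal, self-contained sketch of the mechanism (the `metrics` module name is hypothetical, not part of this repo):

```python
import sys
import types

# Build a throwaway module object and give it the attribute callers expect.
stub = types.ModuleType("metrics")           # hypothetical missing dependency
stub.track = lambda *args, **kwargs: None    # no-op stand-in for the real API

# Registering the stub BEFORE the first import is what makes this work:
# 'import metrics' hits sys.modules first and never touches the filesystem.
sys.modules["metrics"] = stub

import metrics                # resolves to the stub, no ImportError
metrics.track("requests", 1)  # safe no-op
```

The diff registers plain classes rather than `types.ModuleType` instances; that also works, since the import machinery only needs attribute access on whatever object sits in `sys.modules`.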
@@ -41,7 +144,7 @@ class WildnerveModelAdapter:
     def __init__(self, model_path: str):
         self.model_path = model_path
         self.tokenizer = None
-        self.fallback_model = None
+        self.model = None
        self.initialized = False
 
         # Ensure the model path is in sys.path so we can import from it
@@ -50,62 +153,27 @@ class WildnerveModelAdapter:
 
         logger.info(f"Model adapter initialized with path: {model_path}")
 
-        # Initialize
+        # Initialize components
         self._initialize_tokenizer()
+        self._initialize_model()
 
     def _initialize_tokenizer(self):
-        """Initialize tokenizer"""
-        # First try loading config - use original implementation if available
+        """Initialize tokenizer from registry or directly"""
         try:
-            # Check if we have a config module
-            has_config = is_module_available('config')
-
             # Try to import from service_registry if available
-            has_registry = is_module_available('service_registry')
-
-            # Use the appropriate approach based on available modules
-            if has_registry:
-                # Use original registry approach
+            if is_module_available('service_registry'):
                 from service_registry import registry, TOKENIZER
 
                 if registry.has(TOKENIZER):
                     self.tokenizer = registry.get(TOKENIZER)
                     logger.info("Retrieved tokenizer from registry")
                     return
 
             # Try loading from the original tokenizer.py
-            tokenizer_module = None
-
-            # First check if it's directly importable
             if is_module_available('tokenizer'):
-                tokenizer_module = safely_import('tokenizer')
-
-            # Next try to load it from model_path
-            if tokenizer_module is None:
-                tokenizer_path = os.path.join(self.model_path, "tokenizer.py")
-                if os.path.exists(tokenizer_path):
-                    spec = importlib.util.spec_from_file_location("tokenizer_module", tokenizer_path)
-                    tokenizer_module = importlib.util.module_from_spec(spec)
-                    spec.loader.exec_module(tokenizer_module)
-                    logger.info("Loaded tokenizer module from model path")
-
-            # Create tokenizer if module was loaded
-            if tokenizer_module is not None and hasattr(tokenizer_module, 'TokenizerWrapper'):
-                # Handle potential missing config_app
-                if hasattr(tokenizer_module, 'get_tokenizer'):
-                    self.tokenizer = tokenizer_module.get_tokenizer()
-                else:
-                    # Try direct instantiation
-                    self.tokenizer = tokenizer_module.TokenizerWrapper()
-
+                from tokenizer import TokenizerWrapper, get_tokenizer
+                self.tokenizer = get_tokenizer()
                 logger.info("Created TokenizerWrapper instance")
-
-            # Register in registry if available
-            if has_registry:
-                from service_registry import registry, TOKENIZER
-                registry.register(TOKENIZER, self.tokenizer)
-
-            return
+                return
 
         except Exception as e:
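The removed branch above loaded `tokenizer.py` straight out of the model directory with `importlib`, and the new `preload_models` uses the same mechanism. A minimal sketch of that pattern in isolation (the helper name and path are illustrative):

```python
import importlib.util
import sys

def load_module_from_path(name: str, path: str):
    """Load a single .py file as an importable module, bypassing sys.path."""
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {path}")
    module = importlib.util.module_from_spec(spec)
    # Register before exec_module so the file can perform self-imports.
    sys.modules[name] = module
    spec.loader.exec_module(module)
    return module

# Illustrative usage:
# tok = load_module_from_path("tokenizer_module", "/repository/tokenizer.py")
```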
@@ -116,176 +184,110 @@ class WildnerveModelAdapter:
             from transformers import AutoTokenizer
 
             models_to_try = [
                 "bert-base-uncased",
                 "distilbert-base-uncased",
                 "gpt2"
             ]
 
             for model_name in models_to_try:
                 try:
                     self.tokenizer = AutoTokenizer.from_pretrained(model_name)
                     logger.info(f"Using transformers AutoTokenizer with {model_name}")
-
-                    # Register if registry is available
-                    if 'registry' in locals() and 'TOKENIZER' in locals():
-                        registry.register(TOKENIZER, self.tokenizer)
-
                     return
                 except Exception as e:
                     logger.warning(f"Failed to load {model_name}: {e}")
 
         except ImportError:
             logger.warning("transformers package not available")
-
-        # Last resort: use our SimpleTokenizer implementation
-        logger.warning("Using SimpleTokenizer as final fallback")
-        self.tokenizer = SimpleTokenizer()
+        raise ImportError("No tokenizer could be initialized")
 
-    def load_fallback_model(self):
-        """..."""
-        if self.fallback_model is not None:
-            return self.fallback_model
-
+    def _initialize_model(self):
+        """Initialize the actual model"""
         try:
-            #
-            models_to_try = ["model_Combn", "model_Custm", "model_PrTr"]
+            # Check for actual model modules
+            model_modules = ["model_Combn", "model_Custm", "model_PrTr"]
 
-            for module_name in models_to_try:
+            for module_name in model_modules:
                 try:
-                    if is_module_available(module_name):
-                        model_module = importlib.import_module(module_name)
+                    if is_module_available(module_name):
+                        module = importlib.import_module(module_name)
+
+                        # Look for model classes
+                        model_classes = [
+                            "Wildnerve_tlm01_Hybrid_Model",
+                            "Wildnerve_tlm01"
+                        ]
+
+                        for class_name in model_classes:
+                            if hasattr(module, class_name):
+                                model_class = getattr(module, class_name)
+
+                                # Initialize the model
+                                self.model = model_class(
+                                    vocab_size=30522,
+                                    specialization="general",
+                                    dataset_path=None,
+                                    model_name="bert-base-uncased",
+                                    embedding_dim=768,
+                                    num_heads=12,
+                                    hidden_dim=768,
+                                    num_layers=6,
+                                    output_size=768,
+                                    dropout=0.1,
+                                    max_seq_length=512,
+                                    pooling_mode="mean",
+                                    tokenizer=self.tokenizer
+                                )
+
+                                logger.info(f"Successfully created {class_name} from {module_name}")
+                                self.initialized = True
+                                return
                 except Exception as e:
-                    logger.warning(f"Failed to import {module_name}: {e}")
+                    logger.warning(f"Failed to import or initialize from {module_name}: {e}")
 
-            # If a model module was loaded, look for its known classes
-            if model_module is not None:
-                model_classes = [
-                    "Wildnerve_tlm01_Hybrid_Model",
-                    "Wildnerve_tlm01"
-                ]
-
-                for class_name in model_classes:
-                    if hasattr(model_module, class_name):
-                        try:
-                            # Try to instantiate with minimal parameters
-                            model_class = getattr(model_module, class_name)
-                            instance = model_class(
-                                vocab_size=30522,
-                                specialization="general",
-                                dataset_path=None,
-                                model_name="bert-base-uncased",
-                                embedding_dim=768,
-                                num_heads=12,
-                                hidden_dim=768,
-                                num_layers=6,
-                                output_size=768,
-                                dropout=0.1,
-                                max_seq_length=512,
-                                pooling_mode="mean",
-                                tokenizer=self.tokenizer
-                            )
-                            logger.info(f"Created {class_name} instance from {model_module.__name__}")
-                            self.fallback_model = instance
-                            return self.fallback_model
-                        except Exception as e:
-                            logger.warning(f"Failed to instantiate {class_name}: {e}")
-
-            # If we couldn't use the original model, use our fallback
-            self.fallback_model = SimpleFallbackModel(self.tokenizer)
-            logger.info("Created SimpleFallbackModel")
-            return self.fallback_model
-
+            # If no model was initialized, raise error
+            if self.model is None:
+                raise ImportError("No suitable model class found")
+
         except Exception as e:
-            logger.error(f"Failed to load fallback model: {e}")
-
-            self.fallback_model = SimpleFallbackModel(self.tokenizer)
-            return self.fallback_model
+            logger.error(f"Failed to initialize model: {e}")
+            raise
 
     def generate(self, prompt: str, **kwargs) -> str:
         """Generate a response to the prompt"""
-        if self.tokenizer is None:
-            logger.warning("No tokenizer found, re-initializing")
-            self._initialize_tokenizer()
-
-        # If tokenizer is still None after re-initialization, return error message
-        if self.tokenizer is None:
-            return "Unable to process your request due to missing tokenizer."
+        if not self.initialized or self.model is None:
+            raise RuntimeError("Model not initialized")
 
         try:
-            model = self.load_fallback_model()
-            if model is not None:
-                # Try different generation methods the model might have
-                if hasattr(model, "generate_streaming"):
-                    try:
-                        # For streaming we need to collect all tokens
-                        tokens = []
-                        for token in model.generate_streaming(prompt, **kwargs):
-                            tokens.append(token)
-                        return "".join(tokens)
-                    except Exception as e:
-                        logger.warning(f"Streaming generation failed: {e}")
-
-                # Try standard generate methods
-                gen_methods = ["generate_with_decoding", "generate"]
-                for method_name in gen_methods:
-                    if hasattr(model, method_name):
-                        try:
-                            # Tokenize the input if needed
-                            if hasattr(self.tokenizer, "__call__"):
-                                input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
-                            # Get the result
-                            method = getattr(model, method_name)
-                            result = method(input_ids, **kwargs)
-                            if isinstance(result, str) and result:
-                                return result
-                        except Exception as e:
-                            logger.warning(f"{method_name} failed: {e}")
-
-                # If we get here, try a final simple generate method
-                try:
-                    return model.generate(prompt, **kwargs)
-                except Exception as e:
-                    logger.warning(f"Simple generate failed: {e}")
-
-            # If fallback model failed, use a simple hardcoded response based on prompt
-            logger.warning("Using hardcoded response as fallback")
-
-            if "code" in prompt.lower() or "programming" in prompt.lower():
-                return """I can help with coding tasks! However, I'm currently running in fallback mode due to model loading issues.
-
-In normal operation, I can:
-- Write and debug code in multiple languages
-- Explain algorithms and programming concepts
-- Help design software architecture
-- Optimize existing code
-
-Please try again later when the full model capabilities are available."""
-
-            Please try again later when these issues have been resolved."""
+            if hasattr(self.model, "generate_streaming"):
+                try:
+                    tokens = []
+                    for token in self.model.generate_streaming(prompt, **kwargs):
+                        tokens.append(token)
+                    return "".join(tokens)
+                except Exception as e:
+                    logger.warning(f"Streaming generation failed: {e}")
+
+            # Try standard generate methods
+            gen_methods = ["generate_with_decoding", "generate"]
+            for method_name in gen_methods:
+                if hasattr(self.model, method_name):
+                    try:
+                        # Tokenize the input if needed
+                        input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
+
+                        # Get the result
+                        method = getattr(self.model, method_name)
+                        result = method(input_ids, **kwargs)
+
+                        if isinstance(result, str) and result:
+                            return result
+                    except Exception as e:
+                        logger.warning(f"{method_name} failed: {e}")
+
+            # If we get here, try a simple direct generate method
+            return self.model.generate(prompt, **kwargs)
+
         except Exception as e:
             logger.error(f"Error in generate: {e}")
             logger.error(traceback.format_exc())
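Net effect of this hunk: initialization and generation now fail loudly instead of degrading into canned replies. A hedged sketch of the resulting calling contract (the `/repository` path mirrors the one probed by `preload_models` and is an assumption, as is the `max_length` kwarg, which is simply forwarded through `**kwargs`):

```python
from adapter_layer import WildnerveModelAdapter

try:
    # Construction either returns a fully initialized adapter or raises.
    adapter = WildnerveModelAdapter("/repository")   # path is an assumption
    print(adapter.generate("Summarize the release notes.", max_length=64))
except (ImportError, RuntimeError) as err:
    # Loading problems now surface here rather than as fallback prose.
    print(f"Model unavailable: {err}")
```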
@@ -455,22 +457,38 @@ class SimpleFallbackModel:
     def generate(self, prompt, **kwargs):
         """Generate a simple response based on prompt content"""
         import random
-        prompt_lower = prompt.lower()
 
+        # COMPLETELY SIMPLIFIED IMPLEMENTATION:
+        # This is a critical function that must not fail
+
+        # Always set a default response type
+        response_type = "default"
+
+        # Just check for tensor and use a fixed response to prevent ANY processing errors
+        if isinstance(prompt, torch.Tensor):
+            return """I apologize, but I'm currently operating in fallback mode due to loading issues.
+
+The system administrator should check for missing dependencies like pydantic and codecarbon.
+
+Please try again later when full model capabilities are restored."""
+
+        # For strings, do minimal processing
+        if isinstance(prompt, str):
+            # Use the most basic string operations that can't fail
+            if "?" in prompt:
+                response_type = "question"
+            elif "code" in prompt.lower():
+                response_type = "code"
+            elif any(word in prompt.lower() for word in ["hello", "hi"]):
+                response_type = "greeting"
+
+        # Get a response - default if anything went wrong
+        try:
+            response = random.choice(self.responses[response_type])
+        except:
+            response = "I'm currently operating in fallback mode due to technical issues."
 
-        # Add
+        # Add standard explanation
         response += "\n\nThe system is experiencing issues loading the full model capabilities. Please try again later."
 
         return response
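One caveat on the hunk above: `isinstance(prompt, torch.Tensor)` assumes `torch` was imported at the top of the file, which this diff does not show. If that import is ever missing, the fallback path itself would raise `NameError`. A defensive variant, offered only as a sketch:

```python
def _is_tensor(obj) -> bool:
    """True when obj is a torch.Tensor, without requiring torch at import time."""
    try:
        import torch
        return isinstance(obj, torch.Tensor)
    except ImportError:
        # No torch available: nothing can be a Tensor.
        return False
```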
handler.py
CHANGED
@@ -1,4 +1,4 @@
-# Handler.py -
+# Handler.py - Entry point for Hugging Face inference API
 import os
 import sys
 import time
@@ -7,8 +7,18 @@ import traceback
 from typing import Dict, Any, List
 import importlib.util
 
-# --- DEBUG: confirm correct handler.py is loaded
-print("DEBUG: using Wildnerve-tlm_HF/handler.py — update
+# --- DEBUG: confirm correct handler.py is loaded ---
+print("DEBUG: using Wildnerve-tlm_HF/handler.py — update with direct dependency installation")
+
+# CRITICAL FIX: Install required dependencies first before any imports
+try:
+    import subprocess
+    print("Installing required dependencies...")
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "pydantic"])
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "codecarbon"])
+    print("Dependencies successfully installed")
+except Exception as e:
+    print(f"Error installing dependencies: {e}")
 
 # Set up logging
 logging.basicConfig(
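Installing at import time works where the endpoint allows outbound network access, but every cold start pays the pip cost, and unpinned installs can drift between restarts. A hedged variant of the same call that pins versions and skips the install when the package is already importable (the pins are placeholders, not tested constraints):

```python
import importlib.util
import subprocess
import sys

def ensure_installed(requirement: str, module_name: str) -> None:
    """Run pip only if module_name cannot already be imported."""
    if importlib.util.find_spec(module_name) is None:
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "--quiet", requirement]
        )

ensure_installed("pydantic>=2,<3", "pydantic")  # placeholder pin
ensure_installed("codecarbon", "codecarbon")
```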
@@ -19,35 +29,23 @@ logger = logging.getLogger(__name__)
 
 # Make sure adapter_layer.py is properly located
 try:
     # For more reliable importing
     script_dir = os.path.dirname(os.path.abspath(__file__))
     sys.path.insert(0, script_dir)
 
-    # Try to import WildnerveModelAdapter
     from adapter_layer import WildnerveModelAdapter
     logger.info("Successfully imported adapter_layer module")
 
     # Also try to import TokenizerWrapper
-    tokenizer_found = False
     try:
         from tokenizer import TokenizerWrapper, get_tokenizer
-        logger.info("Successfully imported TokenizerWrapper")
-        tokenizer_found = True
-    except ImportError:
-        logger.warning("TokenizerWrapper not found, will use fallbacks in adapter")
+        logger.info("Successfully imported TokenizerWrapper")
+    except ImportError as e:
+        logger.warning(f"TokenizerWrapper not found: {e}")
 
 except ImportError as e:
-    logger.error(f"Could not import required modules: {e}")
-
-    # Minimal implementations - these are fallbacks if modules can't be imported
-    exec("""
-class WildnerveModelAdapter:
-    def __init__(self, model_path):
-        self.path = model_path
-
-    def generate(self, prompt, **kwargs):
-        return f"Received: '{prompt[:30]}...' - Running in emergency fallback mode. Cannot load required modules."
-""")
+    logger.error(f"Could not import adapter_layer: {e}")
+    raise
 
 class EndpointHandler:
     def __init__(self, path=""):
@@ -65,7 +63,7 @@ class EndpointHandler:
             # Handle result formatting
             if isinstance(result, list):
                 logger.info(f"Returning list result with {len(result)} items")
-                return result
+                return result
             elif isinstance(result, dict):
                 return [result]
             else:
@@ -82,7 +80,7 @@ class EndpointHandler:
             return True
 
         try:
-            # Create the adapter
+            # Create the adapter - this will load the actual model
             self.model_adapter = WildnerveModelAdapter(self.path)
             self.initialized = True
             return True
@@ -98,7 +96,7 @@ class EndpointHandler:
         if not self.initialized:
             success = self.initialize()
             if not success:
-                return [{"generated_text": "Failed to initialize the model.
+                return [{"generated_text": "Failed to initialize the model."}]
 
         # Extract the prompt text
         text_input = self._extract_input_text(inputs)
@@ -124,15 +122,7 @@ class EndpointHandler:
             logger.error(f"Error during prediction: {e}")
             logger.error(traceback.format_exc())
 
-            fallback_message = (
-                f"I received your message: '{text_input[:30]}...' (truncated)\n\n"
-                "I apologize, but I encountered a critical error while processing your request. "
-                "The model is currently unavailable or running in emergency fallback mode.\n\n"
-                "Error details: " + str(e)
-            )
-
-            return [{"generated_text": fallback_message}]
+            return [{"generated_text": f"Error generating response: {str(e)}"}]
 
     def _extract_input_text(self, inputs) -> str:
         """Extract the input text from various possible input formats"""
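For context on how this file is exercised: Hugging Face Inference Endpoints construct `EndpointHandler(path)` once and then invoke the handler with one payload dict per request. A local smoke test, hedged because this diff shows only `_extract_input_text` and the result formatting, so the standard `__call__(data)` entry point is an assumption:

```python
from handler import EndpointHandler

# Assumption: the handler follows the usual custom-handler convention of
# __call__(data) -> List[Dict], with the prompt under the "inputs" key.
handler = EndpointHandler(path=".")
result = handler({"inputs": "Hello, what can you do?"})

# Every outcome, including the error paths above, is normalized to
# [{"generated_text": "..."}].
print(result[0]["generated_text"])
```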