#!/usr/bin/env python3
"""
Validation script for Advanced NLP Pipeline setup

Run this script to verify that all components are properly installed and configured.
"""

import asyncio
import importlib
import logging
import sys
import time
from typing import Dict, Any, List

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# (importable module name, human-readable name) pairs, in the order checked.
_REQUIRED_DEPENDENCIES = (
    ('spacy', 'spaCy'),
    ('sklearn', 'scikit-learn'),
    ('numpy', 'numpy'),
    ('sentence_transformers', 'sentence-transformers'),
    ('transformers', 'transformers'),
    ('torch', 'torch'),
)


def check_dependencies() -> Dict[str, bool]:
    """Check if all required dependencies are installed.

    Returns:
        A dict keyed by module name (``'spacy'``, ``'sklearn'``, ...) whose
        value is True when the module could be imported.
    """
    dependencies = {module_name: False for module_name, _ in _REQUIRED_DEPENDENCIES}

    logger.info("Checking dependencies...")

    for module_name, display_name in _REQUIRED_DEPENDENCIES:
        try:
            importlib.import_module(module_name)
        except ImportError:
            logger.error(f"✗ {display_name} not installed")
        except Exception as e:
            # An installed but broken package can raise something other than
            # ImportError at import time; report it instead of aborting the
            # whole validation run.
            logger.error(f"✗ {display_name} installed but failed to import: {e}")
        else:
            dependencies[module_name] = True
            logger.info(f"✓ {display_name} installed")

    return dependencies


def check_spacy_model() -> bool:
    """Check if spaCy model is available.

    Returns:
        True when ``spacy.load("en_core_web_sm")`` succeeds, False otherwise.
    """
    logger.info("Checking spaCy model...")

    try:
        import spacy
        spacy.load("en_core_web_sm")
        logger.info("✓ spaCy model 'en_core_web_sm' loaded successfully")
        return True
    except OSError:
        logger.error("✗ spaCy model 'en_core_web_sm' not found")
        logger.error(" Run: python -m spacy download en_core_web_sm")
        return False
    except Exception as e:
        logger.error(f"✗ Error loading spaCy model: {e}")
        return False


def check_sentence_transformer_model() -> bool:
    """Check if sentence transformer model can be loaded.

    Returns:
        True when ``SentenceTransformer('all-MiniLM-L6-v2')`` can be
        constructed, False on any error.
    """
    logger.info("Checking sentence transformer model...")

    try:
        from sentence_transformers import SentenceTransformer
        SentenceTransformer('all-MiniLM-L6-v2')
        logger.info("✓ Sentence transformer model 'all-MiniLM-L6-v2' loaded successfully")
        return True
    except Exception as e:
        logger.error(f"✗ Error loading sentence transformer model: {e}")
        logger.error(" Model will be downloaded on first use")
        return False


async def test_advanced_nlp_pipeline() -> bool:
    """Test the advanced NLP pipeline.

    Sends one sample query through ``advanced_nlp_pipeline.process_query``
    and checks that the result contains the expected top-level keys.

    Returns:
        True when the pipeline responds with all required keys, False when it
        cannot be imported, raises, or omits a key.
    """
    logger.info("Testing Advanced NLP Pipeline...")

    try:
        # Import the pipeline
        from app.services.advanced_nlp import advanced_nlp_pipeline

        # Test with a simple query
        test_query = "find a hair salon near me"

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        start_time = time.perf_counter()
        result = await advanced_nlp_pipeline.process_query(test_query)
        processing_time = time.perf_counter() - start_time

        # Check if result has expected structure
        required_keys = ['query', 'primary_intent', 'entities', 'similar_services', 'search_parameters']
        missing_keys = [key for key in required_keys if key not in result]

        if missing_keys:
            logger.error(f"✗ Missing keys in result: {missing_keys}")
            return False

        logger.info(f"✓ Advanced NLP Pipeline working (processed in {processing_time:.3f}s)")
        logger.info(f" Intent: {result['primary_intent']['intent']} (confidence: {result['primary_intent']['confidence']:.3f})")
        logger.info(f" Entities found: {len(result['entities'])}")
        logger.info(f" Similar services: {len(result['similar_services'])}")
        logger.info(f" Search parameters: {len(result['search_parameters'])}")

        return True

    except ImportError as e:
        logger.error(f"✗ Cannot import Advanced NLP Pipeline: {e}")
        return False
    except Exception as e:
        logger.error(f"✗ Error testing Advanced NLP Pipeline: {e}")
        return False


async def test_individual_components() -> Dict[str, bool]:
    """Test individual NLP components.

    Each component is exercised in its own try/except so that one failing
    component does not prevent the others from being checked.

    Returns:
        A dict with keys ``'intent_classifier'``, ``'entity_extractor'``,
        ``'semantic_matcher'`` and ``'context_processor'``; a value is True
        when that component returned a result of the expected type. All
        values stay False when the components cannot be imported.
    """
    logger.info("Testing individual components...")

    results = {
        'intent_classifier': False,
        'entity_extractor': False,
        'semantic_matcher': False,
        'context_processor': False,
    }

    try:
        from app.services.advanced_nlp import (
            IntentClassifier,
            BusinessEntityExtractor,
            SemanticMatcher,
            ContextAwareProcessor,
        )

        # Test Intent Classifier
        try:
            classifier = IntentClassifier()
            intent, confidence = classifier.get_primary_intent("find a salon")
            if intent and confidence >= 0:
                results['intent_classifier'] = True
                logger.info(f"✓ Intent Classifier working (detected: {intent})")
            else:
                logger.error("✗ Intent Classifier returned invalid results")
        except Exception as e:
            logger.error(f"✗ Intent Classifier error: {e}")

        # Test Entity Extractor
        try:
            extractor = BusinessEntityExtractor()
            entities = extractor.extract_entities("luxury spa with parking")
            if isinstance(entities, dict):
                results['entity_extractor'] = True
                logger.info(f"✓ Entity Extractor working (found {len(entities)} entity types)")
            else:
                logger.error("✗ Entity Extractor returned invalid results")
        except Exception as e:
            logger.error(f"✗ Entity Extractor error: {e}")

        # Test Semantic Matcher
        try:
            matcher = SemanticMatcher()
            matches = matcher.find_similar_services("hair salon")
            if isinstance(matches, list):
                results['semantic_matcher'] = True
                logger.info(f"✓ Semantic Matcher working (found {len(matches)} matches)")
            else:
                logger.error("✗ Semantic Matcher returned invalid results")
        except Exception as e:
            logger.error(f"✗ Semantic Matcher error: {e}")

        # Test Context Processor
        try:
            processor = ContextAwareProcessor()
            context_result = await processor.process_with_context(
                "spa treatment", {}, [("spa", 0.9)]
            )
            if isinstance(context_result, dict):
                results['context_processor'] = True
                logger.info("✓ Context Processor working")
            else:
                logger.error("✗ Context Processor returned invalid results")
        except Exception as e:
            logger.error(f"✗ Context Processor error: {e}")

    except ImportError as e:
        logger.error(f"✗ Cannot import NLP components: {e}")

    return results


def test_configuration() -> bool:
    """Test configuration loading.

    Returns:
        True when ``nlp_config.get_config_dict()`` returns a non-empty dict,
        False when it is empty, not a dict, cannot be imported, or raises.
    """
    logger.info("Testing configuration...")

    try:
        from app.config.nlp_config import nlp_config

        # Check if configuration is accessible
        config_dict = nlp_config.get_config_dict()

        if isinstance(config_dict, dict) and len(config_dict) > 0:
            logger.info("✓ Configuration loaded successfully")
            logger.info(f" Max workers: {nlp_config.ASYNC_PROCESSOR_MAX_WORKERS}")
            logger.info(f" Cache duration: {nlp_config.CACHE_DURATION_SECONDS}s")
            logger.info(f" Advanced NLP enabled: {nlp_config.ENABLE_ADVANCED_NLP}")
            return True
        else:
            logger.error("✗ Configuration is empty or invalid")
            return False

    except ImportError as e:
        logger.error(f"✗ Cannot import configuration: {e}")
        return False
    except Exception as e:
        logger.error(f"✗ Configuration error: {e}")
        return False


# The ``test_*`` functions above are validation steps that return a status;
# they are not pytest tests. Opt them out of pytest discovery so they are not
# collected when this module is star-imported or matched by a test file glob.
test_advanced_nlp_pipeline.__test__ = False
test_individual_components.__test__ = False
test_configuration.__test__ = False


async def run_performance_benchmark() -> Dict[str, float]:
    """Run a simple performance benchmark.

    Processes a fixed list of sample queries one after another and averages
    the time taken by those that succeed (no ``'error'`` key in the result).

    Returns:
        A dict with ``'average_time'`` (seconds), ``'success_rate'`` (0..1)
        and ``'total_queries'``, or an empty dict when no query succeeded or
        the pipeline could not be used at all.
    """
    logger.info("Running performance benchmark...")

    test_queries = [
        "find a hair salon",
        "best spa near me",
        "gym with parking",
        "luxury massage therapy",
        "dental clinic open now",
    ]

    try:
        from app.services.advanced_nlp import advanced_nlp_pipeline

        total_time = 0.0
        successful_queries = 0

        for query in test_queries:
            try:
                start_time = time.perf_counter()
                result = await advanced_nlp_pipeline.process_query(query)
                processing_time = time.perf_counter() - start_time

                if 'error' not in result:
                    total_time += processing_time
                    successful_queries += 1
                    logger.info(f" '{query}' processed in {processing_time:.3f}s")
                else:
                    logger.warning(f" '{query}' failed: {result.get('error', 'Unknown error')}")

            except Exception as e:
                # A single failing query must not abort the benchmark.
                logger.warning(f" '{query}' error: {e}")

        if successful_queries > 0:
            avg_time = total_time / successful_queries
            logger.info("✓ Performance benchmark completed")
            logger.info(f" Average processing time: {avg_time:.3f}s")
            logger.info(f" Successful queries: {successful_queries}/{len(test_queries)}")

            return {
                'average_time': avg_time,
                'success_rate': successful_queries / len(test_queries),
                'total_queries': len(test_queries),
            }
        else:
            logger.error("✗ No queries processed successfully")
            return {}

    except Exception as e:
        logger.error(f"✗ Performance benchmark failed: {e}")
        return {}


def _is_ready(
    dependencies: Dict[str, bool],
    spacy_model: bool,
    pipeline_test: bool,
    component_tests: Dict[str, bool],
    config_test: bool
) -> bool:
    """Return True when every check that gates the overall status passed.

    The sentence transformer check and the benchmark are not part of the
    gate. Shared by the report and the exit code so they always agree.
    """
    return bool(
        all(dependencies.values()) and
        spacy_model and
        pipeline_test and
        all(component_tests.values()) and
        config_test
    )


def generate_report(
    dependencies: Dict[str, bool],
    spacy_model: bool,
    sentence_model: bool,
    pipeline_test: bool,
    component_tests: Dict[str, bool],
    config_test: bool,
    performance: Dict[str, float]
) -> None:
    """Generate a comprehensive validation report.

    Prints a human-readable summary of every check to stdout, followed by an
    overall status and a list of recommendations.

    Args:
        dependencies: Result of ``check_dependencies``.
        spacy_model: Result of ``check_spacy_model``.
        sentence_model: Result of ``check_sentence_transformer_model``.
        pipeline_test: Result of ``test_advanced_nlp_pipeline``.
        component_tests: Result of ``test_individual_components``.
        config_test: Result of ``test_configuration``.
        performance: Result of ``run_performance_benchmark``; an empty dict
            means the benchmark failed.
    """
    print("\n" + "="*60)
    print("ADVANCED NLP PIPELINE VALIDATION REPORT")
    print("="*60)

    # Dependencies
    print("\n📦 DEPENDENCIES:")
    all_deps_ok = all(dependencies.values())
    for dep, status in dependencies.items():
        status_icon = "✓" if status else "✗"
        print(f" {status_icon} {dep}")

    print(f"\n Overall: {'✓ All dependencies installed' if all_deps_ok else '✗ Missing dependencies'}")

    # Models
    print("\n🤖 MODELS:")
    print(f" {'✓' if spacy_model else '✗'} spaCy model (en_core_web_sm)")
    print(f" {'✓' if sentence_model else '✗'} Sentence transformer model")

    # Pipeline
    print("\n🔧 PIPELINE:")
    print(f" {'✓' if pipeline_test else '✗'} Advanced NLP Pipeline")

    # Components
    print("\n⚙️ COMPONENTS:")
    for component, status in component_tests.items():
        status_icon = "✓" if status else "✗"
        component_name = component.replace('_', ' ').title()
        print(f" {status_icon} {component_name}")

    # Configuration
    print("\n⚙️ CONFIGURATION:")
    print(f" {'✓' if config_test else '✗'} Configuration loading")

    # Performance
    print("\n⚡ PERFORMANCE:")
    average_time = performance.get('average_time', 0) if performance else 0
    if performance:
        print(f" Average processing time: {average_time:.3f}s")
        print(f" Success rate: {performance.get('success_rate', 0)*100:.1f}%")

        if average_time < 0.5:
            print(" ✓ Good performance")
        elif average_time < 1.0:
            print(" ⚠ Acceptable performance")
        else:
            print(" ✗ Slow performance - consider optimization")
    else:
        print(" ✗ Performance test failed")

    # Overall Status
    print("\n" + "="*60)
    overall_status = _is_ready(
        dependencies, spacy_model, pipeline_test, component_tests, config_test
    )

    if overall_status:
        print("🎉 OVERALL STATUS: ✓ READY FOR PRODUCTION")
        print("\nThe Advanced NLP Pipeline is properly installed and configured.")
        print("You can now use the enhanced natural language processing features.")
    else:
        print("⚠️ OVERALL STATUS: ✗ ISSUES FOUND")
        print("\nPlease address the issues above before using the Advanced NLP Pipeline.")
        print("The system will fall back to basic processing until issues are resolved.")

    # Recommendations
    print("\n📋 RECOMMENDATIONS:")

    if not all_deps_ok:
        print(" • Install missing dependencies: pip install -r requirements.txt")

    if not spacy_model:
        print(" • Download spaCy model: python -m spacy download en_core_web_sm")

    if not sentence_model:
        print(" • Sentence transformer model will download automatically on first use")

    if performance and average_time > 0.5:
        print(" • Consider increasing ASYNC_PROCESSOR_MAX_WORKERS for better performance")
        print(" • Enable caching with longer CACHE_DURATION_SECONDS")

    if not all(component_tests.values()):
        print(" • Check logs above for specific component errors")

    print("\n" + "="*60)


async def main() -> int:
    """Main validation function.

    Runs every check in sequence, prints the report, and returns the process
    exit code.

    Returns:
        0 when all gating checks passed, 1 otherwise.
    """
    print("Starting Advanced NLP Pipeline validation...")
    print("This may take a few minutes on first run due to model downloads.\n")

    # Run all validation checks
    dependencies = check_dependencies()
    spacy_model = check_spacy_model()
    sentence_model = check_sentence_transformer_model()
    pipeline_test = await test_advanced_nlp_pipeline()
    component_tests = await test_individual_components()
    config_test = test_configuration()
    performance = await run_performance_benchmark()

    # Generate comprehensive report
    generate_report(
        dependencies,
        spacy_model,
        sentence_model,
        pipeline_test,
        component_tests,
        config_test,
        performance
    )

    # Return exit code
    overall_success = _is_ready(
        dependencies, spacy_model, pipeline_test, component_tests, config_test
    )

    return 0 if overall_success else 1


if __name__ == "__main__":
    try:
        exit_code = asyncio.run(main())
        sys.exit(exit_code)
    except KeyboardInterrupt:
        print("\n\nValidation interrupted by user.")
        sys.exit(1)
    except Exception as e:
        print(f"\n\nUnexpected error during validation: {e}")
        sys.exit(1)