#!/usr/bin/env python3
"""
Validation script for Advanced NLP Pipeline setup
Run this script to verify that all components are properly installed and configured.
"""

import asyncio
import sys
import time
import logging
from typing import Dict, Any, List

# Root logging setup: INFO and above, each record prefixed with timestamp and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

def check_dependencies() -> Dict[str, bool]:
    """Check whether every required third-party package can be imported.

    Each package is actually imported (not merely located), so a package
    that is present but fails during import is reported as unavailable.

    Returns:
        Mapping of import name (``'spacy'``, ``'sklearn'``, ``'numpy'``,
        ``'sentence_transformers'``, ``'transformers'``, ``'torch'``) to
        ``True`` when the import succeeded and ``False`` otherwise.
    """
    # Function-scope import keeps this block self-contained.
    import importlib

    # (import name used as the result key, display name used in log messages)
    required = (
        ('spacy', 'spaCy'),
        ('sklearn', 'scikit-learn'),
        ('numpy', 'numpy'),
        ('sentence_transformers', 'sentence-transformers'),
        ('transformers', 'transformers'),
        ('torch', 'torch'),
    )

    logger.info("Checking dependencies...")

    dependencies: Dict[str, bool] = {}
    for module_name, display_name in required:
        try:
            importlib.import_module(module_name)
        except ImportError:
            dependencies[module_name] = False
            logger.error(f"✗ {display_name} not installed")
        except Exception as e:
            # An import may raise something other than ImportError; record
            # it as a failed dependency instead of aborting the remaining
            # checks and the final report.
            dependencies[module_name] = False
            logger.error(f"✗ {display_name} failed to import: {e}")
        else:
            dependencies[module_name] = True
            logger.info(f"✓ {display_name} installed")

    return dependencies

def check_spacy_model() -> bool:
    """Try to load the spaCy model ``en_core_web_sm``.

    Returns:
        ``True`` when ``spacy.load`` succeeds. ``False`` when it raises:
        an ``OSError`` is logged as a missing model together with the
        download command, any other exception is logged with its message.
    """
    logger.info("Checking spaCy model...")

    try:
        import spacy
        # Only the ability to load matters; the loaded pipeline is discarded.
        spacy.load("en_core_web_sm")
        logger.info("✓ spaCy model 'en_core_web_sm' loaded successfully")
        model_available = True
    except OSError:
        logger.error("✗ spaCy model 'en_core_web_sm' not found")
        logger.error("  Run: python -m spacy download en_core_web_sm")
        model_available = False
    except Exception as err:
        logger.error(f"✗ Error loading spaCy model: {err}")
        model_available = False

    return model_available

def check_sentence_transformer_model() -> bool:
    """Check that the sentence transformer model ``all-MiniLM-L6-v2`` loads.

    A failure to import the ``sentence_transformers`` package is reported
    separately from a failure to construct the model, so the
    "downloaded on first use" hint is only shown when the package itself is
    importable.

    Returns:
        ``True`` when ``SentenceTransformer('all-MiniLM-L6-v2')`` can be
        constructed, ``False`` on any import or load error.
    """
    logger.info("Checking sentence transformer model...")

    try:
        from sentence_transformers import SentenceTransformer
    except Exception as e:
        # Without the package, the download hint below would be misleading.
        logger.error(f"✗ Cannot import sentence-transformers: {e}")
        return False

    try:
        # Only the ability to construct matters; the instance is discarded.
        SentenceTransformer('all-MiniLM-L6-v2')
    except Exception as e:
        logger.error(f"✗ Error loading sentence transformer model: {e}")
        logger.error("  Model will be downloaded on first use")
        return False

    logger.info("✓ Sentence transformer model 'all-MiniLM-L6-v2' loaded successfully")
    return True

async def test_advanced_nlp_pipeline() -> bool:
    """Run one sample query through the advanced NLP pipeline.

    Imports ``advanced_nlp_pipeline`` from ``app.services.advanced_nlp``,
    awaits ``process_query`` on a fixed query and verifies that the result
    contains the keys ``query``, ``primary_intent``, ``entities``,
    ``similar_services`` and ``search_parameters``.

    Returns:
        ``True`` when the query is processed and all required keys are
        present; ``False`` when the import fails, a key is missing, or any
        exception is raised along the way (all are logged, none propagate).
    """
    logger.info("Testing Advanced NLP Pipeline...")

    try:
        # Import the pipeline
        from app.services.advanced_nlp import advanced_nlp_pipeline

        # Test with a simple query
        test_query = "find a hair salon near me"

        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        result = await advanced_nlp_pipeline.process_query(test_query)
        processing_time = time.perf_counter() - start_time

        # Check if result has expected structure
        required_keys = ['query', 'primary_intent', 'entities', 'similar_services', 'search_parameters']
        missing_keys = [key for key in required_keys if key not in result]

        if missing_keys:
            logger.error(f"✗ Missing keys in result: {missing_keys}")
            return False

        logger.info(f"✓ Advanced NLP Pipeline working (processed in {processing_time:.3f}s)")
        # A malformed 'primary_intent' entry raises here and is reported by
        # the generic handler below.
        logger.info(f"  Intent: {result['primary_intent']['intent']} (confidence: {result['primary_intent']['confidence']:.3f})")
        logger.info(f"  Entities found: {len(result['entities'])}")
        logger.info(f"  Similar services: {len(result['similar_services'])}")
        logger.info(f"  Search parameters: {len(result['search_parameters'])}")

        return True

    except ImportError as e:
        logger.error(f"✗ Cannot import Advanced NLP Pipeline: {e}")
        return False
    except Exception as e:
        logger.error(f"✗ Error testing Advanced NLP Pipeline: {e}")
        return False

async def test_individual_components() -> Dict[str, bool]:
    """Smoke-test each NLP component class on its own.

    Imports ``IntentClassifier``, ``BusinessEntityExtractor``,
    ``SemanticMatcher`` and ``ContextAwareProcessor`` from
    ``app.services.advanced_nlp``, instantiates each one and calls a single
    method with a fixed sample input, checking only the type/shape of the
    returned value.

    Returns:
        Mapping with the keys ``intent_classifier``, ``entity_extractor``,
        ``semantic_matcher`` and ``context_processor``; a value is ``True``
        only when that component's check passed. If the import itself fails
        for any reason, every value is ``False``. No exception propagates.
    """
    logger.info("Testing individual components...")

    results = {
        'intent_classifier': False,
        'entity_extractor': False,
        'semantic_matcher': False,
        'context_processor': False
    }

    try:
        from app.services.advanced_nlp import (
            IntentClassifier, BusinessEntityExtractor,
            SemanticMatcher, ContextAwareProcessor
        )
    except ImportError as e:
        logger.error(f"✗ Cannot import NLP components: {e}")
        return results
    except Exception as e:
        # Importing the module may fail with something other than
        # ImportError; report it rather than aborting the validation run
        # before the report is generated.
        logger.error(f"✗ Error importing NLP components: {e}")
        return results

    # Test Intent Classifier
    try:
        classifier = IntentClassifier()
        intent, confidence = classifier.get_primary_intent("find a salon")
        if intent and confidence >= 0:
            results['intent_classifier'] = True
            logger.info(f"✓ Intent Classifier working (detected: {intent})")
        else:
            logger.error("✗ Intent Classifier returned invalid results")
    except Exception as e:
        logger.error(f"✗ Intent Classifier error: {e}")

    # Test Entity Extractor
    try:
        extractor = BusinessEntityExtractor()
        entities = extractor.extract_entities("luxury spa with parking")
        if isinstance(entities, dict):
            results['entity_extractor'] = True
            logger.info(f"✓ Entity Extractor working (found {len(entities)} entity types)")
        else:
            logger.error("✗ Entity Extractor returned invalid results")
    except Exception as e:
        logger.error(f"✗ Entity Extractor error: {e}")

    # Test Semantic Matcher
    try:
        matcher = SemanticMatcher()
        matches = matcher.find_similar_services("hair salon")
        if isinstance(matches, list):
            results['semantic_matcher'] = True
            logger.info(f"✓ Semantic Matcher working (found {len(matches)} matches)")
        else:
            logger.error("✗ Semantic Matcher returned invalid results")
    except Exception as e:
        logger.error(f"✗ Semantic Matcher error: {e}")

    # Test Context Processor
    try:
        processor = ContextAwareProcessor()
        context_result = await processor.process_with_context(
            "spa treatment", {}, [("spa", 0.9)]
        )
        if isinstance(context_result, dict):
            results['context_processor'] = True
            logger.info("✓ Context Processor working")
        else:
            logger.error("✗ Context Processor returned invalid results")
    except Exception as e:
        logger.error(f"✗ Context Processor error: {e}")

    return results

def test_configuration() -> bool:
    """Verify that the NLP configuration object can be imported and read.

    Returns:
        ``True`` when ``nlp_config.get_config_dict()`` yields a non-empty
        dict and the three logged settings are readable; ``False`` on an
        import failure, an empty/non-dict result, or any other exception.
    """
    logger.info("Testing configuration...")

    try:
        from app.config.nlp_config import nlp_config

        # Fetch the settings as a plain mapping to confirm they are accessible
        settings = nlp_config.get_config_dict()

        # Guard clause: reject anything that is not a populated dict
        if not isinstance(settings, dict) or len(settings) == 0:
            logger.error("✗ Configuration is empty or invalid")
            return False

        logger.info("✓ Configuration loaded successfully")
        logger.info(f"  Max workers: {nlp_config.ASYNC_PROCESSOR_MAX_WORKERS}")
        logger.info(f"  Cache duration: {nlp_config.CACHE_DURATION_SECONDS}s")
        logger.info(f"  Advanced NLP enabled: {nlp_config.ENABLE_ADVANCED_NLP}")
        return True

    except ImportError as err:
        logger.error(f"✗ Cannot import configuration: {err}")
        return False
    except Exception as err:
        logger.error(f"✗ Configuration error: {err}")
        return False

async def run_performance_benchmark() -> Dict[str, float]:
    """Time the pipeline on a small fixed set of sample queries.

    Each query is awaited sequentially through
    ``advanced_nlp_pipeline.process_query``. A query counts as successful
    when no exception is raised and the result does not contain the key
    ``'error'``; only successful queries contribute to the average.

    Returns:
        A dict with ``average_time`` (seconds per successful query),
        ``success_rate`` (fraction of queries that succeeded) and
        ``total_queries`` (number of queries attempted). An empty dict is
        returned when no query succeeded or the benchmark could not run.
    """
    logger.info("Running performance benchmark...")

    test_queries = [
        "find a hair salon",
        "best spa near me",
        "gym with parking",
        "luxury massage therapy",
        "dental clinic open now"
    ]

    try:
        from app.services.advanced_nlp import advanced_nlp_pipeline

        total_time = 0.0
        successful_queries = 0

        for query in test_queries:
            try:
                # perf_counter is monotonic, so durations cannot be skewed
                # by system clock adjustments the way time.time() can.
                start_time = time.perf_counter()
                result = await advanced_nlp_pipeline.process_query(query)
                processing_time = time.perf_counter() - start_time

                if 'error' not in result:
                    total_time += processing_time
                    successful_queries += 1
                    logger.info(f"  '{query}' processed in {processing_time:.3f}s")
                else:
                    logger.warning(f"  '{query}' failed: {result.get('error', 'Unknown error')}")

            except Exception as e:
                # One failing query must not stop the remaining ones.
                logger.warning(f"  '{query}' error: {e}")

        if successful_queries > 0:
            avg_time = total_time / successful_queries
            logger.info("✓ Performance benchmark completed")
            logger.info(f"  Average processing time: {avg_time:.3f}s")
            logger.info(f"  Successful queries: {successful_queries}/{len(test_queries)}")

            return {
                'average_time': avg_time,
                'success_rate': successful_queries / len(test_queries),
                'total_queries': len(test_queries)
            }
        else:
            logger.error("✗ No queries processed successfully")
            return {}

    except Exception as e:
        logger.error(f"✗ Performance benchmark failed: {e}")
        return {}

def generate_report(
    dependencies: Dict[str, bool],
    spacy_model: bool,
    sentence_model: bool,
    pipeline_test: bool,
    component_tests: Dict[str, bool],
    config_test: bool,
    performance: Dict[str, float]
) -> None:
    """Print a human-readable validation report to stdout.

    Args:
        dependencies: Package name -> whether it could be imported.
        spacy_model: Whether the spaCy model loaded.
        sentence_model: Whether the sentence transformer model loaded. It is
            shown in the report but does not affect the overall status.
        pipeline_test: Whether the end-to-end pipeline test passed.
        component_tests: Component name -> whether its check passed.
        config_test: Whether the configuration check passed.
        performance: Benchmark results (``average_time``, ``success_rate``);
            an empty dict means the benchmark failed. It does not affect the
            overall status.

    The overall status is "ready" only when all dependencies, the spaCy
    model, the pipeline test, every component test and the configuration
    test passed. The recommendations section always contains at least one
    line: one entry per detected problem, or an explicit "no action" note.
    """
    separator = "=" * 60
    all_deps_ok = all(dependencies.values())
    all_components_ok = all(component_tests.values())
    # Read once; used for both the rating and the recommendations.
    average_time = performance.get('average_time', 0) if performance else 0

    print("\n" + separator)
    print("ADVANCED NLP PIPELINE VALIDATION REPORT")
    print(separator)

    # Dependencies
    print("\n📦 DEPENDENCIES:")
    for dep, status in dependencies.items():
        status_icon = "✓" if status else "✗"
        print(f"  {status_icon} {dep}")

    print(f"\n  Overall: {'✓ All dependencies installed' if all_deps_ok else '✗ Missing dependencies'}")

    # Models
    print("\n🤖 MODELS:")
    print(f"  {'✓' if spacy_model else '✗'} spaCy model (en_core_web_sm)")
    print(f"  {'✓' if sentence_model else '✗'} Sentence transformer model")

    # Pipeline
    print("\n🔧 PIPELINE:")
    print(f"  {'✓' if pipeline_test else '✗'} Advanced NLP Pipeline")

    # Components
    print("\n⚙️  COMPONENTS:")
    for component, status in component_tests.items():
        status_icon = "✓" if status else "✗"
        component_name = component.replace('_', ' ').title()
        print(f"  {status_icon} {component_name}")

    # Configuration
    print("\n⚙️  CONFIGURATION:")
    print(f"  {'✓' if config_test else '✗'} Configuration loading")

    # Performance
    print("\n⚡ PERFORMANCE:")
    if performance:
        print(f"  Average processing time: {average_time:.3f}s")
        print(f"  Success rate: {performance.get('success_rate', 0)*100:.1f}%")

        if average_time < 0.5:
            print("  ✓ Good performance")
        elif average_time < 1.0:
            print("  ⚠ Acceptable performance")
        else:
            print("  ✗ Slow performance - consider optimization")
    else:
        print("  ✗ Performance test failed")

    # Overall Status
    print("\n" + separator)

    overall_status = (
        all_deps_ok and spacy_model and pipeline_test and
        all_components_ok and config_test
    )

    if overall_status:
        print("🎉 OVERALL STATUS: ✓ READY FOR PRODUCTION")
        print("\nThe Advanced NLP Pipeline is properly installed and configured.")
        print("You can now use the enhanced natural language processing features.")
    else:
        print("⚠️  OVERALL STATUS: ✗ ISSUES FOUND")
        print("\nPlease address the issues above before using the Advanced NLP Pipeline.")
        print("The system will fall back to basic processing until issues are resolved.")

    # Recommendations: collected first so an empty list can be detected.
    recommendations = []

    if not all_deps_ok:
        recommendations.append("  • Install missing dependencies: pip install -r requirements.txt")

    if not spacy_model:
        recommendations.append("  • Download spaCy model: python -m spacy download en_core_web_sm")

    if not sentence_model:
        recommendations.append("  • Sentence transformer model will download automatically on first use")

    if performance and average_time > 0.5:
        recommendations.append("  • Consider increasing ASYNC_PROCESSOR_MAX_WORKERS for better performance")
        recommendations.append("  • Enable caching with longer CACHE_DURATION_SECONDS")

    if not all_components_ok:
        recommendations.append("  • Check logs above for specific component errors")

    # Pipeline and configuration failures also flip the overall status, so
    # they need a pointer too; otherwise the list could be empty on failure.
    if not pipeline_test:
        recommendations.append("  • Check logs above for Advanced NLP Pipeline errors")

    if not config_test:
        recommendations.append("  • Check logs above for configuration errors")

    if not recommendations:
        # Reached only when every check above passed.
        recommendations.append("  • No action required")

    print("\n📋 RECOMMENDATIONS:")
    for line in recommendations:
        print(line)

    print("\n" + separator)

async def main():
    """Run every validation step in order, print the report, return an exit code.

    Returns:
        ``0`` when all dependencies, the spaCy model, the pipeline test,
        every component test and the configuration test passed; ``1``
        otherwise. The sentence transformer check and the benchmark are
        reported but do not influence the exit code.
    """
    print("Starting Advanced NLP Pipeline validation...")
    print("This may take a few minutes on first run due to model downloads.\n")

    # Collect the outcome of each check, in the same order they are reported
    deps_status = check_dependencies()
    spacy_ok = check_spacy_model()
    sentence_ok = check_sentence_transformer_model()
    pipeline_ok = await test_advanced_nlp_pipeline()
    components_status = await test_individual_components()
    config_ok = test_configuration()
    benchmark = await run_performance_benchmark()

    # Print the full report
    generate_report(
        dependencies=deps_status,
        spacy_model=spacy_ok,
        sentence_model=sentence_ok,
        pipeline_test=pipeline_ok,
        component_tests=components_status,
        config_test=config_ok,
        performance=benchmark,
    )

    # Every mandatory check must pass for a zero exit code
    mandatory_checks = (
        all(deps_status.values()),
        spacy_ok,
        pipeline_ok,
        all(components_status.values()),
        config_ok,
    )
    if all(mandatory_checks):
        return 0
    return 1

if __name__ == "__main__":
    # Script entry point: run the async validation and use its return value
    # as the process exit status. Ctrl-C and unexpected errors both exit 1.
    try:
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        print("\n\nValidation interrupted by user.")
        sys.exit(1)
    except Exception as err:
        print(f"\n\nUnexpected error during validation: {err}")
        sys.exit(1)