"""
Test script to validate the crawler module and system components.
"""

import sys
from src.crawler import DocumentationCrawler


def test_crawler():
    """Smoke-test the crawler against a couple of small public websites.

    For each URL a ``DocumentationCrawler`` is built with a shallow, small
    crawl configuration, ``crawl()`` is called, and a summary is printed:
    the number of documents returned, the number of failed URLs and a
    preview of the first document. Any ``Exception`` raised while handling
    a URL is reported together with its traceback and the loop continues
    with the next URL.

    Makes real network requests.
    """
    import traceback

    print("=" * 60)
    print("Testing Crawler Module")
    print("=" * 60)

    # Single source of truth for the crawl settings, so the values printed
    # below can never drift from the values actually handed to the crawler.
    max_depth = 1
    max_pages = 5
    delay = 0.5

    # Test with a simple, stable website
    test_urls = [
        "https://example.com",
        "https://httpbin.org/html"
    ]

    for test_url in test_urls:
        print(f"\nTesting crawler on: {test_url}")
        try:
            crawler = DocumentationCrawler(
                base_url=test_url,
                max_depth=max_depth,
                delay=delay,
                max_pages=max_pages
            )

            print(f"  Max depth: {max_depth}")
            print(f"  Max pages: {max_pages}")
            print(f"  Request delay: {delay}s")

            documents = crawler.crawl()

            print("\n  ✓ Successfully crawled!")
            print(f"    - Documents: {len(documents)}")
            print(f"    - Failed URLs: {len(crawler.failed_urls)}")

            if documents:
                # Each document is indexed by the 'url' and 'content' keys.
                print("\n  Sample document:")
                doc = documents[0]
                print(f"    URL: {doc['url']}")
                print(f"    Content length: {len(doc['content'])} chars")
                print(f"    Preview: {doc['content'][:200]}...")

        except Exception as e:
            # Best-effort smoke test: report the failure and move on to the
            # next URL instead of aborting the whole run.
            print(f"  ✗ Error: {e}")
            traceback.print_exc()


def test_crawler_normalization():
    """Check the crawler's URL normalization and crawl-eligibility helpers.

    Builds a crawler rooted at https://example.com with robots.txt handling
    switched off, then asserts the expected results of ``_normalize_url``
    and ``_should_crawl_url`` for a handful of fixed URLs.
    """
    crawler = DocumentationCrawler(
        base_url="https://example.com",
        respect_robots_txt=False,
    )

    # (raw URL, expected normalized form)
    normalization_cases = (
        (
            "https://Example.com/path/?utm_source=test#section",
            "https://example.com/path",
        ),
        (
            "https://example.com/path/?b=2&a=1",
            "https://example.com/path?a=1&b=2",
        ),
    )
    for raw_url, expected in normalization_cases:
        assert crawler._normalize_url(raw_url) == expected

    # URLs the crawler is expected to refuse.
    rejected_urls = (
        "javascript:alert(1)",
        "https://example.org/other",
        "https://example.com/image.jpg",
    )
    for candidate in rejected_urls:
        assert not crawler._should_crawl_url(candidate)

    # A plain page under the base URL must be accepted.
    assert crawler._should_crawl_url("https://example.com/docs")


def test_imports():
    """Check that every required third-party module can be imported.

    Each module is imported by name and one status line is printed for it:
    a check mark on success, or a cross followed by the error message on
    failure. A failure of any kind is recorded rather than propagated, so a
    single broken package cannot abort the rest of the check.

    Returns:
        True if every module imported successfully, False otherwise.
    """
    import importlib

    print("\n" + "=" * 60)
    print("Testing Imports")
    print("=" * 60)

    # (importable module name, human-readable name used in the report)
    modules_to_test = [
        ("langchain", "LangChain"),
        ("langchain_community", "LangChain Community"),
        ("langchain_ollama", "LangChain Ollama"),
        ("chromadb", "ChromaDB"),
        ("sentence_transformers", "Sentence Transformers"),
        ("gradio", "Gradio"),
        ("fastapi", "FastAPI"),
        ("uvicorn", "Uvicorn"),
        ("bs4", "BeautifulSoup4"),
        ("requests", "Requests"),
    ]

    all_ok = True
    for module_name, display_name in modules_to_test:
        try:
            importlib.import_module(module_name)
        except Exception as e:
            # Deliberately broad: a package can fail while being imported
            # with something other than ImportError, and that should be
            # reported like any other broken dependency instead of crashing
            # the check.
            print(f"  ✗ {display_name}: {e}")
            all_ok = False
        else:
            print(f"  ✓ {display_name}")

    return all_ok


def test_app_imports():
    """Report whether the project's own modules can be imported.

    Tries the enhanced app entry points and then the crawler module. Each
    attempt prints a check mark on success or a cross with the error message
    on failure; nothing is raised and nothing is returned.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print("Testing App Module Imports")
    print(banner)

    # Enhanced application module: the names are imported only to prove
    # that they resolve.
    try:
        from src.app_enhanced import answer_question, index_crawler_results, load_documents_from_crawler
        print("  ✓ Enhanced app_enhanced.py imports")
    except Exception as exc:
        print(f"  ✗ Enhanced app_enhanced.py: {exc}")

    # Crawler module.
    try:
        from src.crawler import DocumentationCrawler
        print("  ✓ Crawler module imports")
    except Exception as exc:
        print(f"  ✗ Crawler: {exc}")


if __name__ == "__main__":
    print("\n🔍 RAG System Test Suite\n")

    # Check third-party dependencies first. A failure only prints a hint;
    # the remaining checks still run.
    if not test_imports():
        print("\n⚠️  Some dependencies are missing. Run: pip install -r requirements.txt")

    test_app_imports()

    # Optionally test crawler (disabled by default as it makes network calls).
    # Enabled by passing "--crawl" as the first command-line argument.
    if len(sys.argv) > 1 and sys.argv[1] == "--crawl":
        test_crawler()
    else:
        print("\n" + "=" * 60)
        print("Crawler Test (Skipped)")
        print("=" * 60)
        print("To test crawler connectivity, run: python test_system.py --crawl")

    print("\n✓ All tests completed!\n")