from app.services.filter_classifier import FilterClassifier
# Shared module-level classifier instance; run_batch_tests() calls
# classifier.classify(question) on it for every test case.
classifier = FilterClassifier()

from typing import Optional, List, Dict, Any
from dataclasses import dataclass

@dataclass
class TestCase:
    """A single classifier expectation: a query and the fields it should yield.

    Instances are constructed positionally, so the field order below is part
    of the interface and must not be changed.

    An ``expected_*`` value left as ``None`` is not compared by
    ``run_batch_tests``; only non-``None`` expectations are checked.
    """

    # This is a data holder, not a pytest test class.  Without this flag the
    # ``Test`` name prefix makes pytest try to collect it and emit a
    # PytestCollectionWarning (the generated ``__init__`` makes it
    # uncollectable).  Being unannotated, it is not a dataclass field.
    __test__ = False

    name: str  # Identifier of the test; prefixes such as "abbr_" / "conflict_" group cases
    question: str  # Query text handed to the classifier
    expected_type: Optional[str] = None  # Expected "type" field, or None to skip the check
    expected_category: Optional[str] = None  # Expected "category" field, or None to skip
    expected_topic: Optional[str] = None  # Expected "topic" field, or None to skip
    expected_year: Optional[int] = None  # Expected "year" field, or None to skip
    expected_intent: Optional[str] = None  # Expected "intent" field, or None to skip
    notes: str = ""  # Why this test matters

# ============================================================================
# BATCH TEST SUITE - Organized by weakness category
# ============================================================================

# Every entry is built positionally in TestCase field order:
#   (name, question, expected_type, expected_category, expected_topic,
#    expected_year, expected_intent, notes)
# A None expectation is skipped by run_batch_tests (only non-None values are
# compared).  analyze_results flags a failure as critical when the *notes*
# string contains "CRITICAL", so critical cases carry that tag in their notes
# (a source comment alone is invisible at runtime).
TEST_SUITE = [
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 1: Basic Sanity (Should Pass)
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("basic_dept_list", "show me all departments",
             "department", None, None, None, "list",
             "Basic type detection + list intent"),

    TestCase("basic_hostel_fees", "what are the hostel fees",
             "hostel", None, "fees", None, None,
             "Type + topic combo"),

    TestCase("basic_library", "library books catalog",
             "library", None, None, None, None,
             "Direct type match"),

    TestCase("basic_placement_stats_year", "campus placement statistics 2023",
             "placement", None, "stats", 2023, None,
             "Type + topic + year extraction"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 2: Abbreviation Expansion (Critical)
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("abbr_ce", "CE department faculty",
             "department", "computer", "faculty", None, None,
             "'ce' -> computer engineering"),

    TestCase("abbr_cse_ds", "CSE DS syllabus",
             "department", "cse_ds", "syllabus", None, None,
             "'cse' + 'ds' -> cse_ds"),

    TestCase("abbr_ece_missing", "ECE lab equipment",  # ⚠️ WILL FAIL - not in ABBREVIATIONS
             "department", "electronics_comm", "lab", None, None,
             "'ece' NOT in ABBREVIATIONS dict - expect failure"),

    TestCase("abbr_ei", "EI sensors calibration",
             "department", "electronics_inst", None, None, None,
             "'ei' -> electronics instrumentation"),

    TestCase("abbr_it", "IT department placement stats",
             "department", "it", "stats", None, None,
             "'it' -> information technology"),

    TestCase("abbr_me", "ME workshop schedule",
             "department", "mechanical", None, None, None,
             "'me' -> mechanical"),

    TestCase("abbr_am", "AM stress analysis project",
             "department", "applied_mechanics", "project", None, None,
             "'am' -> applied mechanics"),

    TestCase("abbr_ict", "ICT networking lab",
             "department", "ict", "lab", None, None,
             "'ict' in ABBREVIATIONS"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 3: Type/Category/Topic Conflicts (Ambiguity Hell)
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("conflict_club_type_vs_cat", "adventure club activities",
             "club", "adventure", None, None, None,
             "'club' word in TYPE, 'adventure' in CATEGORY"),

    TestCase("conflict_ieee_club", "join the ieee club",
             "club", "ieee", None, None, None,
             "ieee category + club type word"),

    TestCase("conflict_event_ambiguity", "technical events this month",
             None, None, "event", None, None,
             "'events' = topic, but clubs host events - type confusion risk"),

    # ⚠️ CRITICAL - tag is repeated inside the notes so the report can see it
    TestCase("conflict_process_topic", "what is the admission process",
             "admission", None, "process", None, None,
             "CRITICAL: 'process' = TOPIC (procedure) - NOT intent"),

    # ⚠️ CRITICAL - tag is repeated inside the notes so the report can see it
    TestCase("conflict_process_intent", "how to apply step by step process",
             "admission", None, None, None, "process",
             "CRITICAL: 'process' = INTENT (how to) - same word, different meaning!"),

    TestCase("conflict_fees_finance", "finance department fees structure",
             "service", "finance", "fees", None, None,
             "'finance' category + 'fees' topic overlap"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 4: Semantic Similarity Traps
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("trap_computer_vs_cseds_hardware", "computer hardware VLSI design",
             "department", "computer", None, None, None,
             "MUST be 'computer' (VLSI), NOT cse_ds despite 'computer'"),

    TestCase("trap_cseds_ml", "computer science machine learning course",
             "department", "cse_ds", None, None, None,
             "'computer science' + 'ML' = cse_ds (ML in cse_ds anchors)"),

    TestCase("trap_ece_vs_ei_wireless", "electronics wireless communication",
             "department", "electronics_comm", None, None, None,
             "'wireless' = ECE anchor"),

    TestCase("trap_ece_vs_ei_sensors", "electronics sensor measurement biomedical",
             "department", "electronics_inst", None, None, None,
             "'sensors' + 'biomedical' = EI anchor"),

    TestCase("trap_it_vs_ict_software", "information technology software development ERP",
             "department", "it", None, None, None,
             "'software' + 'ERP' = IT anchor"),

    TestCase("trap_it_vs_ict_fiber", "information communication fiber optic bandwidth",
             "department", "ict", None, None, None,
             "'fiber optic' = ICT anchor"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 5: Intent Detection
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("intent_list_explicit", "list all mechanical engineering labs",
             "department", "mechanical", "lab", None, "list",
             "Explicit 'list' keyword"),

    TestCase("intent_list_implicit", "show every available facility",
             "facility", None, None, None, "list",
             "Implicit list: 'show every'"),

    TestCase("intent_count_how_many", "how many students placed in 2024",
             "placement", None, None, 2024, "count",
             "'how many' = count intent"),

    TestCase("intent_count_total", "total number of hostels",
             "hostel", None, None, None, "count",
             "'total number' = count intent"),

    TestCase("intent_detail_explain", "explain the civil engineering syllabus",
             "department", "civil", "syllabus", None, "detail",
             "'explain' = detail intent"),

    TestCase("intent_detail_tell_me", "tell me about research publications",
             "research", "publication", None, None, "detail",
             "'tell me about' = detail intent"),

    TestCase("intent_greeting_hello", "hello good morning",
             None, None, None, None, "greeting",
             "Pure greeting - no other fields"),

    TestCase("intent_greeting_casual", "hi, how are you",
             None, None, None, None, "greeting",
             "Casual greeting"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 6: Year Extraction Edge Cases
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("year_simple", "placement records 2023",
             None, None, None, 2023, None,
             "Standard year extraction"),

    TestCase("year_batch_of", "batch of 2024 students",
             None, None, None, 2024, None,
             "'batch of' prefix"),

    TestCase("year_explicit", "year 2025 admission process",
             "admission", None, None, 2025, None,
             "Explicit 'year' keyword"),

    TestCase("year_ordinal_fail", "2nd year computer science",  # ⚠️ LIKELY FAIL
             "department", "cse_ds", None, None, None,
             "'2nd year' - NO year should be extracted (regex is \\b20\\d{2}\\b)"),

    TestCase("year_range_first", "2020-2024 batch",  # ⚠️ AMBIGUOUS
             None, None, None, 2020, None,
             "Range - regex finds first 20xx (2020)"),

    TestCase("year_written_fail", "twenty twenty three",  # ⚠️ WILL FAIL
             None, None, None, None, None,
             "Written year - regex only matches digits"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 7: MASTER_INDEX Constraint Violations
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("master_invalid_cat_for_type", "hostel admission process",
             "hostel", None, None, None, None,
             "'admission' invalid category for 'hostel' type -> should be None"),

    TestCase("master_invalid_topic_for_type", "placement syllabus",
             "placement", None, None, None, None,
             "'syllabus' invalid topic for 'placement' -> should be None"),

    TestCase("master_invalid_topic_research", "research timetable",
             "research", None, None, None, None,
             "'timetable' invalid for 'research' type -> should be None"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 8: Complex Real-World Queries
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("complex_bonafide", "how to get bonafide certificate from finance office",
             "service", "finance", "document", None, "process",
             "4-field complex: service+finance+document+process"),

    TestCase("complex_five_fields", "show me the 2024 civil engineering final year project list",
             "department", "civil", "project", 2024, "list",
             "5 fields: type+category+topic+year+intent"),

    TestCase("complex_nss", "NSS volunteer induction schedule 2023",
             "club", "nss", "induction", 2023, None,
             "4 fields: club+nss+induction+year"),

    TestCase("vague_bus", "when does the bus leave",
             "service", "transport", None, None, None,
             "Short query - transport detection"),

    TestCase("ambiguous_hod", "where can I find the HOD",
             None, None, "contact", None, None,
             "Ambiguous: HOD in faculty topic, but asking contact/location"),

    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 9: Adversarial / Edge Cases
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("edge_empty", "",
             None, None, None, None, None,
             "Empty string - all None expected"),

    TestCase("edge_nonsense", "the quick brown fox",
             None, None, None, None, None,
             "No relevant keywords"),

    TestCase("edge_all_caps", "LIBRARY BOOKS",
             "library", None, None, None, None,
             "Case sensitivity test"),

    TestCase("edge_mixed_case", "Ce DePaRtMeNt",
             "department", "computer", None, None, None,
             "Mixed case abbreviation"),

    TestCase("edge_punctuation", "civil-engineering!!! lab???",
             "department", "civil", "lab", None, None,
             "Punctuation handling"),

    TestCase("edge_parentheses", "cse_ds (data science) syllabus",
             "department", "cse_ds", "syllabus", None, None,
             "Parentheses handling"),
]


# ============================================================================
# BATCH PROCESSING ENGINE
# ============================================================================

def run_batch_tests(test_cases: List[TestCase]) -> Dict[str, Any]:
    """Classify each test question, diff it against expectations, and report.

    For every case the module-level ``classifier`` is asked to classify the
    question.  Each non-``None`` expectation is compared with the matching
    key of the classifier output; expectations left as ``None`` are skipped.
    The per-test records are then passed to ``analyze_results`` and its
    report is returned.
    """
    checked_fields = ("type", "category", "topic", "year", "intent")
    outcomes = []

    for case in test_cases:
        predicted = classifier.classify(case.question)

        # Expected values keyed by output field, in the fixed field order.
        wanted = {key: getattr(case, f"expected_{key}") for key in checked_fields}

        # One entry per field whose non-None expectation was not met.
        mismatches = [
            {"field": key, "expected": target, "actual": predicted[key]}
            for key, target in wanted.items()
            if target is not None and predicted[key] != target
        ]

        outcomes.append({
            "name": case.name,
            "question": case.question,
            "notes": case.notes,
            "expected": wanted,
            "actual": predicted,
            "passed": not mismatches,
            "errors": mismatches,
        })

    return analyze_results(outcomes)


def analyze_results(results: List[Dict]) -> Dict[str, Any]:
    """Summarise per-test records into an aggregate report.

    Args:
        results: One dict per test with at least the keys ``"name"``,
            ``"notes"``, ``"passed"`` and ``"errors"`` (a list of dicts, each
            carrying a ``"field"`` key naming the mismatching field).

    Returns:
        A dict with:
          * ``"summary"``: total / passed / failed counts and a formatted
            ``"pass_rate"`` (``"0%"`` when there are no results),
          * ``"field_error_rates"``: mismatch count per output field,
          * ``"critical_issues"``, ``"abbreviation_issues"``,
            ``"ambiguity_issues"``: ``{"count", "tests"}`` buckets of failed
            test names,
          * ``"all_results"``: the input list,
          * ``"failed_tests"``: the records whose ``"passed"`` is falsy.
    """
    total = len(results)
    failed_tests = [r for r in results if not r["passed"]]
    failed = len(failed_tests)
    passed = total - failed

    # Error breakdown by field (every field is present, even with zero errors).
    field_errors = {"type": 0, "category": 0, "topic": 0, "year": 0, "intent": 0}
    for record in failed_tests:
        for err in record["errors"]:
            field_errors[err["field"]] += 1

    # Categorize failures.
    critical_failures = [r for r in failed_tests if "CRITICAL" in r["notes"]]
    # Abbreviation tests are named with an "abbr_" prefix; the previous
    # "abbrev" substring never matched such names, leaving this bucket empty.
    # "abbr" also still matches any name containing "abbrev".
    abbr_failures = [r for r in failed_tests if "abbr" in r["name"].lower()]
    ambiguity_failures = [r for r in failed_tests if "conflict" in r["name"].lower()]

    return {
        "summary": {
            "total": total,
            "passed": passed,
            "failed": failed,
            "pass_rate": f"{passed/total*100:.1f}%" if total > 0 else "0%"
        },
        "field_error_rates": field_errors,
        "critical_issues": {
            "count": len(critical_failures),
            "tests": [r["name"] for r in critical_failures]
        },
        "abbreviation_issues": {
            "count": len(abbr_failures),
            "tests": [r["name"] for r in abbr_failures]
        },
        "ambiguity_issues": {
            "count": len(ambiguity_failures),
            "tests": [r["name"] for r in ambiguity_failures]
        },
        "all_results": results,
        "failed_tests": failed_tests
    }


def print_report(report: Dict[str, Any], verbose: bool = False):
    """Write a human-readable rendering of an ``analyze_results`` report to stdout.

    Prints the pass summary, the per-field error counts (non-zero only) and
    each non-empty issue bucket.  When ``verbose`` is true and there are
    failed tests, a detailed per-test failure log follows.
    """
    rule = "=" * 70
    summary = report["summary"]

    print(f"\n{rule}")
    print(f"BATCH TEST RESULTS: {summary['passed']}/{summary['total']} passed ({summary['pass_rate']})")
    print(rule)

    print("\nFIELD ERROR BREAKDOWN:")
    for field_name, error_count in report["field_error_rates"].items():
        if error_count > 0:
            print(f"   {field_name:12s}: {error_count} errors")

    # The three issue buckets share one layout; only key and heading differ.
    issue_sections = (
        ("critical_issues", "CRITICAL ISSUES"),
        ("abbreviation_issues", "ABBREVIATION FAILURES"),
        ("ambiguity_issues", "AMBIGUITY FAILURES"),
    )
    for report_key, heading in issue_sections:
        if report[report_key]["count"] > 0:
            print(f"\n{heading} ({report[report_key]['count']}):")
            for test_name in report[report_key]["tests"]:
                print(f"   • {test_name}")

    if verbose:
        failures = report["failed_tests"]
        if failures:
            print(f"\n{rule}")
            print("DETAILED FAILURE LOG:")
            print(rule)
            for failure in failures:
                print(f"\n[X] {failure['name']}")
                print(f"   Query: '{failure['question']}'")
                print(f"   Note:  {failure['notes']}")
                print(f"   Expected: {failure['expected']}")
                print(f"   Actual:   {failure['actual']}")
                for err in failure["errors"]:
                    print(f"   [!] {err['field']}: expected '{err['expected']}', got '{err['actual']}'")

    print(f"\n{rule}")


# ============================================================================
# USAGE
# ============================================================================

# Run all tests.
# NOTE(review): this executes at module import time (there is no
# `if __name__ == "__main__":` guard), so importing this module classifies
# every question in TEST_SUITE and prints the report.
report = run_batch_tests(TEST_SUITE)

# Print the summary plus the detailed per-test failure log (verbose=True)
print_report(report, verbose=True)

# Access specific results
# print(report["failed_tests"])  # List of all failures
# print(report["field_error_rates"])  # Error counts per field

# Run specific group only (abbreviation tests are named with an "abbr_" prefix)
# group_2_tests = [t for t in TEST_SUITE if t.name.startswith("abbr_")]
# abbr_report = run_batch_tests(group_2_tests)
# print_report(abbr_report)