Spaces:
Sleeping
Sleeping
| from app.services.filter_classifier import FilterClassifier | |
# Module-level classifier instance; run_batch_tests() calls its classify()
# method once per test question.
classifier = FilterClassifier()
| from typing import Optional, List, Dict, Any | |
| from dataclasses import dataclass | |
@dataclass
class TestCase:
    """One classifier expectation: a question plus the fields it should yield.

    Instances are built positionally in TEST_SUITE, in the field order
    declared here. The ``@dataclass`` decorator generates the ``__init__``
    that makes those positional calls work.
    """

    # Keep pytest from trying to collect this class as a test container
    # (its name starts with "Test"). Unannotated, so it is not a dataclass field.
    __test__ = False

    # Unique identifier for the case; its prefix (e.g. "abbr_", "conflict_")
    # is used to group failures in the report.
    name: str
    # Raw query text handed to the classifier.
    question: str
    # Expected classifier outputs. A value of None means "do not check this
    # field" in run_batch_tests().
    expected_type: Optional[str] = None
    expected_category: Optional[str] = None
    expected_topic: Optional[str] = None
    expected_year: Optional[int] = None
    expected_intent: Optional[str] = None
    notes: str = ""  # Why this test matters
| # ============================================================================ | |
| # BATCH TEST SUITE - Organized by weakness category | |
| # ============================================================================ | |
# Each entry is TestCase(name, question, expected_type, expected_category,
# expected_topic, expected_year, expected_intent, notes).
# Cases that must be reported as critical carry the literal marker "CRITICAL"
# in their notes text: the report only sees runtime strings, not source
# comments, so a comment-only marker would never reach it.
TEST_SUITE = [
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 1: Basic Sanity (Should Pass)
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("basic_dept_list", "show me all departments",
             "department", None, None, None, "list",
             "Basic type detection + list intent"),
    TestCase("basic_hostel_fees", "what are the hostel fees",
             "hostel", None, "fees", None, None,
             "Type + topic combo"),
    TestCase("basic_library", "library books catalog",
             "library", None, None, None, None,
             "Direct type match"),
    TestCase("basic_placement_stats_year", "campus placement statistics 2023",
             "placement", None, "stats", 2023, None,
             "Type + topic + year extraction"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 2: Abbreviation Expansion (Critical)
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("abbr_ce", "CE department faculty",
             "department", "computer", "faculty", None, None,
             "'ce' -> computer engineering"),
    TestCase("abbr_cse_ds", "CSE DS syllabus",
             "department", "cse_ds", "syllabus", None, None,
             "'cse' + 'ds' -> cse_ds"),
    TestCase("abbr_ece_missing", "ECE lab equipment",  # ⚠️ WILL FAIL - not in ABBREVIATIONS
             "department", "electronics_comm", "lab", None, None,
             "'ece' NOT in ABBREVIATIONS dict - expect failure"),
    TestCase("abbr_ei", "EI sensors calibration",
             "department", "electronics_inst", None, None, None,
             "'ei' -> electronics instrumentation"),
    TestCase("abbr_it", "IT department placement stats",
             "department", "it", "stats", None, None,
             "'it' -> information technology"),
    TestCase("abbr_me", "ME workshop schedule",
             "department", "mechanical", None, None, None,
             "'me' -> mechanical"),
    TestCase("abbr_am", "AM stress analysis project",
             "department", "applied_mechanics", "project", None, None,
             "'am' -> applied mechanics"),
    TestCase("abbr_ict", "ICT networking lab",
             "department", "ict", "lab", None, None,
             "'ict' in ABBREVIATIONS"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 3: Type/Category/Topic Conflicts (Ambiguity Hell)
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("conflict_club_type_vs_cat", "adventure club activities",
             "club", "adventure", None, None, None,
             "'club' word in TYPE, 'adventure' in CATEGORY"),
    TestCase("conflict_ieee_club", "join the ieee club",
             "club", "ieee", None, None, None,
             "ieee category + club type word"),
    TestCase("conflict_event_ambiguity", "technical events this month",
             None, None, "event", None, None,
             "'events' = topic, but clubs host events - type confusion risk"),
    TestCase("conflict_process_topic", "what is the admission process",  # ⚠️ CRITICAL
             "admission", None, "process", None, None,
             "CRITICAL: 'process' = TOPIC (procedure) - NOT intent"),
    TestCase("conflict_process_intent", "how to apply step by step process",  # ⚠️ CRITICAL
             "admission", None, None, None, "process",
             "CRITICAL: 'process' = INTENT (how to) - same word, different meaning!"),
    TestCase("conflict_fees_finance", "finance department fees structure",
             "service", "finance", "fees", None, None,
             "'finance' category + 'fees' topic overlap"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 4: Semantic Similarity Traps
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("trap_computer_vs_cseds_hardware", "computer hardware VLSI design",
             "department", "computer", None, None, None,
             "MUST be 'computer' (VLSI), NOT cse_ds despite 'computer'"),
    TestCase("trap_cseds_ml", "computer science machine learning course",
             "department", "cse_ds", None, None, None,
             "'computer science' + 'ML' = cse_ds (ML in cse_ds anchors)"),
    TestCase("trap_ece_vs_ei_wireless", "electronics wireless communication",
             "department", "electronics_comm", None, None, None,
             "'wireless' = ECE anchor"),
    TestCase("trap_ece_vs_ei_sensors", "electronics sensor measurement biomedical",
             "department", "electronics_inst", None, None, None,
             "'sensors' + 'biomedical' = EI anchor"),
    TestCase("trap_it_vs_ict_software", "information technology software development ERP",
             "department", "it", None, None, None,
             "'software' + 'ERP' = IT anchor"),
    TestCase("trap_it_vs_ict_fiber", "information communication fiber optic bandwidth",
             "department", "ict", None, None, None,
             "'fiber optic' = ICT anchor"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 5: Intent Detection
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("intent_list_explicit", "list all mechanical engineering labs",
             "department", "mechanical", "lab", None, "list",
             "Explicit 'list' keyword"),
    TestCase("intent_list_implicit", "show every available facility",
             "facility", None, None, None, "list",
             "Implicit list: 'show every'"),
    TestCase("intent_count_how_many", "how many students placed in 2024",
             "placement", None, None, 2024, "count",
             "'how many' = count intent"),
    TestCase("intent_count_total", "total number of hostels",
             "hostel", None, None, None, "count",
             "'total number' = count intent"),
    TestCase("intent_detail_explain", "explain the civil engineering syllabus",
             "department", "civil", "syllabus", None, "detail",
             "'explain' = detail intent"),
    TestCase("intent_detail_tell_me", "tell me about research publications",
             "research", "publication", None, None, "detail",
             "'tell me about' = detail intent"),
    TestCase("intent_greeting_hello", "hello good morning",
             None, None, None, None, "greeting",
             "Pure greeting - no other fields"),
    TestCase("intent_greeting_casual", "hi, how are you",
             None, None, None, None, "greeting",
             "Casual greeting"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 6: Year Extraction Edge Cases
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("year_simple", "placement records 2023",
             None, None, None, 2023, None,
             "Standard year extraction"),
    TestCase("year_batch_of", "batch of 2024 students",
             None, None, None, 2024, None,
             "'batch of' prefix"),
    TestCase("year_explicit", "year 2025 admission process",
             "admission", None, None, 2025, None,
             "Explicit 'year' keyword"),
    TestCase("year_ordinal_fail", "2nd year computer science",  # ⚠️ LIKELY FAIL
             "department", "cse_ds", None, None, None,
             "'2nd year' - NO year should be extracted (regex is \\b20\\d{2}\\b)"),
    TestCase("year_range_first", "2020-2024 batch",  # ⚠️ AMBIGUOUS
             None, None, None, 2020, None,
             "Range - regex finds first 20xx (2020)"),
    TestCase("year_written_fail", "twenty twenty three",  # ⚠️ WILL FAIL
             None, None, None, None, None,
             "Written year - regex only matches digits"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 7: MASTER_INDEX Constraint Violations
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("master_invalid_cat_for_type", "hostel admission process",
             "hostel", None, None, None, None,
             "'admission' invalid category for 'hostel' type -> should be None"),
    TestCase("master_invalid_topic_for_type", "placement syllabus",
             "placement", None, None, None, None,
             "'syllabus' invalid topic for 'placement' -> should be None"),
    TestCase("master_invalid_topic_research", "research timetable",
             "research", None, None, None, None,
             "'timetable' invalid for 'research' type -> should be None"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 8: Complex Real-World Queries
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("complex_bonafide", "how to get bonafide certificate from finance office",
             "service", "finance", "document", None, "process",
             "4-field complex: service+finance+document+process"),
    TestCase("complex_five_fields", "show me the 2024 civil engineering final year project list",
             "department", "civil", "project", 2024, "list",
             "5 fields: type+category+topic+year+intent"),
    TestCase("complex_nss", "NSS volunteer induction schedule 2023",
             "club", "nss", "induction", 2023, None,
             "4 fields: club+nss+induction+year"),
    TestCase("vague_bus", "when does the bus leave",
             "service", "transport", None, None, None,
             "Short query - transport detection"),
    TestCase("ambiguous_hod", "where can I find the HOD",
             None, None, "contact", None, None,
             "Ambiguous: HOD in faculty topic, but asking contact/location"),
    # ─────────────────────────────────────────────────────────────────────────
    # GROUP 9: Adversarial / Edge Cases
    # ─────────────────────────────────────────────────────────────────────────
    TestCase("edge_empty", "",
             None, None, None, None, None,
             "Empty string - all None expected"),
    TestCase("edge_nonsense", "the quick brown fox",
             None, None, None, None, None,
             "No relevant keywords"),
    TestCase("edge_all_caps", "LIBRARY BOOKS",
             "library", None, None, None, None,
             "Case sensitivity test"),
    TestCase("edge_mixed_case", "Ce DePaRtMeNt",
             "department", "computer", None, None, None,
             "Mixed case abbreviation"),
    TestCase("edge_punctuation", "civil-engineering!!! lab???",
             "department", "civil", "lab", None, None,
             "Punctuation handling"),
    TestCase("edge_parentheses", "cse_ds (data science) syllabus",
             "department", "cse_ds", "syllabus", None, None,
             "Parentheses handling"),
]
| # ============================================================================ | |
| # BATCH PROCESSING ENGINE | |
| # ============================================================================ | |
def run_batch_tests(test_cases: List[TestCase]) -> Dict[str, Any]:
    """Classify every test question and return the analysed report.

    Each case's question is passed to the module-level ``classifier``; the
    result is indexed by the field names "type", "category", "topic", "year"
    and "intent" and compared with the case's expectations.

    Only fields whose expected value is not None are compared, so a case
    cannot assert that a field must come back empty.

    Args:
        test_cases: The cases to run, in order.

    Returns:
        The dictionary produced by ``analyze_results`` for the per-case
        outcome records built here.
    """
    outcomes = []
    for case in test_cases:
        actual = classifier.classify(case.question)

        expected_by_field = {
            "type": case.expected_type,
            "category": case.expected_category,
            "topic": case.expected_topic,
            "year": case.expected_year,
            "intent": case.expected_intent,
        }

        # A None expectation means "unchecked", not "must be None".
        mismatches = [
            {"field": field_name, "expected": wanted, "actual": actual[field_name]}
            for field_name, wanted in expected_by_field.items()
            if wanted is not None and actual[field_name] != wanted
        ]

        outcomes.append({
            "name": case.name,
            "question": case.question,
            "notes": case.notes,
            "expected": expected_by_field,
            "actual": actual,
            "passed": not mismatches,
            "errors": mismatches,
        })

    return analyze_results(outcomes)
def analyze_results(results: List[Dict]) -> Dict[str, Any]:
    """Summarise per-test outcome records into a report dictionary.

    Args:
        results: Outcome records, each with at least the keys "name",
            "notes", "passed" and "errors" (a list of dicts carrying a
            "field" key naming one of type/category/topic/year/intent).

    Returns:
        A dict with the keys "summary" (total, passed, failed, pass_rate),
        "field_error_rates" (error count per field), "critical_issues",
        "abbreviation_issues" and "ambiguity_issues" (each a count plus the
        failing test names), "all_results" and "failed_tests".

    Raises:
        KeyError: If an error record names a field outside the five known
            fields.
    """
    total = len(results)
    failed_tests = [r for r in results if not r["passed"]]
    failed = len(failed_tests)
    passed = total - failed

    # Error breakdown by field; every known field is present even at zero.
    field_errors = {"type": 0, "category": 0, "topic": 0, "year": 0, "intent": 0}
    for record in failed_tests:
        for err in record["errors"]:
            field_errors[err["field"]] += 1
    # Passing records are expected to carry no errors, but count them too if
    # they do, so the totals cover every record in the input.
    for record in results:
        if record["passed"]:
            for err in record["errors"]:
                field_errors[err["field"]] += 1

    # A failure is critical when its notes contain the literal "CRITICAL".
    critical_failures = [r for r in failed_tests if "CRITICAL" in r["notes"]]
    # Suite names use the "abbr_" prefix, so match "abbr" rather than
    # "abbrev"; names containing "abbrev" still match as before.
    abbr_failures = [r for r in failed_tests if "abbr" in r["name"].lower()]
    ambiguity_failures = [r for r in failed_tests if "conflict" in r["name"].lower()]

    def _bucket(records: List[Dict]) -> Dict[str, Any]:
        """Return the count and test names for one failure category."""
        return {"count": len(records), "tests": [r["name"] for r in records]}

    return {
        "summary": {
            "total": total,
            "passed": passed,
            "failed": failed,
            # Guard the division: an empty run reports a flat "0%".
            "pass_rate": f"{passed/total*100:.1f}%" if total > 0 else "0%",
        },
        "field_error_rates": field_errors,
        "critical_issues": _bucket(critical_failures),
        "abbreviation_issues": _bucket(abbr_failures),
        "ambiguity_issues": _bucket(ambiguity_failures),
        "all_results": results,
        "failed_tests": failed_tests,
    }
def print_report(report: Dict[str, Any], verbose: bool = False):
    """Print a human-readable rendering of a batch-test report to stdout.

    Args:
        report: Dictionary with the keys "summary", "field_error_rates",
            "critical_issues", "abbreviation_issues", "ambiguity_issues"
            and (read only when ``verbose`` is true) "failed_tests".
        verbose: When true and there are failed tests, also print each
            failure's query, notes, expected/actual values and mismatches.
    """
    rule = "=" * 70
    summary = report["summary"]

    print(f"\n{rule}")
    print(f"BATCH TEST RESULTS: {summary['passed']}/{summary['total']} passed ({summary['pass_rate']})")
    print(rule)

    # Only fields with at least one error are listed under the heading.
    print("\nFIELD ERROR BREAKDOWN:")
    for field_name, error_count in report["field_error_rates"].items():
        if error_count > 0:
            print(f" {field_name:12s}: {error_count} errors")

    # The three failure categories share one layout: heading, then bullets.
    titled_sections = (
        ("critical_issues", "CRITICAL ISSUES"),
        ("abbreviation_issues", "ABBREVIATION FAILURES"),
        ("ambiguity_issues", "AMBIGUITY FAILURES"),
    )
    for report_key, heading in titled_sections:
        section = report[report_key]
        if section["count"] > 0:
            print(f"\n{heading} ({section['count']}):")
            for test_name in section["tests"]:
                print(f" • {test_name}")

    if verbose and report["failed_tests"]:
        print(f"\n{rule}")
        print("DETAILED FAILURE LOG:")
        print(rule)
        for failure in report["failed_tests"]:
            print(f"\n[X] {failure['name']}")
            print(f" Query: '{failure['question']}'")
            print(f" Note: {failure['notes']}")
            print(f" Expected: {failure['expected']}")
            print(f" Actual: {failure['actual']}")
            for mismatch in failure["errors"]:
                print(f" [!] {mismatch['field']}: expected '{mismatch['expected']}', got '{mismatch['actual']}'")

    print(f"\n{rule}")
| # ============================================================================ | |
| # USAGE | |
| # ============================================================================ | |
# Run the whole suite. These statements sit at module level, so they execute
# whenever this file is run or imported.
report = run_batch_tests(TEST_SUITE)
# Print the summary followed by the per-test failure log (verbose=True).
print_report(report, verbose=True)
# Example: access specific parts of the report.
# print(report["failed_tests"]) # List of all failures
# print(report["field_error_rates"]) # Error counts per field
# Example: run a single group only, selected by test-name prefix.
# group_2_tests = [t for t in TEST_SUITE if t.name.startswith("abbr_")]
# abbr_report = run_batch_tests(group_2_tests)
# print_report(abbr_report)