Spaces:

harshvisualz
/

vgecbot

Sleeping

App Files Files Community

vgecbot / classifier-demo.py

harsh-dev

docker deployment

4225666 about 2 months ago

raw

history blame contribute delete

20 kB

	from app.services.filter_classifier import FilterClassifier
	# Module-level classifier instance, constructed once when this file is loaded;
	# run_batch_tests() calls classifier.classify() on every test question.
	classifier = FilterClassifier()

	from typing import Optional, List, Dict, Any
	from dataclasses import dataclass

	@dataclass
	class TestCase:
	    """A single classifier expectation: one query and the fields it should yield.

	    Only ``name`` and ``question`` are required. Every ``expected_*`` field
	    defaults to ``None`` and ``notes`` defaults to an empty string.
	    """

	    # Identifier used in reports and for grouping tests by name.
	    name: str
	    # Raw query text handed to the classifier.
	    question: str

	    # Expected classifier output, one attribute per reported field.
	    expected_type: Optional[str] = None
	    expected_category: Optional[str] = None
	    expected_topic: Optional[str] = None
	    expected_year: Optional[int] = None
	    expected_intent: Optional[str] = None

	    # Free-form rationale: why this test matters.
	    notes: str = ""

	# ============================================================================
	# BATCH TEST SUITE - Organized by weakness category
	# ============================================================================

	# Positional argument order for every TestCase below:
	#   name, question, expected_type, expected_category, expected_topic,
	#   expected_year, expected_intent, notes
	# An expected value of None is NOT checked by run_batch_tests (it skips
	# fields whose expectation is None), so None means "don't care" here.
	# Notes starting with "CRITICAL" are what analyze_results uses to fill the
	# critical-issues bucket, so the tag must live in the note text, not only
	# in a source comment.
	TEST_SUITE = [
	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 1: Basic Sanity (Should Pass)
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("basic_dept_list", "show me all departments",
	             "department", None, None, None, "list",
	             "Basic type detection + list intent"),

	    TestCase("basic_hostel_fees", "what are the hostel fees",
	             "hostel", None, "fees", None, None,
	             "Type + topic combo"),

	    TestCase("basic_library", "library books catalog",
	             "library", None, None, None, None,
	             "Direct type match"),

	    TestCase("basic_placement_stats_year", "campus placement statistics 2023",
	             "placement", None, "stats", 2023, None,
	             "Type + topic + year extraction"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 2: Abbreviation Expansion (Critical)
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("abbr_ce", "CE department faculty",
	             "department", "computer", "faculty", None, None,
	             "'ce' -> computer engineering"),

	    TestCase("abbr_cse_ds", "CSE DS syllabus",
	             "department", "cse_ds", "syllabus", None, None,
	             "'cse' + 'ds' -> cse_ds"),

	    TestCase("abbr_ece_missing", "ECE lab equipment",  # ⚠️ WILL FAIL - not in ABBREVIATIONS
	             "department", "electronics_comm", "lab", None, None,
	             "'ece' NOT in ABBREVIATIONS dict - expect failure"),

	    TestCase("abbr_ei", "EI sensors calibration",
	             "department", "electronics_inst", None, None, None,
	             "'ei' -> electronics instrumentation"),

	    TestCase("abbr_it", "IT department placement stats",
	             "department", "it", "stats", None, None,
	             "'it' -> information technology"),

	    TestCase("abbr_me", "ME workshop schedule",
	             "department", "mechanical", None, None, None,
	             "'me' -> mechanical"),

	    TestCase("abbr_am", "AM stress analysis project",
	             "department", "applied_mechanics", "project", None, None,
	             "'am' -> applied mechanics"),

	    TestCase("abbr_ict", "ICT networking lab",
	             "department", "ict", "lab", None, None,
	             "'ict' in ABBREVIATIONS"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 3: Type/Category/Topic Conflicts (Ambiguity Hell)
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("conflict_club_type_vs_cat", "adventure club activities",
	             "club", "adventure", None, None, None,
	             "'club' word in TYPE, 'adventure' in CATEGORY"),

	    TestCase("conflict_ieee_club", "join the ieee club",
	             "club", "ieee", None, None, None,
	             "ieee category + club type word"),

	    TestCase("conflict_event_ambiguity", "technical events this month",
	             None, None, "event", None, None,
	             "'events' = topic, but clubs host events - type confusion risk"),

	    # The "CRITICAL" marker is part of the note so the report can detect it.
	    TestCase("conflict_process_topic", "what is the admission process",  # ⚠️ CRITICAL
	             "admission", None, "process", None, None,
	             "CRITICAL: 'process' = TOPIC (procedure) - NOT intent"),

	    TestCase("conflict_process_intent", "how to apply step by step process",  # ⚠️ CRITICAL
	             "admission", None, None, None, "process",
	             "CRITICAL: 'process' = INTENT (how to) - same word, different meaning!"),

	    TestCase("conflict_fees_finance", "finance department fees structure",
	             "service", "finance", "fees", None, None,
	             "'finance' category + 'fees' topic overlap"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 4: Semantic Similarity Traps
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("trap_computer_vs_cseds_hardware", "computer hardware VLSI design",
	             "department", "computer", None, None, None,
	             "MUST be 'computer' (VLSI), NOT cse_ds despite 'computer'"),

	    TestCase("trap_cseds_ml", "computer science machine learning course",
	             "department", "cse_ds", None, None, None,
	             "'computer science' + 'ML' = cse_ds (ML in cse_ds anchors)"),

	    TestCase("trap_ece_vs_ei_wireless", "electronics wireless communication",
	             "department", "electronics_comm", None, None, None,
	             "'wireless' = ECE anchor"),

	    TestCase("trap_ece_vs_ei_sensors", "electronics sensor measurement biomedical",
	             "department", "electronics_inst", None, None, None,
	             "'sensors' + 'biomedical' = EI anchor"),

	    TestCase("trap_it_vs_ict_software", "information technology software development ERP",
	             "department", "it", None, None, None,
	             "'software' + 'ERP' = IT anchor"),

	    TestCase("trap_it_vs_ict_fiber", "information communication fiber optic bandwidth",
	             "department", "ict", None, None, None,
	             "'fiber optic' = ICT anchor"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 5: Intent Detection
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("intent_list_explicit", "list all mechanical engineering labs",
	             "department", "mechanical", "lab", None, "list",
	             "Explicit 'list' keyword"),

	    TestCase("intent_list_implicit", "show every available facility",
	             "facility", None, None, None, "list",
	             "Implicit list: 'show every'"),

	    TestCase("intent_count_how_many", "how many students placed in 2024",
	             "placement", None, None, 2024, "count",
	             "'how many' = count intent"),

	    TestCase("intent_count_total", "total number of hostels",
	             "hostel", None, None, None, "count",
	             "'total number' = count intent"),

	    TestCase("intent_detail_explain", "explain the civil engineering syllabus",
	             "department", "civil", "syllabus", None, "detail",
	             "'explain' = detail intent"),

	    TestCase("intent_detail_tell_me", "tell me about research publications",
	             "research", "publication", None, None, "detail",
	             "'tell me about' = detail intent"),

	    TestCase("intent_greeting_hello", "hello good morning",
	             None, None, None, None, "greeting",
	             "Pure greeting - no other fields"),

	    TestCase("intent_greeting_casual", "hi, how are you",
	             None, None, None, None, "greeting",
	             "Casual greeting"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 6: Year Extraction Edge Cases
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("year_simple", "placement records 2023",
	             None, None, None, 2023, None,
	             "Standard year extraction"),

	    TestCase("year_batch_of", "batch of 2024 students",
	             None, None, None, 2024, None,
	             "'batch of' prefix"),

	    TestCase("year_explicit", "year 2025 admission process",
	             "admission", None, None, 2025, None,
	             "Explicit 'year' keyword"),

	    TestCase("year_ordinal_fail", "2nd year computer science",  # ⚠️ LIKELY FAIL
	             "department", "cse_ds", None, None, None,
	             "'2nd year' - NO year should be extracted (regex is \\b20\\d{2}\\b)"),

	    TestCase("year_range_first", "2020-2024 batch",  # ⚠️ AMBIGUOUS
	             None, None, None, 2020, None,
	             "Range - regex finds first 20xx (2020)"),

	    TestCase("year_written_fail", "twenty twenty three",  # ⚠️ WILL FAIL
	             None, None, None, None, None,
	             "Written year - regex only matches digits"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 7: MASTER_INDEX Constraint Violations
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("master_invalid_cat_for_type", "hostel admission process",
	             "hostel", None, None, None, None,
	             "'admission' invalid category for 'hostel' type -> should be None"),

	    TestCase("master_invalid_topic_for_type", "placement syllabus",
	             "placement", None, None, None, None,
	             "'syllabus' invalid topic for 'placement' -> should be None"),

	    TestCase("master_invalid_topic_research", "research timetable",
	             "research", None, None, None, None,
	             "'timetable' invalid for 'research' type -> should be None"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 8: Complex Real-World Queries
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("complex_bonafide", "how to get bonafide certificate from finance office",
	             "service", "finance", "document", None, "process",
	             "4-field complex: service+finance+document+process"),

	    TestCase("complex_five_fields", "show me the 2024 civil engineering final year project list",
	             "department", "civil", "project", 2024, "list",
	             "5 fields: type+category+topic+year+intent"),

	    TestCase("complex_nss", "NSS volunteer induction schedule 2023",
	             "club", "nss", "induction", 2023, None,
	             "4 fields: club+nss+induction+year"),

	    TestCase("vague_bus", "when does the bus leave",
	             "service", "transport", None, None, None,
	             "Short query - transport detection"),

	    TestCase("ambiguous_hod", "where can I find the HOD",
	             None, None, "contact", None, None,
	             "Ambiguous: HOD in faculty topic, but asking contact/location"),

	    # ─────────────────────────────────────────────────────────────────────────
	    # GROUP 9: Adversarial / Edge Cases
	    # ─────────────────────────────────────────────────────────────────────────
	    TestCase("edge_empty", "",
	             None, None, None, None, None,
	             "Empty string - all None expected"),

	    TestCase("edge_nonsense", "the quick brown fox",
	             None, None, None, None, None,
	             "No relevant keywords"),

	    TestCase("edge_all_caps", "LIBRARY BOOKS",
	             "library", None, None, None, None,
	             "Case sensitivity test"),

	    TestCase("edge_mixed_case", "Ce DePaRtMeNt",
	             "department", "computer", None, None, None,
	             "Mixed case abbreviation"),

	    TestCase("edge_punctuation", "civil-engineering!!! lab???",
	             "department", "civil", "lab", None, None,
	             "Punctuation handling"),

	    TestCase("edge_parentheses", "cse_ds (data science) syllabus",
	             "department", "cse_ds", "syllabus", None, None,
	             "Parentheses handling"),
	]


	# ============================================================================
	# BATCH PROCESSING ENGINE
	# ============================================================================

	def run_batch_tests(test_cases: List[TestCase]) -> Dict[str, Any]:
	    """Classify every test question and compare the output with expectations.

	    Each question is passed to the module-level ``classifier``; its result is
	    indexed by the keys "type", "category", "topic", "year" and "intent".
	    A field whose expected value is ``None`` is not checked: only fields with
	    a non-None expectation can produce an error entry.

	    Args:
	        test_cases: The TestCase objects to run, in order.

	    Returns:
	        The report produced by ``analyze_results`` from the per-test records.
	    """
	    records = []

	    for case in test_cases:
	        actual = classifier.classify(case.question)

	        # Expectation per reported field, in reporting order.
	        wanted = {
	            "type": case.expected_type,
	            "category": case.expected_category,
	            "topic": case.expected_topic,
	            "year": case.expected_year,
	            "intent": case.expected_intent,
	        }

	        # A mismatch is recorded only when an expectation was actually stated.
	        mismatches = [
	            {"field": key, "expected": target, "actual": actual[key]}
	            for key, target in wanted.items()
	            if target is not None and actual[key] != target
	        ]

	        records.append({
	            "name": case.name,
	            "question": case.question,
	            "notes": case.notes,
	            "expected": wanted,
	            "actual": actual,
	            "passed": not mismatches,
	            "errors": mismatches,
	        })

	    return analyze_results(records)


	def analyze_results(results: List[Dict]) -> Dict[str, Any]:
	    """Summarise per-test result records into an analysis report.

	    Args:
	        results: One dict per test, each with at least the keys "name",
	            "notes", "passed" and "errors" (a list of dicts carrying a
	            "field" key naming the mismatching field).

	    Returns:
	        A dict with:
	          * "summary": total / passed / failed counts and a "pass_rate"
	            string ("0%" when there are no results);
	          * "field_error_rates": error count per field name;
	          * "critical_issues": failed tests whose notes contain "CRITICAL";
	          * "abbreviation_issues": failed tests whose name contains "abbr";
	          * "ambiguity_issues": failed tests whose name contains "conflict";
	          * "all_results": the input list;
	          * "failed_tests": the failing records.
	    """
	    total = len(results)
	    failed_tests = [r for r in results if not r["passed"]]
	    failed = len(failed_tests)
	    passed = total - failed

	    # Error breakdown by field
	    field_errors = {"type": 0, "category": 0, "topic": 0, "year": 0, "intent": 0}
	    for r in results:
	        for err in r["errors"]:
	            field_errors[err["field"]] += 1

	    # Categorize failures
	    critical_failures = [r for r in failed_tests if "CRITICAL" in r["notes"]]
	    # Match on "abbr": a filter on "abbrev" misses names that use the short
	    # "abbr_" prefix, and "abbr" still matches any name spelled "abbrev...".
	    abbr_failures = [r for r in failed_tests if "abbr" in r["name"].lower()]
	    ambiguity_failures = [r for r in failed_tests if "conflict" in r["name"].lower()]

	    return {
	        "summary": {
	            "total": total,
	            "passed": passed,
	            "failed": failed,
	            "pass_rate": f"{passed/total*100:.1f}%" if total > 0 else "0%",
	        },
	        "field_error_rates": field_errors,
	        "critical_issues": {
	            "count": len(critical_failures),
	            "tests": [r["name"] for r in critical_failures],
	        },
	        "abbreviation_issues": {
	            "count": len(abbr_failures),
	            "tests": [r["name"] for r in abbr_failures],
	        },
	        "ambiguity_issues": {
	            "count": len(ambiguity_failures),
	            "tests": [r["name"] for r in ambiguity_failures],
	        },
	        "all_results": results,
	        "failed_tests": failed_tests,
	    }


	def print_report(report: Dict[str, Any], verbose: bool = False):
	    """Write a human-readable rendering of *report* to standard output.

	    Prints the pass/fail headline, the per-field error counts (non-zero
	    only), then the critical, abbreviation and ambiguity sections when their
	    count is positive.

	    Args:
	        report: A dict with the keys "summary", "field_error_rates",
	            "critical_issues", "abbreviation_issues", "ambiguity_issues"
	            and "failed_tests".
	        verbose: When true and there are failures, also print a detailed
	            entry (query, note, expected, actual, field errors) per failure.
	    """

	    def _bullets(names):
	        # One bullet line per test name.
	        for name in names:
	            print(f" • {name}")

	    s = report["summary"]
	    print(f"\n{'='*70}")
	    print(f"BATCH TEST RESULTS: {s['passed']}/{s['total']} passed ({s['pass_rate']})")
	    print(f"{'='*70}")

	    print(f"\nFIELD ERROR BREAKDOWN:")
	    nonzero = [
	        (field, count)
	        for field, count in report["field_error_rates"].items()
	        if count > 0
	    ]
	    for field, count in nonzero:
	        print(f" {field:12s}: {count} errors")

	    critical = report["critical_issues"]
	    if critical["count"] > 0:
	        print(f"\nCRITICAL ISSUES ({report['critical_issues']['count']}):")
	        _bullets(critical["tests"])

	    abbreviation = report["abbreviation_issues"]
	    if abbreviation["count"] > 0:
	        print(f"\nABBREVIATION FAILURES ({report['abbreviation_issues']['count']}):")
	        _bullets(abbreviation["tests"])

	    ambiguity = report["ambiguity_issues"]
	    if ambiguity["count"] > 0:
	        print(f"\nAMBIGUITY FAILURES ({report['ambiguity_issues']['count']}):")
	        _bullets(ambiguity["tests"])

	    # The failure list is only consulted when verbose output was requested.
	    failures = report["failed_tests"] if verbose else []
	    if failures:
	        print(f"\n{'='*70}")
	        print("DETAILED FAILURE LOG:")
	        print(f"{'='*70}")
	        for r in failures:
	            print(f"\n[X] {r['name']}")
	            print(f" Query: '{r['question']}'")
	            print(f" Note: {r['notes']}")
	            print(f" Expected: {r['expected']}")
	            print(f" Actual: {r['actual']}")
	            for err in r["errors"]:
	                print(f" [!] {err['field']}: expected '{err['expected']}', got '{err['actual']}'")

	    print(f"\n{'='*70}")


	# ============================================================================
	# USAGE
	# ============================================================================

	# Run the full suite.
	# NOTE(review): these statements sit at module level, so they execute whenever
	# this file is loaded, not only when it is run as a script.
	report = run_batch_tests(TEST_SUITE)

	# Print the summary followed by the detailed per-failure log.
	print_report(report, verbose=True)

	# The report dict can also be inspected directly:
	# print(report["failed_tests"]) # List of all failures
	# print(report["field_error_rates"]) # Error counts per field

	# To run one group only, filter TEST_SUITE by name prefix, e.g. the
	# abbreviation tests:
	# group_2_tests = [t for t in TEST_SUITE if t.name.startswith("abbr_")]
	# abbr_report = run_batch_tests(group_2_tests)
	# print_report(abbr_report)