# (removed hosting-page scrape residue that preceded the module docstring)
| """Rule-based validation: 8 fast checks (~50ms, no LLM required).""" | |
from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import List, Optional, Tuple

from loguru import logger
# ── Bloom verb banks (Anderson & Krathwohl, 2001 revised taxonomy) ─────────────
# Canonical verbs from: Anderson, L.W. & Krathwohl, D.R. (2001).
# A Taxonomy for Learning, Teaching and Assessing. Longman.
# Extensions marked (*) are beyond A&K canonical list, retained for
# Nigerian curriculum alignment (common in NERDC/WAEC marking schemes).
# Maps each revised-taxonomy level (uppercase key) to lowercase verb phrases
# matched against generated content (see check_bloom_verbs). Both British and
# American spellings are listed, and entries may be multi-word
# ("give examples", "carry out"). Note some verbs appear at several levels
# (e.g. "identify", "compare", "infer") — that mirrors the taxonomy itself.
BLOOM_VERBS_BY_LEVEL: dict[str, List[str]] = {
    "REMEMBER": ["define", "duplicate", "list", "memorise", "memorize", "recall",
                 "repeat", "reproduce", "state", "recognise", "recognize",
                 "identify", "name", "label", "match", "select", "locate",
                 "know"],  # *
    "UNDERSTAND": ["classify", "describe", "discuss", "explain", "identify",
                   "locate", "recognise", "recognize", "report", "select",
                   "translate", "paraphrase", "summarise", "summarize",
                   "interpret", "exemplify", "infer", "compare",
                   "give examples", "illustrate",
                   "express", "tell", "review", "understand"],  # *
    "APPLY": ["choose", "demonstrate", "dramatise", "dramatize",
              "employ", "illustrate", "interpret", "operate",
              "schedule", "sketch", "solve", "use", "write",
              "carry out", "execute", "implement", "apply",
              "calculate", "complete", "show", "practise", "practice",
              "perform", "model", "present"],  # *
    "ANALYZE": ["appraise", "compare", "contrast", "criticise", "criticize",
                "differentiate", "discriminate", "distinguish", "examine",
                "experiment", "question", "test", "analyse", "analyze",
                "break down", "categorise", "categorize", "separate",
                "order", "attribute", "organise", "organize", "deconstruct",
                "investigate", "relate", "infer"],
    "EVALUATE": ["appraise", "argue", "defend", "judge", "select",
                 "support", "value", "evaluate", "critique", "assess",
                 "justify", "recommend", "rate", "rank", "measure",
                 "decide", "review", "weigh", "conclude",
                 "prioritise", "prioritize"],
    "CREATE": ["assemble", "construct", "create", "design", "develop",
               "formulate", "write", "plan", "produce", "generate",
               "invent", "make", "build", "compose", "hypothesise",
               "hypothesize", "propose", "combine", "compile", "devise"],
}
# ── Profanity / explicit content blocklist (non-exhaustive) ────────────────────
# Lowercase regexes applied to content.lower(); any hit is a HARD fail that the
# LLM judge cannot override (see check_no_explicit_content). Negative
# lookaheads exempt legitimate health/biology phrasing ("sex education",
# "penis anatomy", "vagina health").
# NOTE(review): the "ual\s+health" lookahead alternative can never fire —
# \b immediately after "sex" already prevents a match inside "sexual".
EXPLICIT_BLOCKLIST = [
    r"\bfuck\b", r"\bshit\b", r"\bporn\b", r"\bsex\b(?!ual\s+health|\s+education)",
    r"\bpenis\b(?!\s+(?:development|anatomy|health))", r"\bvagina\b(?!\s+(?:anatomy|health))",
    r"\bnude\b", r"\bnaked\b",
]
# ── Cultural flag: Western names that dilute local authenticity ────────────────
# Lowercase given names. check_cultural_flags lowercases each capitalised word
# found in the content and tests membership here vs. LOCAL_NAMES to compute a
# Western-name ratio.
WESTERN_NAMES = {
    "john", "james", "peter", "michael", "david", "william", "robert", "richard",
    "thomas", "charles", "george", "edward", "henry", "joseph", "paul",
    "mary", "jennifer", "jessica", "emily", "sarah", "elizabeth", "lisa",
    "susan", "karen", "nancy", "betty", "helen", "sandra", "donna",
    "matthew", "andrew", "daniel", "christopher", "mark", "joshua", "ryan",
    "kevin", "brian", "gary", "timothy", "jason", "jeff", "frank",
}
# West African (Nigerian and Ghanaian) given/family names, the "local"
# counterpart to WESTERN_NAMES for the cultural-authenticity ratio.
# Stored as a set, so the duplicate "ama" literal below is harmless.
LOCAL_NAMES = {
    "chukwuemeka", "adaeze", "aminu", "tunde", "kofi", "ama", "fatima",
    "emeka", "ngozi", "kwame", "abena", "bola", "sola", "kemi", "biodun",
    "seun", "yetunde", "taiwo", "kehinde", "femi", "toyin", "chioma",
    "nkechi", "uchenna", "obiora", "chiamaka", "obinna", "chidi", "aisha",
    "musa", "ibrahim", "halima", "zainab", "binta", "garba", "yusuf",
    "efua", "akosua", "adjoa", "esi", "yaw", "kojo", "ama", "akua",
    "nana", "afia", "ekua", "mensah", "asante", "owusu", "boateng",
}
# ── Curriculum alignment keywords (per board) ──────────────────────────────────
# Lowercase terms expected in content aligned to each examination/curriculum
# board. check_curriculum_alignment requires a minimum number of hits and
# falls back to "DEFAULT" for unknown board codes.
CURRICULUM_KEYWORDS = {
    "NERDC": ["objective", "activity", "evaluation", "term", "week", "topic", "learning", "assessment"],
    "WAEC": ["objective", "question", "mark", "answer", "examination", "candidate", "section"],
    "NECO": ["objective", "section", "question", "answer", "mark", "examination"],
    "NABTEB": ["objective", "practical", "trade", "skill", "competency", "assessment"],
    "UBEC": ["objective", "activity", "term", "week", "learning", "lesson"],
    "GES_GH": ["objective", "activity", "term", "week", "learning", "lesson", "indicator"],
    "DEFAULT":["objective", "topic", "learning", "assessment", "activity"],
}
# ── Date hallucination check ───────────────────────────────────────────────────
# Flags 4-digit years OUTSIDE the plausible range [1800, 2030]:
#   1[0-7]\d{2}          -> 1000-1799
#   20(?:3[1-9]|[4-9]\d) -> 2031-2099  (2030 itself is allowed, per the range)
#   2[1-9]\d{2}          -> 2100-2999
# Fix: the previous `20[3-9]\d` alternative wrongly flagged 2030.
YEAR_PATTERN = re.compile(r"\b(1[0-7]\d{2}|20(?:3[1-9]|[4-9]\d)|2[1-9]\d{2})\b")
@dataclass
class RuleResult:
    """Outcome of one validation rule.

    Fix: the ``@dataclass`` decorator was missing, so positional
    construction like ``RuleResult("length_check", False, msg)`` — used by
    every rule function in this module — raised ``TypeError``.
    """
    rule_name: str          # stable identifier, e.g. "length_check"
    passed: bool            # True when the rule is satisfied (or skipped)
    message: str = ""       # human-readable detail for the report notes
    is_hard_fail: bool = False  # hard fails auto-fail regardless of judge
@dataclass
class ValidationRulesReport:
    """Consolidated result of running all validation rules.

    Fix: the ``@dataclass`` decorator was missing, so
    ``ValidationRulesReport()`` raised no-arg-init issues and the bare
    ``field(default_factory=list)`` class attributes were shared ``Field``
    objects instead of per-instance lists.
    """
    passed: List[str] = field(default_factory=list)        # rule names that passed
    failed: List[str] = field(default_factory=list)        # soft failures
    hard_failed: List[str] = field(default_factory=list)   # auto-fail rules
    notes: List[str] = field(default_factory=list)         # "rule: message" details

    def all_passed(self) -> bool:
        """True when no rule failed, softly or hard."""
        return not self.failed and not self.hard_failed

    def has_hard_fail(self) -> bool:
        """True when at least one hard-fail rule tripped."""
        return bool(self.hard_failed)
# ── Individual rule functions ──────────────────────────────────────────────────
def check_length(content: str, max_tokens: int) -> RuleResult:
    """Ensure content length sits between ~25% and ~600% of the token budget.

    Bounds are expressed in characters using the rough heuristic of
    4 characters per token.
    """
    n_chars = len(content)
    min_chars = max_tokens * 0.25 * 4  # ~4 chars per token
    max_chars = max_tokens * 6.0 * 4
    if n_chars < min_chars:
        return RuleResult("length_check", False, f"Content too short ({n_chars} chars; min ~{int(min_chars)})")
    if n_chars > max_chars:
        return RuleResult("length_check", False, f"Content too long ({n_chars} chars; max ~{int(max_chars)})")
    return RuleResult("length_check", True)
def check_language_detection(
    content: str,
    expected_lang: str,
    threshold: float = 0.85,
) -> RuleResult:
    """Detect the content's language and fail on a confident mismatch.

    Only the first 1000 characters are inspected. The rule is best-effort:
    if neither detector library is installed, or detection errors out, the
    rule passes rather than blocking the pipeline.

    Args:
        content: Generated text to inspect.
        expected_lang: Expected language code ("en", "yo", "ha", "ig", "pcm", ...).
        threshold: Minimum lingua confidence before a mismatch counts as a failure.
    """
    try:
        # Preferred backend: lingua. Building from_all_languages() on every
        # call is expensive but keeps this function stateless.
        # NOTE(review): LinguaLang is imported but unused — presumably a leftover.
        from lingua import Language as LinguaLang, LanguageDetectorBuilder  # type: ignore
        detector = (
            LanguageDetectorBuilder.from_all_languages()
            .with_low_accuracy_mode()
            .build()
        )
        result = detector.detect_language_of(content[:1000])
        if result is None:
            # Detector could not decide (e.g. very short/mixed text) — pass.
            return RuleResult("language_detection", True, "Language could not be determined; skipping")
        detected = result.name.lower()
        # ISO code -> lingua language name; "pcm" (Nigerian Pidgin) is treated as English.
        lang_map = {"en": "english", "yo": "yoruba", "ha": "hausa", "ig": "igbo", "pcm": "english"}
        expected_name = lang_map.get(expected_lang, expected_lang.lower())
        # Symmetric substring test tolerates partial names on either side.
        if expected_name not in detected and detected not in expected_name:
            # Mismatch — only fail when the detector is confident enough.
            conf_result = detector.compute_language_confidence(content[:1000], result)
            if conf_result > threshold:
                return RuleResult(
                    "language_detection", False,
                    f"Language mismatch: expected '{expected_lang}', detected '{detected}' (conf {conf_result:.2f})"
                )
    except ImportError:
        # Fall back to langdetect (no confidence API — mismatch alone fails,
        # except for English regional variants like "en-NG").
        try:
            from langdetect import detect  # type: ignore
            detected = detect(content[:1000])
            lang_map = {"en": "en", "yo": "yo", "ha": "ha", "ig": "ig", "pcm": "en"}
            expected_code = lang_map.get(expected_lang, expected_lang.split("-")[0])
            if detected != expected_code and not expected_lang.startswith("en-"):
                return RuleResult(
                    "language_detection", False,
                    f"Language mismatch: expected '{expected_lang}', detected '{detected}'"
                )
        except Exception as exc:
            # langdetect missing or failed — log and treat the rule as passed.
            logger.warning(f"Language detection skipped: {exc}")
    return RuleResult("language_detection", True)
def check_bloom_verbs(
    content: str,
    bloom_level: str,
) -> RuleResult:
    r"""Require at least one verb of the requested Bloom level in the content.

    Fix: verbs were previously matched as raw substrings, which produced
    false positives such as "use" inside "because" or "state" inside
    "estates". Verbs are now anchored at a word start (``\b`` + verb), which
    still accepts inflected forms ("defines", "applying") because no
    trailing boundary is required.

    Args:
        content: Generated text to inspect (matched case-insensitively).
        bloom_level: Taxonomy level key, e.g. "UNDERSTAND" (case-insensitive).

    Returns:
        Passing RuleResult when a verb is found or the level is unknown
        (unknown levels are skipped, not failed).
    """
    verbs = BLOOM_VERBS_BY_LEVEL.get(bloom_level.upper(), [])
    if not verbs:
        return RuleResult("bloom_verb_presence", True, "No verb list for bloom level; skipping")
    lower = content.lower()
    # Word-start anchored prefix match; re.escape keeps multi-word entries literal.
    found = [v for v in verbs if re.search(r"\b" + re.escape(v), lower)]
    if not found:
        return RuleResult(
            "bloom_verb_presence", False,
            f"No {bloom_level} Bloom verbs found. Expected one of: {', '.join(verbs[:5])}"
        )
    return RuleResult("bloom_verb_presence", True, f"Bloom verbs found: {', '.join(found[:3])}")
def check_cultural_flags(
    content: str,
    use_local_names: bool = True,
    western_ratio_threshold: float = 0.6,
) -> RuleResult:
    """Fail when recognisable names in the content skew too Western.

    Capitalised words are matched against the WESTERN_NAMES and LOCAL_NAMES
    banks; the rule fails when the Western share of recognised names exceeds
    the threshold. Skipped when local names aren't required or no known
    names appear.
    """
    if not use_local_names:
        return RuleResult("cultural_flag_check", True, "Local names not required")
    capitalised = re.findall(r"\b[A-Z][a-z]+\b", content)
    recognised = [w for w in capitalised if w.lower() in WESTERN_NAMES or w.lower() in LOCAL_NAMES]
    if not recognised:
        return RuleResult("cultural_flag_check", True, "No recognisable names found; skipping")
    western = [n for n in recognised if n.lower() in WESTERN_NAMES]
    ratio = len(western) / len(recognised)
    if ratio > western_ratio_threshold:
        return RuleResult(
            "cultural_flag_check", False,
            f"Western name ratio {ratio:.0%} exceeds {western_ratio_threshold:.0%} threshold. "
            f"Found Western names: {list(set(n for n in recognised if n.lower() in WESTERN_NAMES))[:5]}"
        )
    return RuleResult("cultural_flag_check", True, f"Name diversity OK (Western ratio: {ratio:.0%})")
def check_format_compliance(
    content: str,
    content_type: str,
    num_questions: Optional[int] = None,
) -> RuleResult:
    """Check structural compliance for the given content type.

    Assessments must contain roughly the requested number of questions,
    lesson plans must mention objective/activity/assessment sections (any
    synonym counts), and curriculum plans must reference weeks or terms.
    Unknown content types pass unconditionally.
    """
    kind = content_type.upper()
    if kind in {"QUIZ", "EXAM_QUESTIONS", "QUESTION_BANK", "DIAGNOSTIC_TEST"}:
        # Count both literal '?' and numbered-question prefixes like "3." or "Q2)".
        detected = content.count("?") + len(re.findall(r"^\s*Q?\d+[\.\)]\s", content, re.MULTILINE))
        if num_questions and detected < max(1, num_questions // 2):
            return RuleResult(
                "format_compliance", False,
                f"Assessment has only {detected} questions; expected ~{num_questions}"
            )
        return RuleResult("format_compliance", True)
    if kind == "LESSON_PLAN":
        # Canonical section name -> accepted synonyms (any one suffices).
        section_synonyms = {
            "objective": ["objective", "learning outcome", "aim", "goal", "target"],
            "activity": ["activity", "procedure", "exercise", "task", "instruction", "method", "practice"],
            "assessment": ["assessment", "evaluation", "evaluate", "test", "quiz", "check", "review"],
        }
        lower = content.lower()
        missing = [
            section for section, synonyms in section_synonyms.items()
            if all(s not in lower for s in synonyms)
        ]
        if missing:
            return RuleResult(
                "format_compliance", False,
                f"Lesson plan missing sections: {', '.join(missing)}"
            )
        return RuleResult("format_compliance", True)
    if kind in {"SCHEME_OF_WORK", "TERM_PLAN", "SCOPE_AND_SEQUENCE"}:
        # Must reference weeks or terms, e.g. "Week 3", "Term 1", "W4:".
        if not re.search(r"week\s*\d+|term\s*\d+|w\d+\s*[:\-]", content, re.IGNORECASE):
            return RuleResult(
                "format_compliance", False,
                "Curriculum plan lacks week/term structure"
            )
    return RuleResult("format_compliance", True)
def check_no_hallucinated_dates(content: str) -> RuleResult:
    """Flag 4-digit years outside [1800-2030] as potential hallucinations."""
    suspicious = YEAR_PATTERN.findall(content)
    if not suspicious:
        return RuleResult("no_hallucinated_dates", True)
    return RuleResult(
        "no_hallucinated_dates", False,
        f"Suspicious years found: {suspicious[:5]}",
    )
def check_no_explicit_content(content: str) -> RuleResult:
    """Hard-fail when profanity or explicit content is detected.

    A hit is a hard failure: the LLM judge cannot override it.
    """
    haystack = content.lower()
    hit = next((p for p in EXPLICIT_BLOCKLIST if re.search(p, haystack)), None)
    if hit is not None:
        return RuleResult(
            "no_explicit_content", False,
            f"Explicit content detected (pattern: {hit})",
            is_hard_fail=True,
        )
    return RuleResult("no_explicit_content", True)
def check_curriculum_alignment(
    content: str,
    curriculum_board: str = "NERDC",
    min_keywords: int = 3,
) -> RuleResult:
    """Require at least `min_keywords` board-specific curriculum terms.

    Unknown board codes fall back to the "DEFAULT" keyword list.
    """
    vocabulary = CURRICULUM_KEYWORDS.get(curriculum_board.upper(), CURRICULUM_KEYWORDS["DEFAULT"])
    haystack = content.lower()
    hits = [term for term in vocabulary if term in haystack]
    if len(hits) >= min_keywords:
        return RuleResult("curriculum_alignment", True, f"Curriculum terms found: {hits[:5]}")
    return RuleResult(
        "curriculum_alignment", False,
        f"Only {len(hits)}/{min_keywords} curriculum terms found: {hits}"
    )
# ── Master runner ──────────────────────────────────────────────────────────────
def run_all_rules(
    content: str,
    *,
    max_tokens: int = 1024,
    expected_language: str = "en",
    bloom_level: str = "UNDERSTAND",
    use_local_names: bool = True,
    content_type: str = "LESSON_PLAN",
    curriculum_board: str = "NERDC",
    num_questions: Optional[int] = None,
) -> ValidationRulesReport:
    """Run all 8 validation rules and return a consolidated report.

    Each rule's name lands in exactly one of the report's passed / failed /
    hard_failed buckets; any rule message is appended to the notes.
    """
    report = ValidationRulesReport()
    outcomes = (
        check_length(content, max_tokens),
        check_language_detection(content, expected_language),
        check_bloom_verbs(content, bloom_level),
        check_cultural_flags(content, use_local_names),
        check_format_compliance(content, content_type, num_questions),
        check_no_hallucinated_dates(content),
        check_no_explicit_content(content),
        check_curriculum_alignment(content, curriculum_board),
    )
    for outcome in outcomes:
        if outcome.passed:
            bucket = report.passed
        elif outcome.is_hard_fail:
            bucket = report.hard_failed
        else:
            bucket = report.failed
        bucket.append(outcome.rule_name)
        if outcome.message:
            report.notes.append(f"{outcome.rule_name}: {outcome.message}")
    logger.info(
        f"Rules: {len(report.passed)} passed, {len(report.failed)} failed, "
        f"{len(report.hard_failed)} hard-failed"
    )
    return report