# Author: Mercy Babayemi
# AfriPed — clean deploy (ENGREG teacher corpus as TXT, no binary files)
# Build: 5070743
"""Rule-based validation: 8 fast checks (~50ms, no LLM required)."""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import List, Optional, Tuple
from loguru import logger
# ── Bloom verb banks (Anderson & Krathwohl, 2001 revised taxonomy) ─────────────
# Canonical verbs from: Anderson, L.W. & Krathwohl, D.R. (2001).
# A Taxonomy for Learning, Teaching and Assessing. Longman.
# Extensions marked (*) are beyond A&K canonical list, retained for
# Nigerian curriculum alignment (common in NERDC/WAEC marking schemes).
# All entries are lowercase (including multi-word phrases such as
# "give examples"); check_bloom_verbs() compares them against lower-cased
# content, so casing in the generated text does not matter.
BLOOM_VERBS_BY_LEVEL: dict[str, List[str]] = {
    "REMEMBER": ["define", "duplicate", "list", "memorise", "memorize", "recall",
                 "repeat", "reproduce", "state", "recognise", "recognize",
                 "identify", "name", "label", "match", "select", "locate",
                 "know"],  # *
    "UNDERSTAND": ["classify", "describe", "discuss", "explain", "identify",
                   "locate", "recognise", "recognize", "report", "select",
                   "translate", "paraphrase", "summarise", "summarize",
                   "interpret", "exemplify", "infer", "compare",
                   "give examples", "illustrate",
                   "express", "tell", "review", "understand"],  # *
    "APPLY": ["choose", "demonstrate", "dramatise", "dramatize",
              "employ", "illustrate", "interpret", "operate",
              "schedule", "sketch", "solve", "use", "write",
              "carry out", "execute", "implement", "apply",
              "calculate", "complete", "show", "practise", "practice",
              "perform", "model", "present"],  # *
    "ANALYZE": ["appraise", "compare", "contrast", "criticise", "criticize",
                "differentiate", "discriminate", "distinguish", "examine",
                "experiment", "question", "test", "analyse", "analyze",
                "break down", "categorise", "categorize", "separate",
                "order", "attribute", "organise", "organize", "deconstruct",
                "investigate", "relate", "infer"],
    "EVALUATE": ["appraise", "argue", "defend", "judge", "select",
                 "support", "value", "evaluate", "critique", "assess",
                 "justify", "recommend", "rate", "rank", "measure",
                 "decide", "review", "weigh", "conclude",
                 "prioritise", "prioritize"],
    "CREATE": ["assemble", "construct", "create", "design", "develop",
               "formulate", "write", "plan", "produce", "generate",
               "invent", "make", "build", "compose", "hypothesise",
               "hypothesize", "propose", "combine", "compile", "devise"],
}
# ── Profanity / explicit content blocklist (non-exhaustive) ───────────────────
# Regex patterns run against lower-cased content; any match is a HARD fail
# (see check_no_explicit_content). Negative lookaheads exempt educational
# phrases such as "sex education" and "penis anatomy".
# NOTE(review): the "(?!ual\s+health" branch after \bsex\b is unreachable —
# the \b word boundary already prevents matching inside "sexual".
EXPLICIT_BLOCKLIST = [
    r"\bfuck\b", r"\bshit\b", r"\bporn\b", r"\bsex\b(?!ual\s+health|\s+education)",
    r"\bpenis\b(?!\s+(?:development|anatomy|health))", r"\bvagina\b(?!\s+(?:anatomy|health))",
    r"\bnude\b", r"\bnaked\b",
]
# ── Cultural flag: Western names that dilute local authenticity ────────────────
# Both sets hold lower-case names; check_cultural_flags() lower-cases each
# capitalised word from the content before the membership test.
WESTERN_NAMES = {
    "john", "james", "peter", "michael", "david", "william", "robert", "richard",
    "thomas", "charles", "george", "edward", "henry", "joseph", "paul",
    "mary", "jennifer", "jessica", "emily", "sarah", "elizabeth", "lisa",
    "susan", "karen", "nancy", "betty", "helen", "sandra", "donna",
    "matthew", "andrew", "daniel", "christopher", "mark", "joshua", "ryan",
    "kevin", "brian", "gary", "timothy", "jason", "jeff", "frank",
}
# West African given names and surnames (Nigerian and Ghanaian).
LOCAL_NAMES = {
    "chukwuemeka", "adaeze", "aminu", "tunde", "kofi", "ama", "fatima",
    "emeka", "ngozi", "kwame", "abena", "bola", "sola", "kemi", "biodun",
    "seun", "yetunde", "taiwo", "kehinde", "femi", "toyin", "chioma",
    "nkechi", "uchenna", "obiora", "chiamaka", "obinna", "chidi", "aisha",
    "musa", "ibrahim", "halima", "zainab", "binta", "garba", "yusuf",
    "efua", "akosua", "adjoa", "esi", "yaw", "kojo", "ama", "akua",  # "ama" repeats; duplicates collapse in a set literal
    "nana", "afia", "ekua", "mensah", "asante", "owusu", "boateng",
}
# ── Curriculum alignment keywords (per board) ─────────────────────────────────
# Lower-case terms expected in board-aligned content. check_curriculum_alignment()
# requires a minimum count of these (substring match against lower-cased
# content); unrecognised boards fall back to the "DEFAULT" list.
CURRICULUM_KEYWORDS = {
    "NERDC": ["objective", "activity", "evaluation", "term", "week", "topic", "learning", "assessment"],
    "WAEC": ["objective", "question", "mark", "answer", "examination", "candidate", "section"],
    "NECO": ["objective", "section", "question", "answer", "mark", "examination"],
    "NABTEB": ["objective", "practical", "trade", "skill", "competency", "assessment"],
    "UBEC": ["objective", "activity", "term", "week", "learning", "lesson"],
    "GES_GH": ["objective", "activity", "term", "week", "learning", "lesson", "indicator"],
    "DEFAULT": ["objective", "topic", "learning", "assessment", "activity"],
}
# ── Date hallucination check ───────────────────────────────────────────────────
# Matches 4-digit years OUTSIDE the plausible [1800, 2030] range:
#   1000–1799 via 1[0-7]\d{2}, 2031–2099 via 20(?:3[1-9]|[4-9]\d),
#   2100–2999 via 2[1-9]\d{2}. Years 1800–2030 are accepted.
# (Previous pattern used 20[3-9]\d, which wrongly flagged 2030 itself.)
YEAR_PATTERN = re.compile(r"\b(1[0-7]\d{2}|20(?:3[1-9]|[4-9]\d)|2[1-9]\d{2})\b")  # outside [1800-2030]
@dataclass
class RuleResult:
    """Outcome of a single validation rule check."""
    rule_name: str  # stable rule identifier, e.g. "length_check"
    passed: bool  # True when the rule did not flag the content
    message: str = ""  # optional human-readable detail, surfaced in report notes
    is_hard_fail: bool = False  # hard fails auto-fail regardless of judge
@dataclass
class ValidationRulesReport:
    """Consolidated result of running the full rule set (see run_all_rules)."""
    passed: List[str] = field(default_factory=list)  # names of rules that passed
    failed: List[str] = field(default_factory=list)  # soft failures
    hard_failed: List[str] = field(default_factory=list)  # failures that auto-fail the content
    notes: List[str] = field(default_factory=list)  # "rule_name: message" detail lines

    @property
    def all_passed(self) -> bool:
        """True when there are no soft or hard failures."""
        return not self.failed and not self.hard_failed

    @property
    def has_hard_fail(self) -> bool:
        """True when at least one hard-fail rule triggered."""
        return bool(self.hard_failed)
# ── Individual rule functions ──────────────────────────────────────────────────
def check_length(content: str, max_tokens: int) -> RuleResult:
    """Content must be β‰₯ 25% and ≀ 600% of max_tokens (in characters).

    Uses the rough heuristic of ~4 characters per token to convert the
    token budget into character bounds.
    """
    n_chars = len(content)
    min_chars = 4 * 0.25 * max_tokens  # 25% of the budget, in characters
    max_chars = 4 * 6.0 * max_tokens   # 600% of the budget, in characters
    if n_chars < min_chars:
        return RuleResult("length_check", False, f"Content too short ({n_chars} chars; min ~{int(min_chars)})")
    if n_chars > max_chars:
        return RuleResult("length_check", False, f"Content too long ({n_chars} chars; max ~{int(max_chars)})")
    return RuleResult("length_check", True)
def check_language_detection(
    content: str,
    expected_lang: str,
    threshold: float = 0.85,
) -> RuleResult:
    """Detect language and fail if confidence > threshold for wrong language.

    Prefers the optional `lingua` detector; if it is not installed, falls
    back to `langdetect`. Any error in the fallback path is logged and the
    check passes, so a missing optional dependency never blocks validation.
    """
    try:
        # Optional dependency — imported lazily so the module loads without it.
        from lingua import Language as LinguaLang, LanguageDetectorBuilder  # type: ignore
        # NOTE(review): the detector is rebuilt on every call; consider a
        # module-level cache if this check becomes a hot path.
        detector = (
            LanguageDetectorBuilder.from_all_languages()
            .with_low_accuracy_mode()
            .build()
        )
        # Only the first 1000 characters are sampled, for speed.
        result = detector.detect_language_of(content[:1000])
        if result is None:
            # Indeterminate language is not treated as a failure.
            return RuleResult("language_detection", True, "Language could not be determined; skipping")
        detected = result.name.lower()
        # Map our short codes to lingua language names; "pcm" (presumably
        # Nigerian Pidgin) is deliberately accepted as English.
        lang_map = {"en": "english", "yo": "yoruba", "ha": "hausa", "ig": "igbo", "pcm": "english"}
        expected_name = lang_map.get(expected_lang, expected_lang.lower())
        # Substring match in both directions tolerates partial names
        # (e.g. expected "en" vs detected "english").
        if expected_name not in detected and detected not in expected_name:
            conf_result = detector.compute_language_confidence(content[:1000], result)
            # Only fail when the detector is confident about the mismatch.
            if conf_result > threshold:
                return RuleResult(
                    "language_detection", False,
                    f"Language mismatch: expected '{expected_lang}', detected '{detected}' (conf {conf_result:.2f})"
                )
    except ImportError:
        # Fall back to langdetect
        try:
            from langdetect import detect  # type: ignore
            detected = detect(content[:1000])
            lang_map = {"en": "en", "yo": "yo", "ha": "ha", "ig": "ig", "pcm": "en"}
            expected_code = lang_map.get(expected_lang, expected_lang.split("-")[0])
            # Regional English variants ("en-NG", etc.) never fail in the fallback.
            if detected != expected_code and not expected_lang.startswith("en-"):
                return RuleResult(
                    "language_detection", False,
                    f"Language mismatch: expected '{expected_lang}', detected '{detected}'"
                )
        except Exception as exc:
            # Best-effort: log and treat the check as passed.
            logger.warning(f"Language detection skipped: {exc}")
    return RuleResult("language_detection", True)
def check_bloom_verbs(
    content: str,
    bloom_level: str,
) -> RuleResult:
    """At least one Bloom-level verb must appear in the content.

    Verbs are matched case-insensitively as whole words/phrases using word
    boundaries, so e.g. "use" no longer false-positives inside "because" or
    "house" (the previous plain-substring check did). Multi-word entries
    such as "give examples" match as literal phrases. Unknown bloom levels
    skip the check.
    """
    verbs = BLOOM_VERBS_BY_LEVEL.get(bloom_level.upper(), [])
    if not verbs:
        return RuleResult("bloom_verb_presence", True, "No verb list for bloom level; skipping")
    lower = content.lower()
    # \b-anchored search; re.escape keeps any future non-alphanumeric entries safe.
    found = [v for v in verbs if re.search(rf"\b{re.escape(v)}\b", lower)]
    if not found:
        return RuleResult(
            "bloom_verb_presence", False,
            f"No {bloom_level} Bloom verbs found. Expected one of: {', '.join(verbs[:5])}"
        )
    return RuleResult("bloom_verb_presence", True, f"Bloom verbs found: {', '.join(found[:3])}")
def check_cultural_flags(
    content: str,
    use_local_names: bool = True,
    western_ratio_threshold: float = 0.6,
) -> RuleResult:
    """Flag if Western name ratio > threshold when local names expected.

    Capitalised words are treated as candidate personal names; only those
    appearing in WESTERN_NAMES or LOCAL_NAMES count toward the ratio.
    """
    if not use_local_names:
        return RuleResult("cultural_flag_check", True, "Local names not required")
    candidates = re.findall(r"\b[A-Z][a-z]+\b", content)
    recognised = [c for c in candidates if c.lower() in WESTERN_NAMES or c.lower() in LOCAL_NAMES]
    if not recognised:
        # Nothing to judge — pass rather than penalise name-free content.
        return RuleResult("cultural_flag_check", True, "No recognisable names found; skipping")
    western = [n for n in recognised if n.lower() in WESTERN_NAMES]
    ratio = len(western) / len(recognised)
    if ratio > western_ratio_threshold:
        return RuleResult(
            "cultural_flag_check", False,
            f"Western name ratio {ratio:.0%} exceeds {western_ratio_threshold:.0%} threshold. "
            f"Found Western names: {list(set(western))[:5]}"
        )
    return RuleResult("cultural_flag_check", True, f"Name diversity OK (Western ratio: {ratio:.0%})")
def check_format_compliance(
    content: str,
    content_type: str,
    num_questions: Optional[int] = None,
) -> RuleResult:
    """Check structural compliance based on content type.

    - Assessment types: expects roughly num_questions questions, counted
      from "?" characters plus numbered-question lines.
    - LESSON_PLAN: expects objective / activity / assessment sections
      (synonyms accepted).
    - Curriculum plans: expects explicit week/term structure.
    Any other content type passes unconditionally.
    """
    ct = content_type.upper()
    if ct in {"QUIZ", "EXAM_QUESTIONS", "QUESTION_BANK", "DIAGNOSTIC_TEST"}:
        # Should have question marks or numbered questions
        question_marks = content.count("?")
        # Lines beginning like "1.", "2)" or "Q3." count as numbered questions.
        numbered_q = len(re.findall(r"^\s*Q?\d+[\.\)]\s", content, re.MULTILINE))
        # NOTE(review): a numbered question that also ends in "?" is counted
        # twice; the lenient //2 threshold below compensates for that.
        total_q = question_marks + numbered_q
        if num_questions and total_q < max(1, num_questions // 2):
            return RuleResult(
                "format_compliance", False,
                f"Assessment has only {total_q} questions; expected ~{num_questions}"
            )
    elif ct == "LESSON_PLAN":
        # Each tuple: (canonical name, accepted synonyms)
        required_sections = [
            ("objective", ["objective", "learning outcome", "aim", "goal", "target"]),
            ("activity", ["activity", "procedure", "exercise", "task", "instruction", "method", "practice"]),
            ("assessment", ["assessment", "evaluation", "evaluate", "test", "quiz", "check", "review"]),
        ]
        lower = content.lower()
        # A section is satisfied when any synonym appears anywhere (substring match).
        missing = [name for name, synonyms in required_sections if not any(s in lower for s in synonyms)]
        if missing:
            return RuleResult(
                "format_compliance", False,
                f"Lesson plan missing sections: {', '.join(missing)}"
            )
    elif ct in {"SCHEME_OF_WORK", "TERM_PLAN", "SCOPE_AND_SEQUENCE"}:
        # Should reference weeks or terms
        if not re.search(r"week\s*\d+|term\s*\d+|w\d+\s*[:\-]", content, re.IGNORECASE):
            return RuleResult(
                "format_compliance", False,
                "Curriculum plan lacks week/term structure"
            )
    return RuleResult("format_compliance", True)
def check_no_hallucinated_dates(content: str) -> RuleResult:
    """Flag years outside [1800–2030] as potential hallucinations."""
    suspicious = YEAR_PATTERN.findall(content)
    if not suspicious:
        return RuleResult("no_hallucinated_dates", True)
    # Report at most five offending years to keep the note readable.
    return RuleResult(
        "no_hallucinated_dates", False,
        f"Suspicious years found: {suspicious[:5]}",
    )
def check_no_explicit_content(content: str) -> RuleResult:
    """Hard-fail if explicit/profane content is detected."""
    haystack = content.lower()
    # First matching blocklist pattern (list order), or None when clean.
    offending = next((p for p in EXPLICIT_BLOCKLIST if re.search(p, haystack)), None)
    if offending is not None:
        return RuleResult(
            "no_explicit_content", False,
            f"Explicit content detected (pattern: {offending})",
            is_hard_fail=True,
        )
    return RuleResult("no_explicit_content", True)
def check_curriculum_alignment(
    content: str,
    curriculum_board: str = "NERDC",
    min_keywords: int = 3,
) -> RuleResult:
    """At least min_keywords curriculum terms must appear.

    Terms come from CURRICULUM_KEYWORDS for the given board (falling back
    to the DEFAULT list) and are matched as substrings of the lower-cased
    content.
    """
    board_terms = CURRICULUM_KEYWORDS.get(curriculum_board.upper(), CURRICULUM_KEYWORDS["DEFAULT"])
    text = content.lower()
    found = [term for term in board_terms if term in text]
    if len(found) >= min_keywords:
        return RuleResult("curriculum_alignment", True, f"Curriculum terms found: {found[:5]}")
    return RuleResult(
        "curriculum_alignment", False,
        f"Only {len(found)}/{min_keywords} curriculum terms found: {found}"
    )
# ── Master runner ──────────────────────────────────────────────────────────────
def run_all_rules(
    content: str,
    *,
    max_tokens: int = 1024,
    expected_language: str = "en",
    bloom_level: str = "UNDERSTAND",
    use_local_names: bool = True,
    content_type: str = "LESSON_PLAN",
    curriculum_board: str = "NERDC",
    num_questions: Optional[int] = None,
) -> ValidationRulesReport:
    """Run all 8 validation rules and return a consolidated report.

    Each rule outcome is bucketed into passed / failed / hard_failed, and
    any rule message is appended to the report notes.
    """
    report = ValidationRulesReport()
    results = [
        check_length(content, max_tokens),
        check_language_detection(content, expected_language),
        check_bloom_verbs(content, bloom_level),
        check_cultural_flags(content, use_local_names),
        check_format_compliance(content, content_type, num_questions),
        check_no_hallucinated_dates(content),
        check_no_explicit_content(content),
        check_curriculum_alignment(content, curriculum_board),
    ]
    for res in results:
        bucket = (
            report.passed if res.passed
            else report.hard_failed if res.is_hard_fail
            else report.failed
        )
        bucket.append(res.rule_name)
        if res.message:
            report.notes.append(f"{res.rule_name}: {res.message}")
    logger.info(
        f"Rules: {len(report.passed)} passed, {len(report.failed)} failed, "
        f"{len(report.hard_failed)} hard-failed"
    )
    return report